Repository: lebrice/Sequoia
Branch: master
Commit: 7e12ff8ed67f
Files: 460
Total size: 2.6 MB

Directory structure:
gitextract_c6gc35b2/

├── .dockerignore
├── .gitattributes
├── .gitignore
├── .gitmodules
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── dockers/
│   ├── .gitignore
│   ├── base/
│   │   ├── Dockerfile
│   │   └── build.sh
│   └── branch/
│       ├── Dockerfile
│       └── build.sh
├── docs/
│   └── diagrams/
│       └── src/
│           ├── gym.puml
│           ├── pytorch_lightning.puml
│           └── seq_diagram.puml
├── examples/
│   ├── README.md
│   ├── __init__.py
│   ├── advanced/
│   │   ├── RL_and_SL_demo.py
│   │   ├── continual_rl_demo.py
│   │   ├── ewc_in_rl.py
│   │   ├── hat_demo.py
│   │   ├── hparam_tuning.py
│   │   ├── pnn/
│   │   │   ├── __init__.py
│   │   │   ├── layers.py
│   │   │   ├── model_rl.py
│   │   │   ├── model_sl.py
│   │   │   └── pnn_method.py
│   │   └── procgen_example.py
│   ├── basic/
│   │   ├── __init__.py
│   │   ├── base_method_demo.py
│   │   ├── pl_example.py
│   │   ├── pl_example_packnet.py
│   │   ├── pl_example_test.py
│   │   ├── quick_demo.ipynb
│   │   ├── quick_demo.py
│   │   ├── quick_demo_ewc.py
│   │   ├── quick_demo_packnet.py
│   │   └── quick_demo_test.py
│   ├── clcomp21/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── a2c_example.py
│   │   ├── a2c_example_test.py
│   │   ├── classifier.py
│   │   ├── classifier_test.py
│   │   ├── conftest.py
│   │   ├── dummy_method.py
│   │   ├── dummy_method_test.py
│   │   ├── multihead_classifier.py
│   │   ├── multihead_classifier_test.py
│   │   ├── regularization_example.py
│   │   ├── regularization_example_test.py
│   │   ├── sb3_example.py
│   │   └── sb3_example_test.py
│   ├── demo_utils.py
│   └── prerequisites/
│       └── dataclasses_example.py
├── mypy.ini
├── pytest.ini
├── requirements.txt
├── scripts/
│   ├── eai/
│   │   ├── cancel_all_queuing.sh
│   │   ├── cancel_all_running.sh
│   │   ├── job.sh
│   │   ├── rl_sweep.sh
│   │   ├── shell_job.sh
│   │   └── sl_sweep.sh
│   └── slurm/
│       ├── launch_many_sweeps.sh
│       ├── run.sh
│       └── sweep.sh
├── sequoia/
│   ├── README.md
│   ├── __init__.py
│   ├── _version.py
│   ├── client/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── __main__.py
│   │   ├── env.proto
│   │   ├── env_proxy.py
│   │   ├── env_proxy_test.py
│   │   ├── server.py
│   │   ├── setting_proxy.py
│   │   └── setting_proxy_test.py
│   ├── common/
│   │   ├── __init__.py
│   │   ├── batch.py
│   │   ├── batch_test.py
│   │   ├── callbacks/
│   │   │   ├── __init__.py
│   │   │   ├── knn_callback.py
│   │   │   └── vae_callback.py
│   │   ├── config/
│   │   │   ├── __init__.py
│   │   │   ├── config.py
│   │   │   └── wandb_config.py
│   │   ├── gym_wrappers/
│   │   │   ├── __init__.py
│   │   │   ├── action_limit.py
│   │   │   ├── action_limit_test.py
│   │   │   ├── add_done.py
│   │   │   ├── add_info.py
│   │   │   ├── convert_tensors.py
│   │   │   ├── convert_tensors_test.py
│   │   │   ├── env_dataset.py
│   │   │   ├── env_dataset_test.py
│   │   │   ├── episode_limit.py
│   │   │   ├── episode_limit_test.py
│   │   │   ├── measure_performance.py
│   │   │   ├── multi_task_environment.py
│   │   │   ├── multi_task_environment_test.py
│   │   │   ├── observation_limit.py
│   │   │   ├── observation_limit_test.py
│   │   │   ├── pixel_observation.py
│   │   │   ├── pixel_observation_test.py
│   │   │   ├── policy_env.py
│   │   │   ├── policy_env_test.py
│   │   │   ├── smooth_environment.py
│   │   │   ├── smooth_environment_test.py
│   │   │   ├── step_callback_wrapper.py
│   │   │   ├── step_callback_wrapper_test.py
│   │   │   ├── transform_wrappers.py
│   │   │   ├── transform_wrappers_test.py
│   │   │   ├── utils.py
│   │   │   └── utils_test.py
│   │   ├── hparams/
│   │   │   └── __init__.py
│   │   ├── layers.py
│   │   ├── loss.py
│   │   ├── loss_test.py
│   │   ├── metrics/
│   │   │   ├── __init__.py
│   │   │   ├── classification.py
│   │   │   ├── classification_test.py
│   │   │   ├── get_metrics.py
│   │   │   ├── metrics.py
│   │   │   ├── metrics_utils.py
│   │   │   ├── metrics_utils_test.py
│   │   │   ├── regression.py
│   │   │   └── rl_metrics.py
│   │   ├── replay.py
│   │   ├── spaces/
│   │   │   ├── __init__.py
│   │   │   ├── image.py
│   │   │   ├── named_tuple.py
│   │   │   ├── named_tuple_test.py
│   │   │   ├── space.py
│   │   │   ├── sparse.py
│   │   │   ├── sparse_test.py
│   │   │   ├── tensor_spaces.py
│   │   │   ├── tensor_spaces_test.py
│   │   │   ├── typed_dict.py
│   │   │   └── typed_dict_test.py
│   │   ├── task.py
│   │   └── transforms/
│   │       ├── __init__.py
│   │       ├── channels.py
│   │       ├── compose.py
│   │       ├── resize.py
│   │       ├── split_batch.py
│   │       ├── to_tensor.py
│   │       ├── transform.py
│   │       ├── transform_enum.py
│   │       ├── transforms_test.py
│   │       └── utils.py
│   ├── common.puml
│   ├── conftest.py
│   ├── experiments/
│   │   ├── __init__.py
│   │   ├── experiment.py
│   │   ├── experiment_test.py
│   │   ├── hpo_sweep.py
│   │   └── hpo_sweep_test.py
│   ├── main.py
│   ├── methods/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── aux_tasks/
│   │   │   ├── __init__.py
│   │   │   ├── auxiliary_task.py
│   │   │   ├── ewc.py
│   │   │   ├── reconstruction/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── ae.py
│   │   │   │   ├── decoder_for_dataset.py
│   │   │   │   ├── decoders.py
│   │   │   │   └── vae.py
│   │   │   └── transformation_based/
│   │   │       ├── __init__.py
│   │   │       ├── bases.py
│   │   │       └── rotation.py
│   │   ├── avalanche_methods/
│   │   │   ├── __init__.py
│   │   │   ├── agem.py
│   │   │   ├── agem_test.py
│   │   │   ├── ar1.py
│   │   │   ├── ar1_test.py
│   │   │   ├── base.py
│   │   │   ├── base_test.py
│   │   │   ├── conftest.py
│   │   │   ├── cwr_star.py
│   │   │   ├── cwr_star_test.py
│   │   │   ├── ewc.py
│   │   │   ├── ewc_test.py
│   │   │   ├── experience.py
│   │   │   ├── gdumb.py
│   │   │   ├── gdumb_test.py
│   │   │   ├── gem.py
│   │   │   ├── gem_test.py
│   │   │   ├── lwf.py
│   │   │   ├── lwf_test.py
│   │   │   ├── naive.py
│   │   │   ├── naive_test.py
│   │   │   ├── patched_models.py
│   │   │   ├── plugins.py
│   │   │   ├── replay.py
│   │   │   ├── replay_test.py
│   │   │   ├── synaptic_intelligence.py
│   │   │   └── synaptic_intelligence_test.py
│   │   ├── base_method.py
│   │   ├── base_method_test.py
│   │   ├── conftest.py
│   │   ├── d3rlpy_methods/
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   └── base_test.py
│   │   ├── ewc_method.py
│   │   ├── ewc_method_test.py
│   │   ├── experience_replay.py
│   │   ├── experience_replay_test.py
│   │   ├── hat.py
│   │   ├── method_test.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── base_model/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base_model.py
│   │   │   │   ├── model.py
│   │   │   │   ├── multihead_model.py
│   │   │   │   ├── multihead_model_test.py
│   │   │   │   ├── self_supervised_model.py
│   │   │   │   ├── self_supervised_model_test.py
│   │   │   │   └── semi_supervised_model.py
│   │   │   ├── baseline_model.puml
│   │   │   ├── fcnet.py
│   │   │   ├── forward_pass.py
│   │   │   ├── output_heads/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── classification_head.py
│   │   │   │   ├── output_head.py
│   │   │   │   ├── regression_head.py
│   │   │   │   └── rl/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── actor_critic_head.py
│   │   │   │       ├── episodic_a2c.py
│   │   │   │       ├── episodic_a2c_test.py
│   │   │   │       ├── policy_head.py
│   │   │   │       ├── policy_head_test.py
│   │   │   │       └── wasted_steps_calc.py
│   │   │   ├── output_heads.puml
│   │   │   └── simple_convnet.py
│   │   ├── models.puml
│   │   ├── packnet_method.py
│   │   ├── packnet_method_test.py
│   │   ├── pl_bolts_methods/
│   │   │   └── __init__.py
│   │   ├── pl_dqn.py
│   │   ├── pnn/
│   │   │   ├── __init__.py
│   │   │   ├── layers.py
│   │   │   ├── model_rl.py
│   │   │   ├── model_sl.py
│   │   │   └── pnn_method.py
│   │   ├── random_baseline.py
│   │   ├── random_baseline_test.py
│   │   ├── stable_baselines3_methods/
│   │   │   ├── __init__.py
│   │   │   ├── a2c.py
│   │   │   ├── a2c_test.py
│   │   │   ├── base.py
│   │   │   ├── base_test.py
│   │   │   ├── ddpg.py
│   │   │   ├── ddpg_test.py
│   │   │   ├── dqn.py
│   │   │   ├── dqn_test.py
│   │   │   ├── off_policy_method.py
│   │   │   ├── off_policy_method_test.py
│   │   │   ├── on_policy_method.py
│   │   │   ├── policy_wrapper.py
│   │   │   ├── ppo.py
│   │   │   ├── ppo_test.py
│   │   │   ├── sac.py
│   │   │   ├── sac_test.py
│   │   │   ├── td3.py
│   │   │   └── td3_test.py
│   │   └── trainer.py
│   ├── methods.puml
│   ├── sequoia.puml
│   ├── settings/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── assumptions/
│   │   │   ├── __init__.py
│   │   │   ├── assumptions.puml
│   │   │   ├── base.py
│   │   │   ├── classification.py
│   │   │   ├── context_discreteness.py
│   │   │   ├── context_visibility.py
│   │   │   ├── continual.py
│   │   │   ├── discrete_results.py
│   │   │   ├── iid.py
│   │   │   ├── iid_results.py
│   │   │   ├── incremental.py
│   │   │   ├── incremental_results.py
│   │   │   ├── incremental_test.py
│   │   │   ├── task_incremental.py
│   │   │   └── task_type.py
│   │   ├── base/
│   │   │   ├── __init__.py
│   │   │   ├── base.puml
│   │   │   ├── bases.py
│   │   │   ├── environment.py
│   │   │   ├── objects.py
│   │   │   ├── results.py
│   │   │   ├── setting.py
│   │   │   ├── setting_meta.py
│   │   │   └── setting_test.py
│   │   ├── offline_rl/
│   │   │   └── setting.py
│   │   ├── presets/
│   │   │   ├── __init__.py
│   │   │   ├── cartpole_pixels.yaml
│   │   │   ├── cartpole_state.yaml
│   │   │   ├── cifar10.yaml
│   │   │   ├── cifar100.yaml
│   │   │   ├── classic_control/
│   │   │   │   ├── cartpole.yaml
│   │   │   │   └── mountaincar_continuous.yaml
│   │   │   ├── fashion_mnist.yaml
│   │   │   ├── mnist.yaml
│   │   │   ├── monsterkong/
│   │   │   │   ├── monsterkong_3each.yaml
│   │   │   │   ├── monsterkong_4each.yaml
│   │   │   │   ├── monsterkong_5each.yaml
│   │   │   │   ├── monsterkong_all.yaml
│   │   │   │   ├── monsterkong_jumps.yaml
│   │   │   │   ├── monsterkong_jumps_and_ladders.yaml
│   │   │   │   ├── monsterkong_ladders.yaml
│   │   │   │   └── monsterkong_mix.yaml
│   │   │   ├── mujoco/
│   │   │   │   └── half_cheetah.yaml
│   │   │   ├── rl_track.yaml
│   │   │   └── sl_track.yaml
│   │   ├── rl/
│   │   │   ├── __init__.py
│   │   │   ├── continual/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── environment.py
│   │   │   │   ├── environment_test.py
│   │   │   │   ├── make_env.py
│   │   │   │   ├── make_env_test.py
│   │   │   │   ├── objects.py
│   │   │   │   ├── results.py
│   │   │   │   ├── setting.py
│   │   │   │   ├── setting_test.py
│   │   │   │   ├── tasks.py
│   │   │   │   ├── tasks_test.py
│   │   │   │   └── test_environment.py
│   │   │   ├── discrete/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── multienv_wrappers.py
│   │   │   │   ├── multienv_wrappers_test.py
│   │   │   │   ├── results.py
│   │   │   │   ├── setting.py
│   │   │   │   ├── setting_test.py
│   │   │   │   ├── tasks.py
│   │   │   │   ├── tasks_test.py
│   │   │   │   └── test_environment.py
│   │   │   ├── environment.py
│   │   │   ├── environment_test.py
│   │   │   ├── envs/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── classic_control.py
│   │   │   │   ├── monsterkong.py
│   │   │   │   ├── mujoco/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── half_cheetah.py
│   │   │   │   │   ├── half_cheetah_test.py
│   │   │   │   │   ├── hopper.py
│   │   │   │   │   ├── hopper_test.py
│   │   │   │   │   ├── modified_friction.py
│   │   │   │   │   ├── modified_friction_test.py
│   │   │   │   │   ├── modified_gravity.py
│   │   │   │   │   ├── modified_gravity_test.py
│   │   │   │   │   ├── modified_mass.py
│   │   │   │   │   ├── modified_mass_test.py
│   │   │   │   │   ├── modified_size.py
│   │   │   │   │   ├── modified_size_test.py
│   │   │   │   │   ├── modified_wall.py
│   │   │   │   │   ├── mujoco_model_utils.py
│   │   │   │   │   ├── walker2d.py
│   │   │   │   │   └── walker2d_test.py
│   │   │   │   └── variant_spec.py
│   │   │   ├── incremental/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── objects.py
│   │   │   │   ├── results.py
│   │   │   │   ├── setting.py
│   │   │   │   ├── setting_test.py
│   │   │   │   └── tasks.py
│   │   │   ├── multi_task/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── setting.py
│   │   │   │   └── setting_test.py
│   │   │   ├── objects.py
│   │   │   ├── setting.py
│   │   │   ├── setting_test.py
│   │   │   ├── task_incremental/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── setting.py
│   │   │   │   ├── setting_test.py
│   │   │   │   └── tasks.py
│   │   │   ├── traditional/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── setting.py
│   │   │   │   └── setting_test.py
│   │   │   └── wrappers/
│   │   │       ├── __init__.py
│   │   │       ├── measure_performance.py
│   │   │       ├── measure_performance_test.py
│   │   │       ├── no_typed_objects.py
│   │   │       ├── task_labels.py
│   │   │       └── typed_objects.py
│   │   ├── settings.puml
│   │   └── sl/
│   │       ├── README.md
│   │       ├── __init__.py
│   │       ├── continual/
│   │       │   ├── __init__.py
│   │       │   ├── environment.py
│   │       │   ├── environment_test.py
│   │       │   ├── envs.py
│   │       │   ├── objects.py
│   │       │   ├── results.py
│   │       │   ├── setting.py
│   │       │   ├── setting_test.py
│   │       │   └── wrappers.py
│   │       ├── discrete/
│   │       │   ├── __init__.py
│   │       │   ├── setting.py
│   │       │   └── setting_test.py
│   │       ├── domain_incremental/
│   │       │   ├── __init__.py
│   │       │   ├── setting.py
│   │       │   └── setting_test.py
│   │       ├── environment.py
│   │       ├── environment_test.py
│   │       ├── incremental/
│   │       │   ├── __init__.py
│   │       │   ├── environment.py
│   │       │   ├── environment_test.py
│   │       │   ├── objects.py
│   │       │   ├── results.py
│   │       │   ├── setting.py
│   │       │   ├── setting_test.py
│   │       │   └── unused_batch_transforms.py
│   │       ├── multi_task/
│   │       │   ├── __init__.py
│   │       │   ├── setting.py
│   │       │   └── setting_test.py
│   │       ├── setting.py
│   │       ├── task_incremental/
│   │       │   ├── __init__.py
│   │       │   ├── setting.py
│   │       │   └── setting_test.py
│   │       ├── traditional/
│   │       │   ├── __init__.py
│   │       │   ├── results.py
│   │       │   ├── setting.py
│   │       │   └── setting_test.py
│   │       └── wrappers/
│   │           ├── __init__.py
│   │           ├── measure_performance.py
│   │           └── measure_performance_test.py
│   ├── settings.puml
│   └── utils/
│       ├── __init__.py
│       ├── categorical.py
│       ├── data_utils.py
│       ├── encode.py
│       ├── generic_functions/
│       │   ├── __init__.py
│       │   ├── _namedtuple.py
│       │   ├── _namedtuple_test.py
│       │   ├── concatenate.py
│       │   ├── detach.py
│       │   ├── move.py
│       │   ├── replace.py
│       │   ├── replace_test.py
│       │   ├── singledispatchmethod.py
│       │   ├── slicing.py
│       │   ├── slicing_test.py
│       │   ├── stack.py
│       │   └── to_from_tensor.py
│       ├── logging_utils.py
│       ├── module_dict.py
│       ├── parseable.py
│       ├── plotting.py
│       ├── pretrained_utils.py
│       ├── readme.py
│       ├── serialization.py
│       └── utils.py
├── setup.cfg
├── setup.py
└── versioneer.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .dockerignore
================================================
data
lightning_logs
checkpoints
results


================================================
FILE: .gitattributes
================================================
sequoia/_version.py export-subst


================================================
FILE: .gitignore
================================================
**/__pycache__/
.vscode

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

examples/results/*
results/*
!results/**/*.csv
data/*
*/data/*
!data/**/*.py
scripts/*.png
wandb
.idea
.ipynb_checkpoints
checkpoints
lightning_logs
.pylintrc

**.png

*.gz
*.pt
build
dist
*.egg-info
sequoia/results

mjkey.txt

================================================
FILE: .gitmodules
================================================
[submodule "sequoia/methods/cn_dpm"]
	path = sequoia/methods/cn_dpm
	url = https://github.com/ryanlindeborg/CN-DPM.git
[submodule "examples/clcomp21/Real_DEEL"]
	path = examples/clcomp21/Real_DEEL
	url = https://github.com/mostafaelaraby/Real-DEEL-Dark-Experience.git
[submodule "sequoia/methods/continual_world"]
	path = sequoia/methods/continual_world
	url = https://www.github.com/lebrice/continual_world.git


================================================
FILE: .travis.yml
================================================
language: python
python:
  - "3.7"
install:
  - pip install gym[atari]
  - pip install -r requirements.txt
script:
  - pytest
after_sucess:
  coveralls


================================================
FILE: LICENSE
================================================
                    GNU GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The GNU General Public License is a free, copyleft license for
software and other kinds of works.

  The licenses for most software and other practical works are designed
to take away your freedom to share and change the works.  By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.  We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors.  You can apply it to
your programs, too.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

  To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights.  Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.

  For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received.  You must make sure that they, too, receive
or can get the source code.  And you must show them these terms so they
know their rights.

  Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.

  For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software.  For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.

  Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so.  This is fundamentally incompatible with the aim of
protecting users' freedom to change the software.  The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable.  Therefore, we
have designed this version of the GPL to prohibit the practice for those
products.  If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.

  Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary.  To prevent this, the GPL assures that
patents cannot be used to render the program non-free.

  The precise terms and conditions for copying, distribution and
modification follow.

                       TERMS AND CONDITIONS

  0. Definitions.

  "This License" refers to version 3 of the GNU General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

  "The Program" refers to any copyrightable work licensed under this
License.  Each licensee is addressed as "you".  "Licensees" and
"recipients" may be individuals or organizations.

  To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy.  The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

  A "covered work" means either the unmodified Program or a work based
on the Program.

  To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy.  Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

  To "convey" a work means any kind of propagation that enables other
parties to make or receive copies.  Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

  An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License.  If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

  1. Source Code.

  The "source code" for a work means the preferred form of the work
for making modifications to it.  "Object code" means any non-source
form of a work.

  A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

  The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form.  A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

  The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities.  However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work.  For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

  The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

  The Corresponding Source for a work in source code form is that
same work.

  2. Basic Permissions.

  All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met.  This License explicitly affirms your unlimited
permission to run the unmodified Program.  The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work.  This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

  You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force.  You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright.  Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

  Conveying under any other circumstances is permitted solely under
the conditions stated below.  Sublicensing is not allowed; section 10
makes it unnecessary.

  3. Protecting Users' Legal Rights From Anti-Circumvention Law.

  No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

  When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

  4. Conveying Verbatim Copies.

  You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

  You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

  5. Conveying Modified Source Versions.

  You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7.  This requirement modifies the requirement in section 4 to
    "keep intact all notices".

    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy.  This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged.  This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.

  A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit.  Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

  6. Conveying Non-Source Forms.

  You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source.  This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.

    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge.  You need not require recipients to copy the
    Corresponding Source along with the object code.  If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source.  Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.

  A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

  A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling.  In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage.  For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product.  A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

  "Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source.  The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

  If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information.  But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

  The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed.  Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

  Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

  7. Additional Terms.

  "Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law.  If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

  When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it.  (Additional permissions may be written to require their own
removal in certain cases when you modify the work.)  You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

  Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or

    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.

  All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10.  If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term.  If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

  If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

  Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

  8. Termination.

  You may not propagate or modify a covered work except as expressly
provided under this License.  Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

  However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

  Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

  Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License.  If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

  9. Acceptance Not Required for Having Copies.

  You are not required to accept this License in order to receive or
run a copy of the Program.  Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance.  However,
nothing other than this License grants you permission to propagate or
modify any covered work.  These actions infringe copyright if you do
not accept this License.  Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

  10. Automatic Licensing of Downstream Recipients.

  Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License.  You are not responsible
for enforcing compliance by third parties with this License.

  An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations.  If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

  You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License.  For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

  11. Patents.

  A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based.  The
work thus licensed is called the contributor's "contributor version".

  A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version.  For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

  Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

  In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement).  To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

  If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients.  "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

  If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

  A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License.  You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.

  Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.

  12. No Surrender of Others' Freedom.

  If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all.  For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.

  13. Use with the GNU Affero General Public License.

  Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work.  The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time.  Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number.  If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation.  If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.

  Later license versions may give you additional or different
permissions.  However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.

  15. Disclaimer of Warranty.

  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

  16. Limitation of Liability.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

  17. Interpretation of Sections 15 and 16.

  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:

    <program>  Copyright (C) <year>  <name of author>
    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License.  Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.

  The GNU General Public License does not permit incorporating your program
into proprietary programs.  If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library.  If this is what you want to do, use the GNU Lesser General
Public License instead of this License.  But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.


================================================
FILE: MANIFEST.in
================================================
include versioneer.py
include sequoia/_version.py


================================================
FILE: README.md
================================================
# Sequoia - The Research Tree 

A Playground for research at the intersection of Continual, Reinforcement, and Self-Supervised Learning.

- 5 minute intro: https://www.youtube.com/watch?v=0u48vr96zRQ
- Paper link: https://arxiv.org/abs/2108.01005
- [Continual Supervised Learning Study](https://wandb.ai/sequoia/csl_study) (~6K runs)
- [Continual Reinforcement Learning Study](https://wandb.ai/sequoia/crl_study) (~2300 runs)


## Note: This project is not being actively developed at the moment. If you encounter any difficulties, please create an issue and I'll help you out. 

If you have any questions or comments, please make an issue!

## Motivation:
Most applied ML research generally either proposes new Settings (research problems), new Methods (solutions to such problems), or both.

- When proposing new Settings, researchers almost always have to reimplement or heavily modify existing solutions before they can be applied onto their new problem.

- Likewise, when creating new Methods, it's often necessary to first re-create the experimental setting of other baseline papers, or even the baseline methods themselves, as experimental conditions may be *slightly* different between papers!

The goal of this repo is to:

- Organize various research Settings into an inheritance hierarchy (a tree!), with more *general*, challenging settings with few assumptions at the top, and more constrained problems at the bottom.

- Provide a mechanism for easily reusing existing solutions (Methods) onto new Settings through **Polymorphism**!

- Allow researchers to easily create new, general Methods and quickly gather results on a multitude of Settings, ranging from Supervised to Reinforcement Learning!


## Installation
Requires python >= 3.7


### Basic installation:

```console
$ git clone https://www.github.com/lebrice/Sequoia.git
$ pip install -e Sequoia
```

### Optional Addons
You can also install optional "addons" for Sequoia, each of which either adds new Methods, new environments/datasets, or both.
using either the usual `extras_require` feature of setuptools, or by pip-installing other repositories which register Methods for Sequoia using an `entry_point` in their `setup.py` file.


```console
pip install -e Sequoia[all|<plugin name>]
```

Here are some of the optional addons:

- `avalanche`:
  
  Continual Supervised Learning methods, provided by the [Avalanche](https://github.com/ContinualAI/avalanche) library:
  
    ```console
    $ pip install -e Sequoia[avalanche]
    ```

- `CN-DPM`: Continual Neural Dirichlet Process Mixture model:
    ```console
    $ cd Sequoia
    $ git submodule init  # to setup the submodules
    $ pip install -e sequoia/methods/cn_dpm    
    ```


- `orion`:
  
    Hyper-parameter optimization using [Orion](https://github.com/epistimio/orion)
    ```console
    $ pip install -e Sequoia[orion]
    ```

- `metaworld`:
  
    Continual / Multi-Task Reinforcement Learning environments, thanks to the [metaworld](https://github.com/rlworkgroup/metaworld) package. The usual setup for mujoco needs to be done, Sequoia unfortunately can't do it for you ;(
    ```console
    $ pip install -e Sequoia[metaworld]
    ```

- `monsterkong`:
  
    Continual Reinforcement Learning environment from [the Meta-MonsterKong repo](https://github.com/lebrice/MetaMonsterkong).
    ```console
    $ pip install -e Sequoia[monsterkong]
    ```


- `continual_world`: The Continual World benchmark for Continual Reinforcement learning. Adds 6 different Continual RL Methods to Sequoia.
    ```console
    $ cd Sequoia
    $ git submodule init  # to setup the submodules
    $ pip install -e sequoia/methods/continual_world   
    ```

See the `setup.py` file for all the optional extras.

### Additional Installation Steps for Mac

Install the latest XQuartz app from here: https://www.xquartz.org/releases/index.html

Then run the following commands on the terminal:

```console
mkdir /tmp/.X11-unix 
sudo chmod 1777 /tmp/.X11-unix 
sudo chown root /tmp/.X11-unix/
```

## Documentation overview:


- ### **[Getting Started / Examples (take a look at this first)](examples/)**
- ### Runing Experiments (below)
- ### [Settings overview](sequoia/settings/)
- ### [Methods overview](sequoia/methods/)


### Current Settings & Assumptions:

| Setting                                                                    | RL vs SL                                                                 | clear task boundaries? | Task boundaries given? | Task labels at training time? | task labels at test time | Stationary context? | Fixed action space |
| -------------------------------------------------------------------------- | ------------------------------------------------------------------------ | ---------------------- | ---------------------- | ----------------------------- | ------------------------ | ------------------- | ------------------ |
| [Continual RL](sequoia/settings/rl/continual/setting.py)                   | RL                                                                       | no                     | no                     | no                            | no                       | no                  | no(?)              |
| [Discrete Task-Agnostic RL](sequoia/settings/rl/discrete/setting.py)       | RL                                                                       | **yes**                | **yes**                | no                            | no                       | no                  | no(?)              |
| [Incremental RL](sequoia/settings/rl/incremental/setting.py)               | RL                                                                       | **yes**                | **yes**                | **yes**                       | no                       | no                  | no(?)              |
| [Task-Incremental RL](sequoia/settings/rl/task_incremental/setting.py)     | RL                                                                       | **yes**                | **yes**                | **yes**                       | **yes**                  | no                  | no(?)              |
| [Traditional RL](sequoia/settings/rl/task_incremental/setting.py)          | RL                                                                       | **yes**                | **yes**                | **yes**                       | no                       | **yes**             | no(?)              |
| [Multi-Task RL](sequoia/settings/rl/task_incremental/setting.py)           | RL                                                                       | **yes**                | **yes**                | **yes**                       | **yes**                  | **yes**             | no(?)              |
| [Continual SL](sequoia/settings/sl/continual/setting.py)                   | SL                                                                       | no                     | no                     | no                            | no                       | no                  | no                 |
| [Discrete Task-Agnostic SL](sequoia/settings/sl/discrete/setting.py)       | SL                                                                       | **yes**                | no                     | no                            | no                       | no                  | no                 |
| [(Class) Incremental SL](sequoia/settings/sl/incremental/setting.py)       | SL                                                                       | **yes**                | **yes**                | no                            | no                       | no                  | no                 |
| [Domain-Incremental SL](sequoia/settings/sl/domain_incremental/setting.py) | SL                                                                       | **yes**                | **yes**                | **yes**                       | no                       | no                  | **yes**            |
| [Task-Incremental SL](sequoia/settings/sl/task_incremental/setting.py)     | SL                                                                       | **yes**                | **yes**                | **yes**                       | **yes**                  | no                  | no                 |
| [Traditional SL](sequoia/settings/sl/traditional/setting.py)               | SL                                                                       | **yes**                | **yes**                | **yes**                       | no                       | **yes**             | no                 |
| [Multi-Task SL](sequoia/settings/sl/multi_task/setting.py)                 | SL                                                                       | **yes**                | **yes**                | **yes**                       | **yes**                  | **yes**             | no                 |
<!--|                                                                        | [Class-Incremental SL](sequoia/settings/sl/class_incremental/setting.py) | SL                     | **yes**                | **yes**                       | no                       | no                  | no                 |  |-->

#### Notes

- **Active / Passive**:
    Active settings are Settings where the next observation depends on the current action, i.e. where actions influence future observations, e.g. Reinforcement Learning.
    Passive settings are Settings where the current actions don't influence the next observations (e.g. Supervised Learning.)

- **Bold entries** in the table mark constant attributes which cannot be
   changed from their default value.

- \*: The environment is changing constantly over time in `ContinualRLSetting`, so
    there aren't really "tasks" to speak of.


## Running experiments

--> **(Reminder) First, take a look at the [Examples](/examples)** <--

#### Directly in code:

```python
from sequoia.settings import TaskIncrementalSLSetting
from sequoia.methods import BaseMethod
# Create the setting
setting = TaskIncrementalSLSetting(dataset="mnist")
# Create the method
method = BaseMethod(max_epochs=1)
# Apply the setting to the method to generate results.
results = setting.apply(method)
print(results.summary())
```

### Command-line:

```console
$ sequoia --help
usage: sequoia [-h] [--version] {run,sweep,info} ...

Sequoia - The Research Tree 

Used to run experiments, which consist in applying a Method to a Setting.

optional arguments:
  -h, --help        show this help message and exit
  --version         Displays the installed version of Sequoia and exits.

command:
  Command to execute

  {run,sweep,info}
    run             Run an experiment on a given setting.
    sweep           Run a hyper-parameter optimization sweep.
    info            Displays some information about a Setting or Method.
```
For example:
```console
$ sequoia run [--debug] <setting> (setting arguments) <method> (method arguments)
$ sequoia sweep [--debug] <setting> (setting arguments) <method> (method arguments)
$ sequoia info [setting or method]
```

For a detailed description of all the arguments, use the `--help` command for any of the actions:
```console 
$ sequoia --help
$ sequoia run --help
$ sequoia run <some_setting> --help
$ sequoia run <some_setting> <some_method> --help
$ sequoia sweep --help
$ sequoia sweep <some_setting> --help
$ sequoia sweep <some_setting> <some_method> --help
```

For example:

```console
$ sequoia run --debug task_incremental_sl --dataset mnist random_baseline
```

For example:
- Run the BaseMethod on task-incremental MNIST, with one epoch per task, and without wandb:
    ```console
    $ sequoia run task_incremental_sl --dataset mnist base --max_epochs 1
    ```
- Run the PPO Method from stable-baselines3 on an incremental RL setting, with the default dataset (CartPole) and 5 tasks: 
    ```console
    $ sequoia --setting incremental_rl --nb_tasks 5 --method sb3.ppo --steps_per_task 10_000
    ```

More questions? Please let us know by creating an issue or posting in the discussions!


================================================
FILE: dockers/.gitignore
================================================
# Hiding the 'eai' dockerfile
eai


================================================
FILE: dockers/base/Dockerfile
================================================
# syntax=docker/dockerfile:1
FROM pytorch/pytorch:1.8.1-cuda11.1-cudnn8-runtime
USER root
EXPOSE 2222
EXPOSE 6000
EXPOSE 8088
ENV LANG=en_US.UTF-8
RUN apt update && \
    apt install -y \
    git wget zsh unzip rsync build-essential \
        ca-certificates supervisor openssh-server ssh \
        curl wget vim procps htop locales nano man net-tools iputils-ping \
        libosmesa6-dev libgl1-mesa-glx libgl1-mesa-dev libglu1-mesa-dev libglfw3 \
        libglfw3-dev freeglut3 xvfb ffmpeg curl patchelf cmake zlib1g zlib1g-dev \
        swig libopenmpi-dev aptitude screen xz-utils locate && \
    sed -i "s/# en_US.UTF-8/en_US.UTF-8/" /etc/locale.gen && locale-gen && \
    useradd -m -u 13011 -s /bin/zsh toolkit && passwd -d toolkit && \
    useradd -m -u 13011 -s /bin/zsh --non-unique console && passwd -d console && \
    useradd -m -u 13011 -s /bin/zsh --non-unique _toolchain && passwd -d _toolchain && \
    useradd -m -u 13011 -s /bin/bash --non-unique coder && passwd -d coder && \
    chown -R toolkit:toolkit /run /etc/shadow /etc/profile && \
    apt autoremove --purge && apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    echo ssh >> /etc/securetty && \
    rm -f /etc/legal /etc/motd

# RUN conda install -c conda-forge opencv
RUN conda install matplotlib numpy scipy hdf5 h5py cython
# RUN pip install \ 
#     # Needed to build atari_py: (WHY don't they put it in a build_requires?)
#     lockfile 
    # fasteners \ 
    # pybullet \
    # wandb \
    # tqdm \
    # # tensorflow \
    # bs4 \
    # pandas notebook plotly tqdm pyamg lxml numba pyyaml torchmeta

# Removing this `torchtext` package, seems to be causing an import issue in pytorch!
RUN pip uninstall -y torchtext
RUN chown -R toolkit:root /workspace
RUN chmod -R 777 /workspace
# this doesn't do anything
RUN adduser toolkit sudo
RUN chown -R toolkit:root /mnt/
# RUN mkdir -p /mnt/home
RUN chmod 777 /opt/conda
RUN chmod 777 /mnt
RUN chmod -R 777 /workspace
SHELL [ "conda", "run", "-n", "base", "/bin/bash", "-c"]

## Unused zshell and oh-my-zsh stuff:
# RUN sh -c "$(wget -O- https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)"
# RUN sed -i 's/robbyrussell/clean/' ~/.zshrc
# RUN sed -i 's/plugins=(git)/plugins=(git debian history-substring-search)/' ~/.zshrc


# MuJoCo-related stuff:
# RUN curl -o ~/mujoco200_linux.zip -L -C - https://www.roboti.us/download/mujoco200_linux.zip
# RUN curl -o ~/mjpro150_linux.zip -L -C -  https://www.roboti.us/download/mjpro150_linux.zip
# RUN cd ~ && unzip mujoco200_linux.zip && rm mujoco200_linux.zip
# RUN cd ~ && unzip mjpro150_linux.zip && rm mjpro150_linux.zip
# RUN mkdir ~/.mujoco
# RUN mv ~/mujoco200_linux ~/.mujoco/mujoco200
# RUN mv ~/mjpro150 ~/.mujoco
# RUN echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:~/.mujoco/mujoco200/bin" >> ~/.bashrc
# RUN echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:~/.mujoco/mjpro150/bin" >> ~/.bashrc
# COPY mjkey.txt /home/toolkit/.mujoco/
# ENV LD_LIBRARY_PATH /home/toolkit/.mujoco/mujoco200/bin:${LD_LIBRARY_PATH}
# ENV LD_LIBRARY_PATH /home/toolkit/.mujoco/mjpro150/bin:${LD_LIBRARY_PATH}
# RUN mkdir /workspace/tools
# RUN cd /workspace/tools && git clone https://github.com/openai/mujoco-py.git && pip install -e mujoco-py

# For Wandb (TODO: Doesn't appear to work, using env variable with WANDB_API_KEY
# instead.)
# COPY .netrc /home/toolkit/.netrc
# COPY .netrc /root/.netrc
# COPY .netrc /tmp/.netrc

VOLUME /mnt/data
VOLUME /mnt/results
# USER toolkit

ENV DATA_DIR=/mnt/data
ENV RESULTS_DIR=/mnt/results
ENV WANDB_DIR=/mnt/results

# VOLUME /mnt/home
# WORKDIR /mnt/home
ENV PATH /home/toolkit/.local/bin:${PATH}
# RUN cd /workspace/tools && git clone https://github.com/openai/gym.git && cd gym && pip install -e '.[all]'
# RUN cd /workspace/tools && git clone https://github.com/openai/baselines.git && cd baselines && pip install -e .
RUN cd /workspace/ && git clone https://github.com/lebrice/Sequoia.git
RUN pip install -e /workspace/Sequoia[no_mujoco]
ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "base", "/bin/bash", "-c"]


================================================
FILE: dockers/base/build.sh
================================================
#!/bin/bash
set -o errexit    # Used to exit upon error, avoiding cascading errors
set -o errtrace    # Show error trace
set -o pipefail   # Unveils hidden failures
set -o nounset    # Exposes unset variables

if git diff-index --quiet HEAD --; then
    # No changes
    echo "All good, no uncommitted changes."
else
    # Changes
    echo "Can't build dockers when there are uncommited changes!"
    exit 1
fi


echo "Building the 'base' dockerfile"
docker build . --file dockers/base/Dockerfile --tag sequoia:base

REGISTRY=${REGISTRY:-`docker info | sed '/Username:/!d;s/.* //'`}
echo "Using registry $REGISTRY"

docker tag sequoia:base $REGISTRY/sequoia:base
docker push $REGISTRY/sequoia:base


================================================
FILE: dockers/branch/Dockerfile
================================================
# syntax=docker/dockerfile:1
FROM lebrice/sequoia:base
USER root
SHELL [ "conda", "run", "-n", "base", "/bin/bash", "-c"]
ARG BRANCH=master
RUN conda install -y cudatoolkit
RUN cd /workspace/Sequoia && git fetch -p && git checkout ${BRANCH} && pip install -e .[no_mujoco]
ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "base", "/bin/bash", "-c"]


================================================
FILE: dockers/branch/build.sh
================================================
#!/bin/bash
set -o errexit    # Used to exit upon error, avoiding cascading errors
set -o errtrace    # Show error trace
set -o pipefail   # Unveils hidden failures
set -o nounset    # Exposes unset variables

export CURRENT_BRANCH="`git branch --show-current`"
export BRANCH=${BRANCH:-$CURRENT_BRANCH}
echo "Using branch $BRANCH"

export REGISTRY=${REGISTRY:-`docker info | sed '/Username:/!d;s/.* //'`}
echo "Using registry $REGISTRY"


if git diff-index --quiet HEAD --; then
    # No changes
    echo "all good."
else
    # Changes
    echo "Can't build dockers when you have uncommited changes!"
    exit 1
fi
git push

echo "Building the container for branch $BRANCH (no cache)"
docker build . --file dockers/branch/Dockerfile \
    --no-cache \
    --build-arg BRANCH=$BRANCH \
    --tag sequoia:$BRANCH

docker tag sequoia:$BRANCH $REGISTRY/sequoia:$BRANCH
docker push $REGISTRY/sequoia:$BRANCH


================================================
FILE: docs/diagrams/src/gym.puml
================================================
@startuml gym

package gym {
    package spaces as gym.spaces {
        abstract class Space<T> {
            + contains(T sample) -> bool
            + sample() -> T
        }
        class Box extends Space {
            + low: np.ndarray
            + high: np.ndarray
            + shape: Tuple[int, ...]
            + dtype: np.dtype
            + contains(np.ndarray sample) -> bool
            + sample() -> np.ndarray
        }

        class Discrete extends Space {
            + n: int
            + contains(int sample) -> bool
            + sample() -> int
        }

        class Tuple extends Space {
            + spaces: Tuple[Space]
            + contains(Tuple sample) -> bool
            + sample() -> Tuple
        }
        ' Tuple spaces contain other spaces.
        Tuple *--  Space

        class Dict extends Space {
            + spaces: dict[str, Space]
            + contains(dict sample) -> bool
            + sample() -> dict
        }
        ' Same for Dicts.
        Dict *--  Space
    }

    abstract class gym.Env<Obs, Act, Rew> {
        + observation_space: Space<Obs>
        + action_space: Space<Act> 
        + step(Actions) -> Tuple[Obs, Rew, bool, dict]
        + reset() -> Obs
    }
    gym.Env .. Space

    abstract class Wrapper extends gym.Env{
        + env: gym.Env
    }
}

@enduml

================================================
FILE: docs/diagrams/src/pytorch_lightning.puml
================================================
@startuml pytorch_lightning
package pytorch_lightning {
    abstract class LightningDataModule {
        {abstract} + prepare_data()
        {abstract} + setup()
        {abstract} + train_dataloader(): torch.DataLoader
        {abstract} + val_dataloader(): torch.DataLoader
        {abstract} + test_dataloader(): torch.DataLoader
    }
    abstract class LightningModule {
        {abstract} + train_step(batch)
        + val_step()
        + test_step()
    }
}
@enduml

================================================
FILE: docs/diagrams/src/seq_diagram.puml
================================================
@startuml ContinualRLSetting
header Page Header
footer Page %page% of %lastpage%
title Overall Evaluation loop - Sequoia
note over User, Setting
Even though this diagram is somewhat large,
keep in mind that there are but a few key methods:
1. Method.configure()
2. Method.fit()
3. Method.get_actions()
4. Method.on_task_switch()  
end note

actor User
participant Setting << (A,#2121FF) Setting >>
collections TrainEnv
collections ValidEnv
collections TestEnv
' autoactivate on
participant Method << (C,#ADD1B2) Method >>
participant Model << (C,#ADD1B2) nn.Module >>
' activate Setting
' autoactivate on


User -> Setting: Create the Setting
Setting -> TrainEnv: Create temp env
return observation / action / reward spaces
User <-- Setting


User -> Method: Create the Method
User <-- Method


User -> Setting: setting.apply(method)

Setting -> Method: **method.configure(setting)**

    Method -> Method: create model, optimizer, etc.
    ' deactivate Method

    Method -> Model: Create
    ' activate Model
Setting <-- Method

autoactivate off

== training ==


group train_loop [for each task `i`]
    alt task_labels_at_train_time?
    else True
        Setting -> Method: **on_task_switch(i)**
        Method -> Method: consolidate knowledge, \n switch output heads, etc.
        Setting <-- Method
    else False 
        Setting -> Method: **on_task_switch(None)**
        Method -> Method: consolidate knowledge etc.
        Setting <-- Method

    end

    Setting -> TrainEnv: Create train env for task i
    Setting -> ValidEnv: Create valid env for task i
    ' activate ValidEnv
    Setting -> Method: **Method.fit(train_env, valid_env)**
    ' loop
    
    ' alt loop
    group loop
        note right
        The Method is free to do whatever
        it wants with the Train and Valid envs
        of the current task.
        end note
        Method -> Model: train()
        return

        ' group training
        Model <--> TrainEnv: train with the env
        ...

        Method -> Model: eval()
        return
        Model <--> ValidEnv: Evaluate performance
        ...
        ' autoactivate on
        ' Model -> TrainEnv: reset
        ' return Observations
        ' Model -> TrainEnv: step(actions)
        ' return Observations, Rewards, done, info
    end

end


== testing ==

note over Setting, Method
We currently only perform the test loop after training is complete on all tasks,
however, in the future we will run this test loop after the end of training on
each task. See issue#46 on GitHub for more info.
end note

group test_loop
    Setting --> Setting: Concatenate datasets for all tasks, \n create test wrappers, etc.
    Setting --> TestEnv: Create test environment (all tasks)
    autoactivate on
    Setting -> TestEnv: reset
    return observations
    ' loop
        alt
        else normal step

            Setting -> Method: **get_actions(observations)**
            Method -> Model: predict(x)
            return y_pred
            return actions
            Setting -> TestEnv: step(actions)
            return observations, rewards, done, info

        else end of episode reached
            Setting -> TestEnv: reset
            return observations

        else task boundary is reached
            ' TestEnv --> Method: **on_task_switch(i)**
            
            alt known_task_boundaries?
            else False: do nothing
                note over Method
                When known_task_boundaries=False, the Method doesn't get informed
                of task boundaries (it might have to perform some kind of change-point
                detection, for instance).
                end note
            else True
                note over TestEnv
                Minor note: here it's the TestEnv
                that calls the Method when a
                task boundary is reached.
                end note

                alt task_labels_at_test_time?
                else true
                    ' note right of Setting: If task labels are given
                    TestEnv -> Method: **on_task_switch(i)**
                    autoactivate off
                    Method -> Method
                    autoactivate on
                    return

                else false 
                    TestEnv -> Method: **on_task_switch(None)**
                    autoactivate off
                    Method -> Method
                    autoactivate on
                    return
                end
            end
        end
    autoactivate off
    note over TestEnv
    The test environment uses a `Monitor` wrapper, and gather
    statistics of interest like the mean reward, accuracy, etc.    
    end note
    TestEnv -> Setting: report performance of the Method
end
Setting -> Setting: Weigh performance of each task \n depending on the Setting
User <-- Setting: Results
' return Results
@enduml

================================================
FILE: examples/README.md
================================================
# Examples

Here's a brief description of the examples in this folder:

## Prerequisites:
- [Intro to dataclasses & simple-parsing](prerequisites/dataclasses_example.py)
- [Basics of openai gym](https://github.com/openai/gym#basics)


## Basic examples:

- [pl_example.py](basic/pl_example.py):
    **Recommended entry-point for ML Practicioners**. Shows an example method and model
    using [PyTorch Lightning](https://github.com/PyTorchLightning/pytorch-lightning).
    This is the best way to get started if you don't mind some level of abstraction in your code
    (a good thing in general!)


- [quick_demo.ipynb](basic/quick_demo.ipynb):
    **Recommended entry-point for new users**. Simple demo showing how to create a `Method`
    from scratch that targets a Supervised CL `Setting`, as well as how to
    improve this simple Method using a simple regularization loss.

    - [quick_demo.py](basic/quick_demo.py): First part of the above
        notebook: shows how to create a Method from scratch that
        targets a Supervised CL Setting.
    - [quick_demo_ewc.py](basic/quick_demo_ewc.py): Second part of the
        above notebook: shows how to improve upon an existing Method by adding a
        CL regularization loss.

- [baseline_demo.py](basic/baseline_demo.py): Shows how the
    BaseMethod can be applied to get results in both RL and SL Settings.


## CLVision Workshop Submission Examples:

Examples in this folder are aimed at solving the supervised learning track of the competition.

Each example builds on top of the previous, in a manner that improves the overall performance you can expect on any given CL setting.

As such, it is recommended that you take a look at the examples in the following order:

0. [DummyMethod](clcomp21/dummy_method.py)
    Non-parametric method that simply returns a random prediction for each observation.

1. [Simple Classifier](clcomp21/classifier.py):
    Standard neural net classifier without any CL-related mechanism. Works in the SL track, but has very poor performance.

2. [Multi-Head / Task Inference Classifier](clcomp21/multihead_classifier.py):
    Performs multi-head prediction, and a simple form of task inference. Gets better results that the example.

3. [CL Regularized Classifier](clcomp21/regularization_example.py):
    Adds a simple CL regularization loss to the multihead classifier above.


## Advanced examples:

- [RL_and_SL_demo.py](advanced/RL_and_SL_demo.py):
    
    Example that shows how the BaseMethod can easily be extended by adding
    AuxiliaryTasks to it, allows you to get results in both RL and SL.

- [continual_rl_demo.py](advanced/ewc_in_rl.py):
    
    Demonstrates how to create Reinforcement Learning (RL) Settings, as well as
    how methods from [stable-baselines3](https://github.com/DLR-RM/stable-baselines3)
    can be applied to these settings.


- [Extending Stable-Baselines3 (RL Settings only)](advanced/ewc_in_rl.py):

    (Not recommended for new users!)
    Very specific example which shows how, if you really wanted to, you could
    extend one or more of the Methods from SB3 with some kind of regularization
    loss hooking into the internal optimization loop of SB3.


================================================
FILE: examples/__init__.py
================================================


================================================
FILE: examples/advanced/RL_and_SL_demo.py
================================================
""" Demo where we add the same regularization loss from the other examples, but
this time as an `AuxiliaryTask` on top of the BaseMethod.

This makes it easy to create CL methods that apply to both RL and SL Settings!
"""

import copy
import random
import sys
from argparse import Namespace
from dataclasses import dataclass
from typing import ClassVar, List

import torch
from simple_parsing import ArgumentParser, field
from torch import Tensor

# This "hack" is required so we can run `python examples/custom_baseline_demo.py`
sys.path.extend([".", ".."])

from sequoia.common.config import Config
from sequoia.common.loss import Loss
from sequoia.methods import BaseMethod
from sequoia.methods.aux_tasks import AuxiliaryTask
from sequoia.methods.models import BaseModel, ForwardPass
from sequoia.methods.trainer import TrainerConfig
from sequoia.settings import Environment, RLSetting, Setting
from sequoia.utils.utils import camel_case, dict_intersection
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


class SimpleRegularizationAuxTask(AuxiliaryTask):
    """Same regularization loss as in the previous examples, this time
    implemented as an `AuxiliaryTask`, which gets added to the BaseModel,
    making it applicable to both RL and SL.

    This adds a CL regularizaiton loss to the BaseModel.

    The most important methods of `AuxiliaryTask` is `get_loss`, which should
    return a `Loss` for the given forward pass and resulting rewards/labels.
    Take a look at the `AuxiliaryTask` class for more info.
    """

    name: ClassVar[str] = "simple_regularization"

    @dataclass
    class Options(AuxiliaryTask.Options):
        """Hyper-parameters / configuration options of this auxiliary task."""

        # Coefficient used to scale this regularization loss before it gets
        # added to the 'base' loss of the model.
        coefficient: float = 0.01
        # Wether to use the absolute difference of the weights or the difference
        # in the `regularize` method below.
        use_abs_diff: bool = False
        # The norm term for the 'distance' between the current and old weights.
        distance_norm: int = 2

    def __init__(
        self,
        *args,
        name: str = None,
        options: "SimpleRegularizationAuxTask.Options" = None,
        **kwargs,
    ):
        super().__init__(*args, options=options, name=name, **kwargs)
        self.options: SimpleRegularizationAuxTask.Options
        self.previous_task: int = None
        # TODO: Figure out a clean way to persist this dict into the state_dict.
        self.previous_model_weights: Dict[str, Tensor] = {}
        self.n_switches: int = 0

    def get_loss(self, forward_pass: ForwardPass, y: Tensor = None) -> Loss:
        """Get a `Loss` for the given forward pass and resulting rewards/labels.

        Take a look at the `AuxiliaryTask` class for more info,

        NOTE: This is the same simplified version of EWC used throughout the
        other examples: the loss is the P-norm between the current weights and
        the weights as they were on the begining of the task.
        Also note, this particular example doesn't actually use the provided
        arguments.
        """
        if self.previous_task is None:
            # We're in the first task: do nothing.
            return Loss(name=self.name)

        old_weights: Dict[str, Tensor] = self.previous_model_weights
        new_weights: Dict[str, Tensor] = dict(self.model.named_parameters())

        loss = 0.0
        for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights):
            loss += torch.dist(new_w, old_w.type_as(new_w), p=self.options.distance_norm)

        ewc_loss = Loss(name=self.name, loss=loss)
        return ewc_loss

    def on_task_switch(self, task_id: int) -> None:
        """Executed when the task switches (to either a new or known task)."""
        if not self.enabled:
            return
        if self.previous_task is None and self.n_switches == 0:
            logger.debug(f"Starting the first task, no update.")
            pass
        elif task_id is None or task_id != self.previous_task:
            logger.debug(
                f"Switching tasks: {self.previous_task} -> {task_id}: "
                f"Updating the 'anchor' weights."
            )
            self.previous_task = task_id
            self.previous_model_weights.clear()
            self.previous_model_weights.update(
                copy.deepcopy({k: v.detach() for k, v in self.model.named_parameters()})
            )
        self.n_switches += 1


class CustomizedBaselineModel(BaseModel):
    @dataclass
    class HParams(BaseModel.HParams):
        """Hyper-parameters of our customized baseline model."""

        # Hyper-parameters of our simple new auxiliary task.
        simple_reg: SimpleRegularizationAuxTask.Options = field(
            default_factory=SimpleRegularizationAuxTask.Options
        )

    def __init__(
        self,
        setting: Setting,
        hparams: "CustomizedBaselineModel.HParams",
        config: Config,
    ):
        super().__init__(setting=setting, hparams=hparams, config=config)
        self.hp: CustomizedBaselineModel.HParams

        # Here we add our new auxiliary task:
        self.add_auxiliary_task(SimpleRegularizationAuxTask(options=self.hp.simple_reg))

        # Or, add replay buffers of some sort:
        self.replay_buffer: List = []

        # (...)


@dataclass
class CustomMethod(BaseMethod, target_setting=Setting):
    """Example methods which adds regularization to the baseline in RL and SL.

    This extends the `BaseMethod` by adding the simple regularization
    auxiliary task defined above to the `BaseModel`.

    NOTE: Since this class inherits from `BaseMethod`, which targets the
    `Setting` setting, i.e. the "root" node, it is applicable to all settings,
    both in RL and SL. However, you could customize the `target_setting`
    argument above to limit this to any particular subtree (only SL, only RL,
    only when task labels are present, etc).
    """

    # Hyper-parameters of the customized Baseline Model used by this method.
    hparams: CustomizedBaselineModel.HParams = field(
        default_factory=CustomizedBaselineModel.HParams
    )

    def __init__(
        self,
        hparams: CustomizedBaselineModel.HParams = None,
        config: Config = None,
        trainer_options: TrainerConfig = None,
        **kwargs,
    ):
        super().__init__(
            hparams=hparams,
            config=config,
            trainer_options=trainer_options,
            **kwargs,
        )

    def create_model(self, setting: Setting) -> CustomizedBaselineModel:
        """Creates the Model to be used for the given `Setting`."""
        return CustomizedBaselineModel(setting=setting, hparams=self.hparams, config=self.config)

    def configure(self, setting: Setting):
        """Configure this Method before being trained / tested on this Setting."""
        super().configure(setting)

        # For example, change the value of the coefficient of our
        # regularization loss when in RL vs SL:
        if isinstance(setting, RLSetting):
            self.hparams.simple_reg.coefficient = 0.01
        else:
            self.hparams.simple_reg.coefficient = 1.0

    def fit(self, train_env: Environment, valid_env: Environment):
        """Called by the Setting to let the Method train on a given task.

        You can do whatever you want with the train and valid
        environments. As it is currently, in most `Settings`, the valid
        environment will contain data from only the current task. (See issue at
        https://github.com/lebrice/Sequoia/issues/46 for more context).
        """
        return super().fit(train_env=train_env, valid_env=valid_env)

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser):
        """Adds command-line arguments for this Method to an argument parser.

        NOTE: This doesn't do anything differently than the base implementation,
        but it's included here just for illustration purposes.
        """
        # 'dest' is where the arguments will be stored on the namespace.
        dest = camel_case(cls.__qualname__)
        # Add all command-line arguments. This adds arguments for all fields of
        # this dataclass.
        parser.add_arguments(cls, dest=dest)
        # You could add arguments here if you wanted to:
        # parser.add_argument("--foo", default=1.23, help="example argument")

    @classmethod
    def from_argparse_args(cls, args: Namespace):
        """Create an instance of this class from the parsed arguments."""
        # Retrieve the parsed arguments:
        dest = camel_case(cls.__qualname__)
        method: CustomMethod = getattr(args, dest)
        # You could retrieve other arguments like so:
        # foo: int = args.foo
        return method


def demo_manual():
    """Apply the custom method to a Setting, creating both manually in code."""
    # Create any Setting from the tree:
    from sequoia.settings import TaskIncrementalRLSetting, TaskIncrementalSLSetting

    # setting = TaskIncrementalSLSetting(dataset="mnist", nb_tasks=5)  # SL
    setting = TaskIncrementalRLSetting(  # RL
        dataset="cartpole",
        train_task_schedule={
            0: {"gravity": 10, "length": 0.5},
            5000: {"gravity": 10, "length": 1.0},
        },
        train_max_steps=10_000,
    )

    ## Create the BaseMethod:
    config = Config(debug=True)
    trainer_options = TrainerConfig(max_epochs=1)
    hparams = BaseModel.HParams()
    base_method = BaseMethod(hparams=hparams, config=config, trainer_options=trainer_options)

    ## Get the results of the baseline method:
    base_results = setting.apply(base_method, config=config)

    ## Create the CustomMethod:
    config = Config(debug=True)
    trainer_options = TrainerConfig(max_epochs=1)
    hparams = CustomizedBaselineModel.HParams()
    new_method = CustomMethod(hparams=hparams, config=config, trainer_options=trainer_options)

    ## Get the results for the 'improved' method:
    new_results = setting.apply(new_method, config=config)

    print(f"\n\nComparison: BaseMethod vs CustomMethod")
    print("\n BaseMethod results: ")
    print(base_results.summary())

    print("\n CustomMethod results: ")
    print(new_results.summary())


def demo_command_line():
    """Run the same demo as above, but customizing the Setting and Method from
    the command-line.

    NOTE: Remember to uncomment the function call below to use this instead of
    demo_simple!
    """
    ## Create the `Setting` and the `Config` from the command-line, like in
    ## the other examples.
    parser = ArgumentParser(description=__doc__)

    ## Add command-line arguments for any Setting in the tree:
    from sequoia.settings import TaskIncrementalRLSetting, TaskIncrementalSLSetting

    # parser.add_arguments(TaskIncrementalSLSetting, dest="setting")
    parser.add_arguments(TaskIncrementalRLSetting, dest="setting")
    parser.add_arguments(Config, dest="config")

    # Add the command-line arguments for our CustomMethod (including the
    # arguments for our simple regularization aux task).
    CustomMethod.add_argparse_args(parser, dest="method")

    args = parser.parse_args()

    setting: ClassIncrementalSetting = args.setting
    config: Config = args.config

    # Create the BaseMethod:
    base_method = BaseMethod.from_argparse_args(args, dest="method")
    # Get the results of the BaseMethod:
    base_results = setting.apply(base_method, config=config)

    ## Create the CustomMethod:
    new_method = CustomMethod.from_argparse_args(args, dest="method")
    # Get the results for the CustomMethod:
    new_results = setting.apply(new_method, config=config)

    print(f"\n\nComparison: BaseMethod vs CustomMethod:")
    print(base_results.summary())
    print(new_results.summary())


if __name__ == "__main__":
    demo_manual()
    # demo_command_line()


================================================
FILE: examples/advanced/continual_rl_demo.py
================================================
import sys

# This "hack" is required so we can run `python examples/continual_rl_demo.py`
sys.path.extend([".", ".."])
from sequoia.methods.stable_baselines3_methods import A2CMethod, DQNMethod
from sequoia.settings import (
    ContinualRLSetting,
    IncrementalRLSetting,
    RLSetting,
    TaskIncrementalRLSetting,
)

if __name__ == "__main__":
    task_schedule = {
        0: {"gravity": 10, "length": 0.2},
        1000: {"gravity": 100, "length": 1.2},
        2000: {"gravity": 10, "length": 0.2},
    }
    setting = ContinualRLSetting(
        # setting = IncrementalRLSetting(
        # setting = TaskIncrementalRLSetting(
        # setting = RLSetting(
        dataset="CartPole-v1",
        train_max_steps=2000,
        train_task_schedule=task_schedule,
    )
    # Create the method to use here:
    # NOTE: The DQN method doesn't seem to work nearly as well as A2C.
    # method = DQNMethod(train_steps_per_task=1_000)
    method = A2CMethod(train_steps_per_task=1_000)
    # You could change the hyper-parameters of the method too:
    # method.hparams.buffer_size = 100

    results = setting.apply(method)
    print(results.summary())


================================================
FILE: examples/advanced/ewc_in_rl.py
================================================
""" Example of how to add a simplified regularization method to algos from
stable-baseline-3.
"""
from collections import deque
from copy import deepcopy
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional, Type, TypeVar, Union

import gym
import torch
from nngeometry.generator.jacobian import Jacobian
from nngeometry.layercollection import LayerCollection
from nngeometry.object.pspace import PMatAbstract, PMatDiag, PMatKFAC, PVector
from simple_parsing import choice
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.policies import BasePolicy
from torch import Tensor
from torch.utils.data import DataLoader, TensorDataset

from sequoia.methods import register_method
from sequoia.methods.stable_baselines3_methods import StableBaselines3Method
from sequoia.methods.stable_baselines3_methods.policy_wrapper import PolicyWrapper
from sequoia.settings import TaskIncrementalRLSetting
from sequoia.settings.base import Actions, Environment, Method, Observations
from sequoia.utils.utils import dict_intersection
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)

Policy = TypeVar("Policy", bound=BasePolicy)


class NormRegularizer(PolicyWrapper[Policy]):
    """A Wrapper class that adds a `on_task_switch` and a `ewc_loss` method to
    an nn.Module (in this particular case, a Policy from SB3.)

    By subclassing PolicyWrapper, this is able to leverage some 'hooks' into the
    optimizer of the policy.
    """

    def __init__(self: Policy, *args, reg_coefficient: float = 1.0, ewc_p_norm: int = 2, **kwargs):
        super().__init__(*args, **kwargs)
        self.reg_coefficient = reg_coefficient
        self.ewc_p_norm = ewc_p_norm

        self.previous_model_weights: Dict[str, Tensor] = {}

        self._previous_task: Optional[int] = None
        self._n_switches: int = 0

    def on_task_switch(self: Policy, task_id: Optional[int], *args, **kwargs) -> None:
        """Executed when the task switches (to either a known or unknown task)."""
        logger.info(f"On task switch called: task_id={task_id}")
        if self._previous_task is None and self._n_switches == 0 and not task_id:
            logger.info("Starting the first task, no EWC update.")
        elif task_id is None or task_id != self._previous_task:
            # NOTE: We also switch between unknown tasks.
            logger.info(
                f"Switching tasks: {self._previous_task} -> {task_id}: "
                f"Updating the EWC 'anchor' weights."
            )
            self._previous_task = task_id
            self.previous_model_weights.clear()
            self.previous_model_weights.update(
                deepcopy({k: v.detach() for k, v in self.named_parameters()})
            )
        self._n_switches += 1

    def get_loss(self: Policy) -> Union[float, Tensor]:
        """This will get called before the call to `policy.optimizer.step()`
        from within the `train` method of the algos from stable-baselines3.

        You can use this to return some kind of loss tensor to use.
        """
        return self.reg_coefficient * self.ewc_loss()

    def after_zero_grad(self: Policy):
        """Called after `self.policy.optimizer.zero_grad()` in the training
        loop of the SB3 algos.
        """
        # Backpropagate the loss here, by default, so that any grad clipping
        # also affects the grads of the loss, for instance.
        wrapper_loss = self.get_loss()
        if isinstance(wrapper_loss, Tensor) and wrapper_loss != 0.0 and wrapper_loss.requires_grad:
            logger.info(f"{type(self).__name__} loss: {wrapper_loss.item()}")
            wrapper_loss.backward(retain_graph=True)

    def before_optimizer_step(self: Policy):
        """Called before `self.policy.optimizer.step()` in the training
        loop of the SB3 algos.
        """

    def ewc_loss(self: Policy) -> Union[float, Tensor]:
        """Gets an 'ewc-like' regularization loss.

        NOTE: This is a simplified version of EWC where the loss is the P-norm
        between the current weights and the weights as they were on the begining
        of the task.
        """
        if self._previous_task is None:
            # We're in the first task: do nothing.
            return 0.0

        old_weights: Dict[str, Tensor] = self.previous_model_weights
        new_weights: Dict[str, Tensor] = dict(self.named_parameters())

        loss = 0.0
        for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights):
            loss += torch.dist(new_w, old_w.type_as(new_w), p=self.ewc_p_norm)

        return loss


class EWCPolicy(NormRegularizer):
    """A Wrapper class that adds a `on_task_switch` and a `ewc_loss` method to
    an nn.Module (in this particular case, a Policy from SB3) and implements the EWC method.
    """

    def __init__(
        self: Policy,
        *args,
        reg_coefficient: float = 1.0,
        ewc_p_norm: int = 2,
        fim_representation: PMatAbstract = PMatDiag,
        **kwargs,
    ):
        super().__init__(*args, reg_coefficient, ewc_p_norm, **kwargs)
        self.FIMs: List[PMatAbstract] = None
        self.previous_model_weights: PVector = None
        self.FIM_representation = fim_representation

    def consolidate(self, new_fims: List[PMatAbstract], task: int) -> None:
        """
        Consolidates the previous FIMs and the new onces.
        See online EWC in https://arxiv.org/pdf/1805.06370.pdf.
        """
        if self.FIMs is None:
            self.FIMs = new_fims
            return
        assert len(new_fims) == len(self.FIMs)
        for i, (fim_previous, fim_new) in enumerate(zip(self.FIMs, new_fims)):
            if fim_previous is None:
                self.FIMs[i] = fim_new
            else:
                # consolidate the FIMs
                self.FIMs[i] = EWCPolicy._consolidate_fims(fim_previous, fim_new, task)

    @staticmethod
    def _consolidate_fims(
        fim_previous: PMatAbstract, fim_new: PMatAbstract, task: int
    ) -> PMatAbstract:
        # consolidate the fim_new into fim_previous in place
        if isinstance(fim_new, PMatDiag):
            fim_previous.data = ((deepcopy(fim_new.data)) + fim_previous.data * (task)) / (task + 1)

        elif isinstance(fim_new.data, dict):
            for (n, p), (n_, p_) in zip(fim_previous.data.items(), fim_new.data.items()):
                for item, item_ in zip(p, p_):
                    item.data = ((item.data * (task)) + deepcopy(item_.data)) / (task + 1)
        return fim_previous

    def on_task_switch(
        self: Policy, task_id: Optional[int], dataloader: DataLoader, method: str = "a2c"
    ) -> None:
        """Executed when the task switches (to either a known or unknown task)."""
        logger.info(f"On task switch called: task_id={task_id}")
        if self._previous_task is None and self._n_switches == 0 and not task_id:
            self._previous_task = task_id
            logger.info("Starting the first task, no EWC update.")
            self._n_switches += 1
        elif task_id is None or self._previous_task is None or task_id > self._previous_task:
            # we dont want to go here at test tiem
            # NOTE: We also switch between unknown tasks.
            logger.info(
                f"Switching tasks: {self._previous_task} -> {task_id}: "
                f"Updating the EWC 'anchor' weights."
            )
            self._previous_task = task_id
            self.previous_model_weights = PVector.from_model(self).clone().detach()

            # TODO: keepng to FIMs might be not the optimal way of doing this
            new_fims = []
            if method == "dqn":
                function = self.q_net
                n_output = self.action_space.n
            else:
                function = self
                n_output = 1
            # TODO: Import this FIM function, from wherever it was defined.
            new_fim = FIM(
                model=self,
                loader=dataloader,
                representation=self.FIM_representation,
                n_output=n_output,
                variant=method,
                function=function,
                device=self.device.type,
            )
            new_fims.append(new_fim)
            if method == "a2c":
                # apply EWC also to the value net
                new_fim_critic = FIM(
                    model=self,
                    loader=dataloader,
                    representation=self.FIM_representation,
                    n_output=1,
                    variant="regression",
                    function=lambda *x: self(x[0])[1],
                    device=self.device.type,
                )
                new_fims.append(new_fim_critic)
            self.consolidate(new_fims, task=self._previous_task)
            self._n_switches += 1

    def ewc_loss(self: Policy) -> Union[float, Tensor]:
        """Gets an 'ewc-like' regularization loss."""
        regularizer = 0.0
        if self._previous_task is None or self.reg_coefficient == 0 or self.FIMs is None:
            # We're in the first task: do nothing.
            return regularizer
        v_current = PVector.from_model(self)
        for fim in self.FIMs:
            regularizer += fim.vTMv(v_current - self.previous_model_weights)
        return regularizer


from sequoia.methods.stable_baselines3_methods import (
    A2CModel,
    DDPGModel,
    DQNModel,
    PPOModel,
    SACModel,
    TD3Model,
)


@register_method
@dataclass
class ExampleRegularizationMethod(StableBaselines3Method):
    Model: ClassVar[Type[BaseAlgorithm]]

    # You could use any of these 'backbones' from SB3:
    Model = A2CModel  # Works great! (fastest)
    # Model = PPOModel  # Works great! (somewhat fast)
    # Model = SACModel  # Works (seems to be quite a bit slower).

    # These don't yet work, they have the same error, which seems to be
    # related to the action space being Discrete:
    #     stable_baselines3/td3/td3.py", line 143, in train
    #     noise = replay_data.actions.clone().data.normal_(0, self.target_policy_noise)
    # RuntimeError: "normal_kernel_cuda" not implemented for 'Long'
    # Model = TD3Model  # TODO
    # Model = DDPGModel  # TODO
    # Model = DQNModel  # Doesn't work: predictions have more than one value?!

    # Coefficient for the EWC-like loss.
    reg_coefficient: float = 1.0
    # norm of the 'distance' used in the ewc-like loss above.
    ewc_p_norm: int = 2

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> BaseAlgorithm:
        # Create the model, as usual:
        model = super().create_model(train_env, valid_env)
        # 'Wrap' the algorithm's policy with the EWC wrapper.
        model = NormRegularizer.wrap_algorithm(
            model,
            reg_coefficient=self.reg_coefficient,
            ewc_p_norm=self.ewc_p_norm,
        )
        return model

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        if self.model:
            self.model.policy.on_task_switch(task_id)


@register_method
@dataclass
class EWCExampleMethod(StableBaselines3Method):
    Model: ClassVar[Type[BaseAlgorithm]]
    # Model = A2CModel  # Works great! (fastest)
    Model = DQNModel  # Works great! (fastest)
    # Coefficient for the EWC-like loss.
    reg_coefficient: float = 1.0
    # Number of observations to use for FIM calculation
    total_steps_fim: int = 1000
    # Fisher information type  (diagonal or block diagobnal)
    fim_representation: PMatAbstract = choice(
        {"diagonal": PMatDiag, "block_diagonal": PMatKFAC}, default=PMatKFAC
    )

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> BaseAlgorithm:
        # Create the model, as usual:
        model = super().create_model(train_env, valid_env)
        # 'Wrap' the algorithm's policy with the EWC wrapper.
        model = EWCPolicy.wrap_algorithm(
            model,
            reg_coefficient=self.reg_coefficient,
            fim_representation=self.fim_representation,
        )
        return model

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.
        """
        if self.model:
            # create onbservation collection to use for FIM calculation
            observation_collection = []
            while len(observation_collection) < self.total_steps_fim:
                state = self.model.env.reset()
                for _ in range(1000):
                    action = self.get_actions(Observations(state), self.model.env.action_space)
                    state, _, done, _ = self.model.env.step(action)
                    observation_collection.append(torch.tensor(state).to(self.model.device))
                    if done:
                        break
            dataloader = DataLoader(
                TensorDataset(torch.cat(observation_collection)), batch_size=100, shuffle=False
            )
            if "a2c" in str(self.model.__class__):
                rl_method = "a2c"
            elif "dqn" in str(self.model.__class__):
                rl_method = "dqn"
            else:
                raise NotImplementedError
            self.model.policy.on_task_switch(task_id, dataloader, method=rl_method)


if __name__ == "__main__":
    setting = TaskIncrementalRLSetting(
        dataset="cartpole",
        nb_tasks=2,
        train_task_schedule={
            0: {"gravity": 10, "length": 0.3},
            1000: {"gravity": 10, "length": 0.5},  # second task is 'easier' than the first one.
        },
        train_max_steps=2000,
    )
    method = EWCExampleMethod(reg_coefficient=0.0)
    results_without_reg = setting.apply(method)
    method = EWCExampleMethod(reg_coefficient=100)
    results_with_reg = setting.apply(method)
    print("-" * 40)
    print("WITHOUT EWC ")
    print(results_without_reg.summary())
    print(f"With EWC (coefficient={method.reg_coefficient}):")
    print(results_with_reg.summary())


================================================
FILE: examples/advanced/hat_demo.py
================================================
import sys
from argparse import Namespace
from dataclasses import dataclass
from typing import Dict, NamedTuple, Optional, Tuple

import gym
import numpy as np
import torch
import tqdm
from gym import Space, spaces
from numpy import inf
from simple_parsing import ArgumentParser
from torch import Tensor

from sequoia.common import Config
from sequoia.common.spaces import Image
from sequoia.methods import register_method
from sequoia.settings import Environment, Method
from sequoia.settings.sl import TaskIncrementalSLSetting
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.settings.sl.incremental import Actions, Observations, Rewards


class Masks(NamedTuple):
    """Named tuple for the masked tensors created in the HATNet."""

    gc1: Tensor
    gc2: Tensor
    gc3: Tensor
    gfc1: Tensor
    gfc2: Tensor


class HatNet(torch.nn.Module):
    """
    @inproceedings{serra2018overcoming,
      title={Overcoming Catastrophic Forgetting with Hard Attention to the Task},
      author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros},
      booktitle={International Conference on Machine Learning},
      pages={4548--4557},
      year={2018}
    }

    The model is where the model weights are initialized.
    Just like a classic PyTorch, here the different layers and components of the model are defined
    """

    def __init__(self, image_space: Image, n_classes_per_task: Dict[int, int], s_hat: int = 50):
        super().__init__()

        ncha = image_space.channels
        size = image_space.width
        self.n_classes_per_task = n_classes_per_task
        self.s_hat = s_hat

        self.c1 = torch.nn.Conv2d(ncha, 64, kernel_size=size // 8)
        s = compute_conv_output_size(size, size // 8)
        s //= 2
        self.c2 = torch.nn.Conv2d(64, 128, kernel_size=size // 10)
        s = compute_conv_output_size(s, size // 10)
        s //= 2
        self.c3 = torch.nn.Conv2d(128, 256, kernel_size=2)
        s = compute_conv_output_size(s, 2)
        s //= 2
        self.smid = s
        self.maxpool = torch.nn.MaxPool2d(2)
        self.relu = torch.nn.ReLU()

        self.drop1 = torch.nn.Dropout(0.2)
        self.drop2 = torch.nn.Dropout(0.5)
        self.fc1 = torch.nn.Linear(256 * self.smid * self.smid, 2048)
        self.fc2 = torch.nn.Linear(2048, 2048)
        self.output_layers = torch.nn.ModuleList()

        n_tasks = len(self.n_classes_per_task)
        # TODO: (@lebrice) Here I'm 'fixing' this, by making it so each output head has
        # as many outputs as there are classes in total. It's not super efficient, but
        # it should work.
        total_classes = sum(self.n_classes_per_task.values())
        for task_index, n_classes_in_task in self.n_classes_per_task.items():
            self.output_layers.append(torch.nn.Linear(2048, total_classes))

        self.gate = torch.nn.Sigmoid()
        # All embedding stuff should start with 'e'
        self.ec1 = torch.nn.Embedding(n_tasks, 64)
        self.ec2 = torch.nn.Embedding(n_tasks, 128)
        self.ec3 = torch.nn.Embedding(n_tasks, 256)
        self.efc1 = torch.nn.Embedding(n_tasks, 2048)
        self.efc2 = torch.nn.Embedding(n_tasks, 2048)

        self.flatten = torch.nn.Flatten()

        self.loss = torch.nn.CrossEntropyLoss()
        self.current_task: Optional[int] = 0

    def forward(self, observations: TaskIncrementalSLSetting.Observations) -> Tuple[Tensor, Masks]:
        observations.as_list_of_tuples()
        x = observations.x
        t = observations.task_labels
        # BUG: This won't work if task_labels is None (which is the case at
        # test-time in the ClassIncrementalSetting)
        masks = self.mask(t, s_hat=self.s_hat)
        gc1, gc2, gc3, gfc1, gfc2 = masks
        # Gated
        h = self.maxpool(self.drop1(self.relu(self.c1(x))))
        h = h * gc1.unsqueeze(2).unsqueeze(3)
        h = self.maxpool(self.drop1(self.relu(self.c2(h))))
        h = h * gc2.unsqueeze(2).unsqueeze(3)
        h = self.maxpool(self.drop2(self.relu(self.c3(h))))
        h = h * gc3.unsqueeze(2).unsqueeze(3)
        h = self.flatten(h)
        h = self.drop2(self.relu(self.fc1(h)))
        h = h * gfc1.expand_as(h)
        h = self.drop2(self.relu(self.fc2(h)))
        h = h * gfc2.expand_as(h)

        # Each batch can have elements of more than one Task (in test)
        # In Task Incremental Learning, each task have it own classification head.
        y: Optional[Tensor] = None
        task_masks = {}
        for task_id in set(t.tolist()):
            task_mask = t == task_id
            task_masks[task_id] = task_mask

            y_pred_t = self.output_layers[task_id](h.clone())
            if y is None:
                y = y_pred_t
            else:
                y[task_mask] = y_pred_t[task_mask]
        assert y is not None
        return y, masks

    def mask(self, t: Tensor, s_hat: float) -> Masks:
        gc1 = self.gate(s_hat * self.ec1(t))
        gc2 = self.gate(s_hat * self.ec2(t))
        gc3 = self.gate(s_hat * self.ec3(t))
        gfc1 = self.gate(s_hat * self.efc1(t))
        gfc2 = self.gate(s_hat * self.efc2(t))
        return Masks(gc1, gc2, gc3, gfc1, gfc2)

    def shared_step(
        self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment
    ) -> Tuple[Tensor, Dict]:
        """Shared step used for both training and validation.

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            Batch containing Observations, and optional Rewards. When the Rewards are
            None, it means that we'll need to provide the Environment with actions
            before we can get the Rewards (e.g. image labels) back.

            This happens for example when being applied in a Setting which cares about
            sample efficiency or training performance, for example.

        environment : Environment
            The environment we're currently interacting with. Used to provide the
            rewards when they aren't already part of the batch (as mentioned above).

        Returns
        -------
        Tuple[Tensor, Dict]
            The Loss tensor, and a dict of metrics to be logged.
        """
        # Since we're training on a Passive environment, we will get both observations
        # and rewards, unless we're being evaluated based on our training performance,
        # in which case we will need to send actions to the environments before we can
        # get the corresponding rewards (image labels) back.
        observations: Observations = batch[0]
        rewards: Optional[Rewards] = batch[1]

        # Get the predictions:
        logits, _ = self(observations)
        y_pred = logits.argmax(-1)

        if rewards is None:
            # If the rewards in the batch were None, it means we're expected to give
            # actions before we can get rewards back from the environment.
            # This happens when the Setting is monitoring our training performance.
            rewards = environment.send(Actions(y_pred))

        assert rewards is not None
        image_labels = rewards.y

        loss = self.loss(logits, image_labels)

        accuracy = (y_pred == image_labels).sum().float() / len(image_labels)
        metrics_dict = {"accuracy": accuracy}
        return loss, metrics_dict


def compute_conv_output_size(
    Lin: int, kernel_size: int, stride: int = 1, padding: int = 0, dilation: int = 1
) -> int:
    return int(np.floor((Lin + 2 * padding - dilation * (kernel_size - 1) - 1) / float(stride) + 1))


@register_method
class HatDemoMethod(Method, target_setting=TaskIncrementalSLSetting):
    """
    Here we implement the method according to the characteristics and methodology of the current proposal.
    It should be as much as possible agnostic to the model and setting we are going to use.

    The method proposed can be specific to a setting to make comparisons easier.
    Here what we control is the model's training process, given a setting that delivers data in a certain way.
    """

    @dataclass
    class HParams:
        """Hyper-parameters of the Settings."""

        # Learning rate of the optimizer.
        learning_rate: float = 0.001
        # Batch size
        batch_size: int = 128
        # weight/importance of the task embedding to the gate function
        s_hat: float = 50.0
        # Maximum number of training epochs per task
        max_epochs_per_task: int = 2

    def __init__(self, hparams: HParams = None):
        self.hparams: HatDemoMethod.HParams = hparams or self.HParams()

        # We will create those when `configure` will be called, before training.
        self.model: HatNet
        self.optimizer: torch.optim.Optimizer

    def configure(self, setting: TaskIncrementalSLSetting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        setting.batch_size = self.hparams.batch_size
        assert (
            setting.increment == setting.test_increment
        ), "Assuming same number of classes per task for training and testing."
        n_classes_per_task = {
            i: setting.num_classes_in_task(i, train=True) for i in range(setting.nb_tasks)
        }
        image_space: Image = setting.observation_space["x"]
        self.model = HatNet(
            image_space=image_space,
            n_classes_per_task=n_classes_per_task,
            s_hat=self.hparams.s_hat,
        )
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=self.hparams.learning_rate,
        )

    def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
        """
        Train loop

        Different Settings can return elements from tasks in an other  way,
        be it class incremental, task incremental, etc.

        Batch can have information about en environment, rewards, input, task labels, etc.
        And we call the forward training function of our method, independent of the settings
        """

        # configure() will have been called by the setting before we get here,

        best_val_loss = inf
        best_epoch = 0
        for epoch in range(self.hparams.max_epochs_per_task):
            self.model.train()
            print(f"Starting epoch {epoch}")
            # Training loop:
            with tqdm.tqdm(train_env) as train_pbar:
                postfix = {}
                train_pbar.set_description(f"Training Epoch {epoch}")
                for i, batch in enumerate(train_pbar):
                    loss, metrics_dict = self.model.shared_step(
                        batch,
                        environment=train_env,
                    )
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    postfix.update(metrics_dict)
                    train_pbar.set_postfix(postfix)

            # Validation loop:
            self.model.eval()
            torch.set_grad_enabled(False)
            with tqdm.tqdm(valid_env) as val_pbar:
                postfix = {}
                val_pbar.set_description(f"Validation Epoch {epoch}")
                epoch_val_loss = 0.0

                for i, batch in enumerate(val_pbar):
                    batch_val_loss, metrics_dict = self.model.shared_step(
                        batch,
                        environment=valid_env,
                    )
                    epoch_val_loss += batch_val_loss
                    postfix.update(metrics_dict, val_loss=epoch_val_loss)
                    val_pbar.set_postfix(postfix)
            torch.set_grad_enabled(True)

            if epoch_val_loss < best_val_loss:
                best_val_loss = epoch_val_loss
                best_epoch = i

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        """Get a batch of predictions (aka actions) for these observations."""
        with torch.no_grad():
            logits, _ = self.model(observations)
        # Get the predicted classes
        y_pred = logits.argmax(dim=-1)
        return self.target_setting.Actions(y_pred)

    def on_task_switch(self, task_id: Optional[int]):
        # This method gets called if task boundaries are known in the current
        # setting. Furthermore, if task labels are available, task_id will be
        # the index of the new task. If not, task_id will be None.
        # TODO: Does this method actually work when task_id is None?
        self.model.current_task = task_id

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser) -> None:
        parser.add_arguments(cls.HParams, dest="hparams")
        # You can also add arguments as usual:
        # parser.add_argument("--foo", default=123)

    @classmethod
    def from_argparse_args(cls, args: Namespace) -> "HatDemoMethod":
        hparams: HatDemoMethod.HParams = args.hparams
        # foo: int = args.foo
        method = cls(hparams=hparams)
        return method


if __name__ == "__main__":
    # Example: Evaluate a Method on a single CL setting:
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    """
    We must define 3 main components:
     1.- Setting: It is the continual learning scenario that we are working, SL or RL, TI or CI
                  Each settings has it own parameters that can be customized.
     2.- Model: Is the parameters and layers of the model, just like in PyTorch.
                We can use a predefined model or create your own
     3.- Method: It is how we are going to use what the settings give us to train our model.
                 Same as before, we can define our own or use pre-defined Methods.
    """
    ## Add arguments for the Method, the Setting, and the Config.
    ## (Config contains options like the log_dir, the data_dir, etc.)
    HatDemoMethod.add_argparse_args(parser, dest="method")
    parser.add_arguments(TaskIncrementalSLSetting, dest="setting")
    parser.add_arguments(Config, "config")

    args = parser.parse_args()

    ## Create the Method from the args, and extract the Setting, and the Config:
    method: HatDemoMethod = HatDemoMethod.from_argparse_args(args, dest="method")
    setting: TaskIncrementalSLSetting = args.setting
    config: Config = args.config

    ## Apply the method to the setting, optionally passing in a Config,
    ## producing Results.
    results = setting.apply(method, config=config)
    print(results.summary())
    print(f"objective: {results.objective}")


================================================
FILE: examples/advanced/hparam_tuning.py
================================================
"""Runs a hyper-parameter tuning sweep, using Orion for HPO and wandb for visualization. 

# PREREQUISITES:


1.  (Optional): If you want to run the sweep on the monsterkong env:
    At the time of writing, the monsterkong repo is private. Once the challenge is out,
    it will most probably be made public. In the meantime, you'll need to ask
    @mattriemer for access to the MonsterKong_examples repo.

    ```
    pip install -e .[rl]
    ```

2.  Install the repo, along with the optional dependencies for Hyper-Parameter
    Optimization (HPO):

    ```console
    pip install -e .[hpo]
    ```

    NOTE: You can also fuse the two steps above with `pip install -e .[rl,hpo]`

3.  (Optional) Setup a database to hold the hyper-parameter configurations, following
    the [Orion database configuration documentation](https://orion.readthedocs.io/en/stable/install/database.html)

    The quickest way to get this setup is to run the `orion db setup` wizard, entering
    "pickleddb" as the database type:

    ```console
    $ orion db setup
    Enter the database type:  (default: mongodb) pickleddb
    Enter the database name:  (default: test) 
    Enter the database host:  (default: localhost)
    Default configuration file will be saved at: 
    /home/<your username>/.config/orion.core/orion_config.yaml
    ```

"""
import wandb
from sequoia.common import Config
from sequoia.methods.base_method import BaseMethod
from sequoia.settings import Results, Setting, TraditionalSLSetting
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


if __name__ == "__main__":
    from simple_parsing import ArgumentParser

    ## Create the Setting:
    from sequoia.settings import RLSetting

    setting = RLSetting(dataset="monsterkong")

    # from sequoia.settings import TaskIncrementalSLSetting
    # setting = TaskIncrementalSLSetting(dataset="cifar10")

    ## Create the BaseMethod:
    # Option 1: Create the method manually:
    # method = BaseMethod()

    # Option 2: From the command-line:
    method, unused_args = BaseMethod.from_known_args()  # allow unused args.
    # parser = ArgumentParser(description=__doc__)
    # BaseMethod.add_argparse_args(parser, dest="method")
    # args, unused_args = parser.parse_known_args()
    # method: BaseMethod = BaseMethod.from_argparse_args(args, dest="method")

    # Search space for the Hyper-Parameter optimization algorithm.
    # NOTE: This is just a copy of the spaces that are auto-generated from the fields of
    # the `BaseModel.HParams` class. You can change those as you wish though.
    search_space = {
        "learning_rate": "loguniform(1e-06, 1e-02, default_value=0.001)",
        "weight_decay": "loguniform(1e-12, 1e-03, default_value=1e-06)",
        "optimizer": "choices(['sgd', 'adam', 'rmsprop'], default_value='adam')",
        "encoder": "choices({'resnet18': 0.5, 'simple_convnet': 0.5}, default_value='resnet18')",
        "output_head": {
            "activation": "choices(['relu', 'tanh', 'elu', 'gelu', 'relu6'], default_value='tanh')",
            "dropout_prob": "uniform(0, 0.8, default_value=0.2)",
            "gamma": "uniform(0.9, 0.999, default_value=0.99)",
            "normalize_advantages": "choices([True, False])",
            "actor_loss_coef": "uniform(0.1, 1, default_value=0.5)",
            "critic_loss_coef": "uniform(0.1, 1, default_value=0.5)",
            "entropy_loss_coef": "uniform(0, 1, discrete=True, default_value=0)",
        },
    }
    best_hparams, best_results = method.hparam_sweep(
        setting, search_space=search_space, experiment_id="123"
    )

    print(f"Best hparams: {best_hparams}, best perf: {best_results}")
    # results = setting.apply(method, config=Config(debug=True))


================================================
FILE: examples/advanced/pnn/__init__.py
================================================


================================================
FILE: examples/advanced/pnn/layers.py
================================================
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms

"""
Based on https://github.com/TomVeniat/ProgressiveNeuralNetworks.pytorch
"""


class PNNConvLayer(nn.Module):
    def __init__(self, col, depth, n_in, n_out, kernel_size=3):
        super(PNNConvLayer, self).__init__()
        self.col = col
        self.layer = nn.Conv2d(n_in, n_out, kernel_size, stride=2, padding=1)

        self.u = nn.ModuleList()
        if depth > 0:
            self.u.extend(
                [nn.Conv2d(n_in, n_out, kernel_size, stride=2, padding=1) for _ in range(col)]
            )

    def forward(self, inputs):
        if not isinstance(inputs, list):
            inputs = [inputs]

        cur_column_out = self.layer(inputs[-1])
        prev_columns_out = [mod(x) for mod, x in zip(self.u, inputs)]

        return F.relu(cur_column_out + sum(prev_columns_out))


class PNNLinearBlock(nn.Module):
    def __init__(self, col: int, depth: int, n_in: int, n_out: int):
        super(PNNLinearBlock, self).__init__()
        self.layer = nn.Linear(n_in, n_out)

        self.u = nn.ModuleList()
        if depth > 0:
            self.u.extend([nn.Linear(n_in, n_out) for _ in range(col)])

    def forward(self, inputs):
        if not isinstance(inputs, list):
            inputs = [inputs]

        cur_column_out = self.layer(inputs[-1])
        prev_columns_out = [mod(x) for mod, x in zip(self.u, inputs)]

        return F.relu(cur_column_out + sum(prev_columns_out))


================================================
FILE: examples/advanced/pnn/model_rl.py
================================================
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms

from .layers import PNNConvLayer, PNNLinearBlock


class PnnA2CAgent(nn.Module):
    """
    @article{rusu2016progressive,
      title={Progressive neural networks},
      author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
      journal={arXiv preprint arXiv:1606.04671},
      year={2016}
    }
    """

    def __init__(self, arch="mlp", hidden_size=256):
        super(PnnA2CAgent, self).__init__()
        self.columns_actor = nn.ModuleList([])
        self.columns_critic = nn.ModuleList([])
        self.columns_conv = nn.ModuleList([])
        self.arch = arch
        self.hidden_size = hidden_size

        # Original size 3 x 400 x 600
        self.transformation = transforms.Compose(
            [
                transforms.ToPILImage(),
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
            ]
        )

    def forward(self, observations):
        assert (
            self.columns_actor
        ), "PNN should at least have one column (missing call to `new_task` ?)"
        t = observations.task_labels

        if self.arch == "mlp":
            x = torch.from_numpy(observations.x).unsqueeze(0).float()
            inputs_critic = [c[1](c[0](x)) for c in self.columns_critic]
            inputs_actor = [c[1](c[0](x)) for c in self.columns_actor]

            outputs_critic = []
            outputs_actor = []
            for i, column in enumerate(self.columns_critic):
                outputs_critic.append(column[2](inputs_critic[: i + 1]))
                outputs_actor.append(self.columns_actor[i][2](inputs_actor[: i + 1]))

            ind_depth = 3

        else:
            x = self.transfor_img(observations.x).unsqueeze(0).float()
            inputs = [c[1](c[0](x)) for c in self.columns_conv]

            outputs = []
            for i, column in enumerate(self.columns_conv):
                outputs.append(column[3](column[2](inputs[: i + 1])))

            inputs = outputs
            outputs = []
            for i, column in enumerate(self.columns_conv):
                outputs.append(column[5](column[4](inputs[: i + 1])))

            inputs_critic = [c[6](outputs[i]).view(1, -1) for i, c in enumerate(self.columns_conv)]
            inputs_actor = inputs_critic[:]

            outputs_critic = []
            outputs_actor = []
            for i, column in enumerate(self.columns_critic):
                outputs_critic.append(column[0](inputs_critic[: i + 1]))
                outputs_actor.append(self.columns_actor[i][0](inputs_actor[: i + 1]))

            ind_depth = 1

        critic = []
        for i, column in enumerate(self.columns_critic):
            critic.append(column[ind_depth](outputs_critic[i]))

        actor = []
        for i, column in enumerate(self.columns_actor):
            actor.append(F.softmax(column[ind_depth](outputs_actor[i]), dim=1))

        return critic[t], actor[t]

    def new_task(self, device, num_inputs, num_actions=5):
        task_id = len(self.columns_actor)

        if self.arch == "conv":
            sizes = [num_inputs, 32, 64, self.hidden_size]
            modules_conv = nn.Sequential()

            modules_conv.add_module("Conv1", PNNConvLayer(task_id, 0, sizes[0], sizes[1]))
            modules_conv.add_module("MaxPool1", nn.MaxPool2d(3))
            modules_conv.add_module("Conv2", PNNConvLayer(task_id, 1, sizes[1], sizes[2]))
            modules_conv.add_module("MaxPool2", nn.MaxPool2d(3))
            modules_conv.add_module("Conv3", PNNConvLayer(task_id, 2, sizes[2], sizes[3]))
            modules_conv.add_module("MaxPool3", nn.MaxPool2d(3))
            modules_conv.add_module("globavgpool2d", nn.AdaptiveAvgPool2d((1, 1)))
            self.columns_conv.append(modules_conv)

        modules_actor = nn.Sequential()
        modules_critic = nn.Sequential()

        if self.arch == "mlp":
            modules_actor.add_module("linAc1", nn.Linear(num_inputs, self.hidden_size))
            modules_actor.add_module("relAc", nn.ReLU(inplace=True))
        modules_actor.add_module(
            "linAc2", PNNLinearBlock(task_id, 1, self.hidden_size, self.hidden_size)
        )
        modules_actor.add_module("linAc3", nn.Linear(self.hidden_size, num_actions))

        if self.arch == "mlp":
            modules_critic.add_module("linCr1", nn.Linear(num_inputs, self.hidden_size))
            modules_critic.add_module("relCr", nn.ReLU(inplace=True))
        modules_critic.add_module(
            "linCr2", PNNLinearBlock(task_id, 1, self.hidden_size, self.hidden_size)
        )
        modules_critic.add_module("linCr3", nn.Linear(self.hidden_size, 1))

        self.columns_actor.append(modules_actor)
        self.columns_critic.append(modules_critic)

        print("Add column of the new task")

    def unfreeze_columns(self):
        for i, c in enumerate(self.columns_actor):
            for params in c.parameters():
                params.requires_grad = True

            for params in self.columns_critic[i].parameters():
                params.requires_grad = True

        for i, c in enumerate(self.columns_conv):
            for params in c.parameters():
                params.requires_grad = True

    def freeze_columns(self, skip=None):
        if skip == None:
            skip = []

        self.unfreeze_columns()

        for i, c in enumerate(self.columns_actor):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False

                for params in self.columns_critic[i].parameters():
                    params.requires_grad = False

        for i, c in enumerate(self.columns_conv):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False

        print("Freeze columns from previous tasks")

    def parameters(self, task_id):
        param = []
        for p in self.columns_critic[task_id].parameters():
            param.append(p)
        for p in self.columns_actor[task_id].parameters():
            param.append(p)

        if len(self.columns_conv) > 0:
            for p in self.columns_conv[task_id].parameters():
                param.append(p)

        return param

    def transfor_img(self, img):
        return self.transformation(img)
        # return lambda img: imresize(img[35:195].mean(2), (80,80)).astype(np.float32).reshape(1,80,80)/255.


================================================
FILE: examples/advanced/pnn/model_sl.py
================================================
from typing import Dict, List, Optional, Tuple

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor

from sequoia.settings import Actions, PassiveEnvironment
from sequoia.settings.sl.incremental import Observations, Rewards

from .layers import PNNConvLayer, PNNLinearBlock


class PnnClassifier(nn.Module):
    """
    @article{rusu2016progressive,
      title={Progressive neural networks},
      author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
      journal={arXiv preprint arXiv:1606.04671},
      year={2016}
    }
    """

    def __init__(self, n_layers):
        super().__init__()
        self.n_layers = n_layers
        self.columns = nn.ModuleList([])

        self.loss = torch.nn.CrossEntropyLoss()
        self.device = None
        self.n_tasks = 0
        self.n_classes_per_task: List[int] = []

    def forward(self, observations):
        assert self.columns, "PNN should at least have one column (missing call to `new_task` ?)"
        x = observations.x
        x = torch.flatten(x, start_dim=1)
        labels = observations.task_labels
        # TODO: Debug this:
        inputs = [
            c[0](x) + n_classes_in_task
            for n_classes_in_task, c in zip(self.n_classes_per_task, self.columns)
        ]
        for l in range(1, self.n_layers):
            outputs = []

            for i, column in enumerate(self.columns):
                outputs.append(column[l](inputs[: i + 1]))

            inputs = outputs

        y: Optional[Tensor] = None
        task_masks = {}
        for task_id in set(labels.tolist()):
            task_mask = labels == task_id
            task_masks[task_id] = task_mask

            if y is None:
                y = inputs[task_id]
            else:
                y[task_mask] = inputs[task_id][task_mask]

        assert y is not None, "Can't get prediction in model PNN"
        return y

    # def new_task(self, device, num_inputs, num_actions = 5):
    def new_task(self, device, sizes: List[int]):
        assert len(sizes) == self.n_layers + 1, (
            f"Should have the out size for each layer + input size (got {len(sizes)} "
            f"sizes but {self.n_layers} layers)."
        )
        self.n_tasks += 1
        # TODO: Fix this to use the actual number of classes per task.
        self.n_classes_per_task.append(2)
        task_id = len(self.columns)
        modules = []
        for i in range(0, self.n_layers):
            modules.append(PNNLinearBlock(col=task_id, depth=i, n_in=sizes[i], n_out=sizes[i + 1]))

        new_column = nn.ModuleList(modules).to(device)
        self.columns.append(new_column)
        self.device = device

        print("Add column of the new task")

    def freeze_columns(self, skip=None):
        if skip == None:
            skip = []

        for i, c in enumerate(self.columns):
            for params in c.parameters():
                params.requires_grad = True

        for i, c in enumerate(self.columns):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False

        print("Freeze columns from previous tasks")

    def shared_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        environment: PassiveEnvironment,
    ):
        """Shared step used for both training and validation.

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            Batch containing Observations, and optional Rewards. When the Rewards are
            None, it means that we'll need to provide the Environment with actions
            before we can get the Rewards (e.g. image labels) back.

            This happens for example when being applied in a Setting which cares about
            sample efficiency or training performance, for example.

        environment : Environment
            The environment we're currently interacting with. Used to provide the
            rewards when they aren't already part of the batch (as mentioned above).

        Returns
        -------
        Tuple[Tensor, Dict]
            The Loss tensor, and a dict of metrics to be logged.
        """
        # Since we're training on a Passive environment, we will get both observations
        # and rewards, unless we're being evaluated based on our training performance,
        # in which case we will need to send actions to the environments before we can
        # get the corresponding rewards (image labels).
        observations: Observations = batch[0].to(self.device)
        rewards: Optional[Rewards] = batch[1]

        # Get the predictions:
        logits = self(observations)
        y_pred = logits.argmax(-1)
        # TODO: PNN is coded for the DomainIncrementalSetting, where the action space
        # is the same for each task.

        # Get the rewards, if necessary:
        if rewards is None:
            rewards = environment.send(Actions(y_pred))

        image_labels = rewards.y.to(self.device)
        # print(logits.size())
        loss = self.loss(logits, image_labels)

        accuracy = (y_pred == image_labels).sum().float() / len(image_labels)
        metrics_dict = {"accuracy": accuracy}
        return loss, metrics_dict

    def parameters(self, task_id):
        return self.columns[task_id].parameters()


================================================
FILE: examples/advanced/pnn/pnn_method.py
================================================
import sys
from argparse import Namespace
from dataclasses import dataclass
from typing import Any, Dict, Optional, Tuple, Union

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import tqdm
from gym import spaces
from gym.spaces import Box
from numpy import inf
from scipy.signal import lfilter
from simple_parsing import ArgumentParser
from torchvision import transforms

from examples.advanced.pnn.model_rl import PnnA2CAgent
from examples.advanced.pnn.model_sl import PnnClassifier
from sequoia import Environment
from sequoia.common import Config
from sequoia.common.spaces import Image
from sequoia.common.transforms.utils import is_image
from sequoia.settings import Actions, Method, Observations, Rewards, Setting
from sequoia.settings.assumptions import IncrementalAssumption
from sequoia.settings.rl import ActiveEnvironment, RLSetting, TaskIncrementalRLSetting
from sequoia.settings.sl import (
    DomainIncrementalSLSetting,
    PassiveEnvironment,
    SLSetting,
    TaskIncrementalSLSetting,
)


class PnnMethod(Method, target_setting=Setting):
    """
    Here we implement the PNN Method according to the characteristics and methodology of
    the current proposal.  It should be as much as possible agnostic to the model and
    setting we are going to use.

    The method proposed can be specific to a setting to make comparisons easier.
    Here what we control is the model's training process, given a setting that delivers
    data in a certain way.
    """

    @dataclass
    class HParams:
        """Hyper-parameters of the Pnn method."""

        # Learning rate of the optimizer. Defauts to 0.0001 when in SL.
        learning_rate: float = 2e-4
        num_steps: int = 200  # (only applicable in RL settings.)
        # Discount factor (Only used in RL settings).
        gamma: float = 0.99
        # Number of hidden units (only used in RL settings.)
        hidden_size: int = 256
        # Batch size in SL, and number of parallel environments in RL.
        # Defaults to None in RL, and 32 when in SL.
        batch_size: Optional[int] = None
        # Maximum number of training epochs per task. (only used in SL Settings)
        max_epochs_per_task: int = 2

    def __init__(self, hparams: HParams = None):
        # We will create those when `configure` will be called, before training.
        self.config: Optional[Config] = None
        self.task_id: Optional[int] = 0
        self.hparams: Optional[PnnMethod.HParams] = hparams
        self.model: Union[PnnA2CAgent, PnnClassifier]
        self.optimizer: torch.optim.Optimizer

    def configure(self, setting: Setting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """

        input_space: Box = setting.observation_space["x"]
        task_label_space = setting.observation_space["task_labels"]

        # For now all Settings have `Discrete` (i.e. classification) action spaces.
        action_space: spaces.Discrete = setting.action_space

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.num_actions = action_space.n
        self.num_inputs = np.prod(input_space.shape)

        self.added_tasks = []

        if isinstance(setting, RLSetting):
            # If we're applied to an RL setting:

            # Used these as the default hparams in RL:
            self.hparams = self.hparams or self.HParams(
                learning_rate=2e-4,
                num_steps=200,
                gamma=0.99,
                hidden_size=256,
                batch_size=None,
            )
            assert self.hparams
            self.train_steps_per_task = setting.steps_per_task

            # We want a batch_size of None, i.e. only one observation at a time.
            setting.batch_size = None

            self.num_steps = self.hparams.num_steps
            # Otherwise, we can train basically as long as we want on each task.
            self.loss_function = {
                "gamma": self.hparams.gamma,
            }

            x_space = setting.observation_space.x
            if is_image(setting.observation_space.x):
                # Observing pixel input.
                self.arch = "conv"
            else:
                # Observing state input (e.g. the 4 floats in cartpole rather than images)
                self.arch = "mlp"
            self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size)

        else:
            # If we're applied to a Supervised Learning setting:
            # Used these as the default hparams in SL:
            self.hparams = self.hparams or self.HParams(
                learning_rate=0.0001,
                batch_size=32,
            )
            if self.hparams.batch_size is None:
                self.hparams.batch_size = 32

            # Set the batch size on the setting.
            setting.batch_size = self.hparams.batch_size
            # For now all Settings on the supervised side of the tree have images as
            # inputs, so the observation spaces are of type `Image` (same as Box, but with
            # additional `h`, `w`, `c` and `b` attributes).
            assert isinstance(input_space, Image)
            assert (
                setting.increment == setting.test_increment
            ), "Assuming same number of classes per task for training and testing."
            # TODO: (@lebrice): Temporarily 'fixing' this by making it so each output
            # head has as many outputs as there are classes in total, which might make
            # no sense, but currently works.
            # It would be better to refactor this so that each output head can have only
            # as many outputs as is required, and then reshape / offset the predictions.
            n_outputs = setting.increment
            n_outputs = setting.action_space.n
            self.layer_size = [self.num_inputs, 256, n_outputs]
            self.model = PnnClassifier(
                n_layers=len(self.layer_size) - 1,
            )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting."""
        # This method gets called if task boundaries are known in the current
        # setting. Furthermore, if task labels are available, task_id will be
        # the index of the new task. If not, task_id will be None.
        # For example, you could do something like this:
        # self.model.current_task = task_id
        # This freezes all columns except the one for the next task.. but there might
        # not yet be a column for the new task!
        self.model.freeze_columns(skip=[task_id])
        if task_id not in self.added_tasks:
            if isinstance(self.model, PnnA2CAgent):
                self.model.new_task(
                    device=self.device,
                    num_inputs=self.num_inputs,
                    num_actions=self.num_actions,
                )
            else:
                self.model.new_task(device=self.device, sizes=self.layer_size)

            self.added_tasks.append(task_id)

        self.task_id = task_id

    def set_optimizer(self):
        self.optimizer = torch.optim.Adam(
            self.model.parameters(self.task_id),
            lr=self.hparams.learning_rate,
        )

    def get_actions(self, observations: Observations, action_space: spaces.Space) -> Actions:
        """Get a batch of predictions (aka actions) for the given observations."""

        observations = observations.to(self.device)
        with torch.no_grad():
            if isinstance(self.model, PnnA2CAgent):
                predictions = self.model(observations)
                _, logit = predictions
                # get the predicted action:
                action = torch.argmax(logit).item()
            else:
                logits = self.model(observations)
                # Get the predicted classes
                y_pred = logits.argmax(dim=-1)
                action = y_pred

        assert action in action_space, (action, action_space)
        return action

    def fit(self, train_env: Environment, valid_env: Environment):
        """Train and validate this method using the "environments" for the current task.

        NOTE: `train_env` and `valid_env` are both `gym.Env`s as well as `DataLoader`s.
        This means that if you want to write a "regular" SL training loop, you totally
        can, and if you want to write you RL-style training loop, you can also do that.
        """
        if isinstance(train_env.unwrapped, PassiveEnvironment):
            self.fit_sl(train_env, valid_env)
        else:
            self.fit_rl(train_env, valid_env)

    def fit_rl(self, train_env: gym.Env, valid_env: gym.Env):
        """Training loop for Reinforcement Learning (a.k.a. "active") environment."""
        """
        base on https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f
        """
        if self.model is None:
            self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size)
        assert isinstance(self.model, PnnA2CAgent)

        self.set_optimizer()
        assert self.hparams
        # self.model.float()

        all_lengths = []
        average_lengths = []
        all_rewards = []
        entropy_term = 0

        for episode in range(self.train_steps_per_task):
            values = []
            rewards = []
            log_probs = []

            state = train_env.reset()
            for steps in range(self.num_steps):
                value, policy_dist = self.model(state)

                value = value.item()
                dist = policy_dist.detach().numpy()

                action = np.random.choice(self.num_actions, p=np.squeeze(dist))
                log_prob = torch.log(policy_dist.squeeze(0)[action])
                entropy = -np.sum(np.mean(dist) * np.log(dist))
                new_state, reward, done, _ = train_env.step(action)

                rewards.append(reward.y)
                values.append(value)
                log_probs.append(log_prob)
                entropy_term += entropy
                state = new_state

                if done or steps == self.num_steps - 1:
                    Qval, _ = self.model(state)
                    Qval = Qval.item()
                    all_rewards.append(np.sum(rewards))
                    all_lengths.append(steps)
                    average_lengths.append(np.mean(all_lengths[-10:]))

                    if episode % 10 == 0:
                        print(
                            f"episode: {episode}, "
                            f"reward: {np.sum(rewards)}, "
                            f"total length: {steps}, "
                            f"average length: {average_lengths[-1]}"
                        )
                    break

            Qvals = np.zeros_like(values)
            for t in reversed(range(len(rewards))):
                Qval = rewards[t] + self.hparams.gamma * Qval
                Qvals[t] = Qval

            # update actor critic
            values_tensor = torch.as_tensor(values, dtype=torch.float)
            Qvals = torch.as_tensor(Qvals, dtype=torch.float)
            log_probs_tensor = torch.stack(log_probs)

            advantage = Qvals - values_tensor
            actor_loss = (-log_probs_tensor * advantage).mean()
            critic_loss = 0.5 * advantage.pow(2).mean()
            ac_loss = actor_loss + critic_loss + 0.001 * entropy_term

            self.optimizer.zero_grad()
            ac_loss.backward()
            self.optimizer.step()

    def fit_sl(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
        """Train on a Supervised Learning (a.k.a. "passive") environment."""
        observations: TaskIncrementalSLSetting.Observations = train_env.reset()
        cuda_observations = observations.to(self.device)
        assert isinstance(self.model, PnnClassifier)
        assert self.hparams

        self.set_optimizer()

        best_val_loss = inf
        best_epoch = 0
        for epoch in range(self.hparams.max_epochs_per_task):
            self.model.train()
            print(f"Starting epoch {epoch}")
            # Training loop:
            with torch.set_grad_enabled(True), tqdm.tqdm(train_env) as train_pbar:
                postfix: Dict[str, Any] = {}
                train_pbar.set_description(f"Training Epoch {epoch}")
                for i, batch in enumerate(train_pbar):
                    loss, metrics_dict = self.model.shared_step(
                        batch,
                        environment=train_env,
                    )
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    postfix.update(metrics_dict)
                    train_pbar.set_postfix(postfix)

            # Validation loop:
            self.model.eval()
            with torch.set_grad_enabled(False), tqdm.tqdm(valid_env) as val_pbar:
                postfix = {}
                val_pbar.set_description(f"Validation Epoch {epoch}")
                epoch_val_loss = 0.0

                for i, batch in enumerate(val_pbar):
                    batch_val_loss, metrics_dict = self.model.shared_step(
                        batch,
                        environment=valid_env,
                    )
                    epoch_val_loss += batch_val_loss
                    postfix.update(metrics_dict, val_loss=epoch_val_loss)
                    val_pbar.set_postfix(postfix)

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser) -> None:
        parser.add_arguments(cls.HParams, dest="hparams", default=None)

    @classmethod
    def from_argparse_args(cls, args: Namespace) -> "PnnMethod":
        hparams: PnnMethod.HParams = args.hparams
        method = cls(hparams=hparams)
        return method


def main_rl():
    """Applies the PnnMethod in a RL Setting."""
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    Config.add_argparse_args(parser, dest="config")
    PnnMethod.add_argparse_args(parser, dest="method")

    setting = TaskIncrementalRLSetting(
        dataset="cartpole",
        nb_tasks=2,
        train_task_schedule={
            0: {"gravity": 10, "length": 0.3},
            1000: {"gravity": 10, "length": 0.5},
        },
    )

    args = parser.parse_args()

    config: Config = Config.from_argparse_args(args, dest="config")
    method: PnnMethod = PnnMethod.from_argparse_args(args, dest="method")
    method.config = config

    # 2. Creating the Method
    # method = ImproveMethod()

    # 3. Applying the method to the setting:
    results = setting.apply(method, config=config)

    print(results.summary())
    print(f"objective: {results.objective}")
    return results


def main_sl():
    """Applies the PnnMethod in a SL Setting."""
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    # Add arguments for the Setting
    # TODO: PNN is coded for the DomainIncrementalSetting, where the action space
    # is the same for each task.
    # parser.add_arguments(DomainIncrementalSetting, dest="setting")
    parser.add_arguments(TaskIncrementalSLSetting, dest="setting")
    # TaskIncrementalSLSetting.add_argparse_args(parser, dest="setting")
    Config.add_argparse_args(parser, dest="config")

    # Add arguments for the Method:
    PnnMethod.add_argparse_args(parser, dest="method")

    args = parser.parse_args()

    # setting: TaskIncrementalSLSetting = args.setting
    setting: TaskIncrementalSLSetting = TaskIncrementalSLSetting.from_argparse_args(
        # setting: DomainIncrementalSetting = DomainIncrementalSetting.from_argparse_args(
        args,
        dest="setting",
    )
    config: Config = Config.from_argparse_args(args, dest="config")

    method: PnnMethod = PnnMethod.from_argparse_args(args, dest="method")

    method.config = config

    results = setting.apply(method, config=config)
    print(results.summary())
    return results


if __name__ == "__main__":
    # Run RL Setting
    main_sl()
    # Run SL Setting
    # main_rl()


================================================
FILE: examples/advanced/procgen_example.py
================================================
""" Example of how to create an incremental RL Setting with custom environments for each task.

In this example, we create environments using [the `procgen` package](https://github.com/openai/procgen).
"""

import dataclasses
from dataclasses import dataclass, replace
from typing import Dict, List, NamedTuple, Optional, Type, TypeVar

import gym
import numpy as np

from sequoia.settings.rl import (
    IncrementalRLSetting,
    MultiTaskRLSetting,
    TaskIncrementalRLSetting,
    TraditionalRLSetting,
)


@dataclass
class ProcGenConfig:
    """Options for creating an environment from ProcGen.

    The fields on this dataclass match the arguments that can be passed to `gym.make`, based on the
    README of the procgen repo.
    """

    # Name of environment, or comma-separate list of environment names to instantiate as each env
    # in the VecEnv.
    env_name: str = "coinrun-v0"
    # The number of unique levels that can be generated. Set to 0 to use unlimited levels.
    num_levels: int = 0
    # The lowest seed that will be used to generated levels. 'start_level' and 'num_levels' fully
    # specify the set of possible levels.
    start_level: int = 0
    # Paint player velocity info in the top left corner. Only supported by certain games.
    paint_vel_info: bool = False
    # Use randomly generated assets in place of human designed assets.
    use_generated_assets: bool = False
    # Set to True to use the debug build if building from source.
    debug: bool = False
    # Useful flag that's passed through to procgen envs. Use however you want during debugging.
    debug_mode: int = 0
    # Determines whether observations are centered on the agent or display the full level.
    # Override at your own risk.
    center_agent: bool = True
    # When you reach the end of a level, the episode is ended and a new level is selected.
    # If use_sequential_levels is set to True, reaching the end of a level does not end the episode,
    # and the seed for the new level is derived from the current level seed.
    # If you combine this with start_level=<some seed> and num_levels=1, you can have a single
    # linear series of levels similar to a gym-retro or ALE game.
    use_sequential_levels: bool = False
    # What variant of the levels to use, the options are "easy", "hard", "extreme", "memory",
    # "exploration". All games support "easy" and "hard", while other options are game-specific.
    # The default is "hard". Switching to "easy" will reduce the number of timesteps required to
    # solve each game and is useful for testing or when working with limited compute resources.
    distribution_mode: str = "hard"
    # Normally games use human designed backgrounds, if this flag is set to False, games will use
    # pure black backgrounds.
    use_backgrounds: bool = True
    # Some games select assets from multiple themes, if this flag is set to True, those games will
    # only use a single theme.
    restrict_themes: bool = False
    # If set to True, games will use monochromatic rectangles instead of human designed assets.
    # Best used with restrict_themes=True.
    use_monochrome_assets: bool = False

    def make_env(self) -> gym.Env:
        """Creates the environment using these options."""
        env_id = f"procgen:procgen-{self.env_name}"
        # Create the env by passing the arguments to gym.make, same as what is done in the README of
        # the procgen repo.
        procgen_env = gym.make(
            id=env_id,
            num_levels=self.num_levels,
            start_level=self.start_level,
            paint_vel_info=self.paint_vel_info,
            use_generated_assets=self.use_generated_assets,
            debug=self.debug,
            center_agent=self.center_agent,
            use_sequential_levels=self.use_sequential_levels,
            distribution_mode=self.distribution_mode,
            use_backgrounds=self.use_backgrounds,
            restrict_themes=self.restrict_themes,
            use_monochrome_assets=self.use_monochrome_assets,
        )
        # NOTE: The environments that are created with `gym.make("procgen:procgen-...")` are
        # instances of the `gym3.interop:ToGymEnv` class, which has a slightly different API than
        # the `gym.Env` class:
        # (Taken From gym3/interop.py:)
        # > - The `render()` method does nothing in "human" mode, in "rgb_array" mode the info dict
        #     is checked for a key named "rgb" and info["rgb"][0] is returned if present
        # > - `seed()` and `close() are ignored since gym3 environments do not require these methods
        #
        # Therefore, for now, since in Sequoia we assume that the envs fit the gym.Env API, we have to
        # "patch" these different methods up a bit. Here I suggest we do this using a wrapper
        # (defined below)
        wrapped_env = SequoiaProcGenAdapterWrapper(env=procgen_env)
        return wrapped_env


class SequoiaProcGenAdapterWrapper(gym.Wrapper):
    """A wrapper around an environment from ProcGen to patch up the methods/properties that differ
    from the gym API:

    - The `seed` method doesn't ahve the right number of arguments.
    - The `done` value is of type `np.bool_` instead of a plain bool.
    - `render` returns None.
    """

    def __init__(self, env):
        super().__init__(env=env)

    def step(self, action):
        obs, rewards, done, info = self.env.step(action)
        if isinstance(done, np.bool_):
            done = bool(done)
        return obs, rewards, done, info

    def seed(self, seed: Optional[int] = None) -> List[int]:
        # The procgen env apparently doesn't have (or need?) a `seed` method, but they don't
        # implement it corrently, by not accepting a `seed` argument!
        return []

    def render(self, mode: str = "rgb_array"):
        # note: rendering doesn't seem to be working: `self.env.render("rgb_array")` returns None.
        array: Optional[np.ndarray] = self.env.render("rgb_array")
        return array


# Type variable for a type of setting that supports passing envs for each task (all settings below
# `InrementalRLSetting`).
SettingType = TypeVar("SettingType", bound=IncrementalRLSetting)

available_envs = [
    "bigfish",
    "bossfight",
    "caveflyer",
    "chaser",
    "climber",
    "coinrun",
    "dodgeball",
    "fruitbot",
    "heist",
    "jumper",
    "leaper",
    "maze",
    "miner",
    "ninja",
    "plunder",
    "starpilot",
]


def make_procgen_setting(
    env_name: str,
    nb_tasks: int,
    num_levels_per_task: int = 1,
    overlapping_levels_between_tasks: int = 0,
    common_options: ProcGenConfig = None,
    setting_type: Type[SettingType] = TaskIncrementalRLSetting,
) -> SettingType:
    """Creates an RL Setting that uses environments from procgen.

    Parameters
    ----------
    env_name : str
        Name of the environment from procgen to use. Should include the version tag.
        For example: "coinrun-v0".
    nb_tasks : int
        Number of tasks in the setting.
    num_levels_per_task : int, optional
        Number of generated levels per task, by default 1
    overlapping_levels_between_tasks : int, optional
        Number of levels in common between neighbouring tasks. Needs to be less than
        `num_levels_per_task`. Defaults to 0, in which case all tasks distinct levels.
    common_options : ProcGenConfig, optional
        Set of options common to the envs of all the tasks. This can be used to set the starting
        level, for example. Defaults to None, in which case the default options from `ProcGenConfig`
        are used.
    setting_type : Type[SettingType], optional
        The type of setting to create, by default TaskIncrementalRLSetting.

    For example, say `nb_tasks`=5, `num_levels_per_task`=2, `overlapping_levels_between_tasks`=1:

    task #1: levels: [0, 1]
    task #2: levels: [1, 2]
    task #3: levels: [2, 3]
    task #4: levels: [3, 4]
    task #5: levels: [4, 5]

    For example, say `nb_tasks`=5, `num_levels_per_task`=5, `overlapping_levels_between_tasks`=2:
    task #1: levels: [0, 1, 2, 3, 4]
    task #2: levels: [3, 4, 5, 6, 7]
    task #3: levels: [6, 7, 8, 9, 10]
    task #4: levels: [9, 10, 11, 12, 13]
    task #5: levels: [12, 13, 14, 15, 16]

    NOTE: (lebrice): Maybe this (and other benchmark-creating functions) could be classmethods on
    the settings, instead of passing the setting_type as a parameter!

    Returns
    -------
    SettingType
        A Setting of type `setting_type` (`TaskIncrementalRLSetting`) by default, where each task
        uses environments from ProcGen.
    """
    assert overlapping_levels_between_tasks < num_levels_per_task

    # Create the options common to every task.
    if common_options is None:
        common_options = ProcGenConfig(env_name=env_name)
    else:
        common_options = dataclasses.replace(common_options, env_name=env_name)

    # Get the starting levels for each task, as shown in the docstring above.
    offset = num_levels_per_task - overlapping_levels_between_tasks
    first_task_start_level = common_options.start_level
    last_task_start_level = common_options.start_level + offset * nb_tasks
    start_levels: List[int] = list(range(first_task_start_level, last_task_start_level, offset))

    # Create the configurations that will be used to create the train/valid/test environments for
    # each task by starting from the common options, and overwriting the values of `start_level`.
    train_env_configs: List[ProcGenConfig] = [
        replace(common_options, start_level=start_levels[task_id], num_levels=num_levels_per_task)
        for task_id in range(nb_tasks)
    ]
    # NOTE: For now the validation and testing environment are the same as those for training.
    # This could easily be different though!
    # For example:
    # - the test environments could have a background while the train/valid envs don't!
    #   --> This could be super interesting to researchers in Out-of-Distribution RL!
    valid_env_configs: List[ProcGenConfig] = train_env_configs.copy()
    test_env_configs: List[ProcGenConfig] = train_env_configs.copy()

    # Here we pass a list of functions to be called to create each env. This can be a bit better
    # than passing the envs themselves, as it saves some memory, and also because we'll be able to
    # close the envs after each task (since we can always re-create them).
    setting = setting_type(
        dataset=None,
        train_envs=[config.make_env for config in train_env_configs],
        val_envs=[config.make_env for config in valid_env_configs],
        test_envs=[config.make_env for config in test_env_configs],
    )
    return setting


from sequoia.common.config import Config
from sequoia.methods.random_baseline import RandomBaselineMethod


def main_simple():
    # Simple example: Create a Task-Incremental RL setting using procgen envs.
    setting = make_procgen_setting(env_name="coinrun-v0", nb_tasks=5)
    method = RandomBaselineMethod()
    # NOTE: The `render` option isn't yet working (see above)
    results = setting.apply(method, config=Config(debug=True, render=False))
    print(results.summary())


def main_using_other_setting():
    # Example where we change what kind of setting we want to create.
    class Key(NamedTuple):
        stationary_context: bool
        task_labels_at_test_time: bool

    # This is here just to give an idea of the differences between these settings.
    available_settings: Dict[Key, Type[IncrementalRLSetting]] = {
        Key(task_labels_at_test_time=False, stationary_context=False): IncrementalRLSetting,
        Key(task_labels_at_test_time=True, stationary_context=False): TaskIncrementalRLSetting,
        Key(task_labels_at_test_time=False, stationary_context=True): TraditionalRLSetting,
        Key(task_labels_at_test_time=True, stationary_context=True): MultiTaskRLSetting,
    }

    # You can choose whichever setting you want, but for example:
    setting_type = available_settings[Key(task_labels_at_test_time=True, stationary_context=False)]
    # Create the Method.
    method = RandomBaselineMethod()

    setting = make_procgen_setting(env_name="coinrun-v0", nb_tasks=5, setting_type=setting_type)
    results = setting.apply(method, config=Config(debug=True, render=False))
    print(results.summary())


if __name__ == "__main__":
    main_simple()


================================================
FILE: examples/basic/__init__.py
================================================


================================================
FILE: examples/basic/base_method_demo.py
================================================
""" Example showing how the BaseMethod can be applied to get results in both
RL and SL settings.
"""

from simple_parsing import ArgumentParser

from sequoia.common import Config
from sequoia.methods import BaseMethod
from sequoia.settings import Setting, TaskIncrementalRLSetting, TaskIncrementalSLSetting


def baseline_demo_simple():
    config = Config()
    method = BaseMethod(config=config, max_epochs=1)

    ## Create *any* Setting from the tree, for example:
    # Supervised Learning Setting:
    setting = TaskIncrementalSLSetting(
        dataset="cifar10",
        nb_tasks=2,
    )
    ## Reinforcement Learning Setting:
    # setting = TaskIncrementalRLSetting(
    #     dataset="cartpole",
    #     train_max_steps=4000,
    #     nb_tasks=2,
    # )
    results = setting.apply(method, config=config)
    print(results.summary())
    return results


def baseline_demo_command_line():
    parser = ArgumentParser(__doc__, add_dest_to_option_strings=False)

    # Supervised Learning Setting:
    parser.add_arguments(TaskIncrementalSLSetting, dest="setting")
    # Reinforcement Learning Setting:
    # parser.add_arguments(TaskIncrementalRLSetting, dest="setting")

    parser.add_arguments(Config, dest="config")
    BaseMethod.add_argparse_args(parser, dest="method")

    args = parser.parse_args()

    setting: Setting = args.setting
    config: Config = args.config
    method: BaseMethod = BaseMethod.from_argparse_args(args, dest="method")

    results = setting.apply(method, config=config)
    print(results.summary())
    return results


if __name__ == "__main__":
    ### Option 1: Create the BaseMethod and Settings manually.
    baseline_demo_simple()

    ### Option 2: Create the BaseMethod and Settings from the command-line.
    # baseline_demo_command_line()


================================================
FILE: examples/basic/pl_example.py
================================================
"""A simple example for creating a Method using PyTorch-Lightning.

Run this as:

```console
$> python examples/basic/pl_examples.py
```
"""
from dataclasses import asdict, dataclass
from typing import Optional, Tuple

import torch
from gym import spaces
from pytorch_lightning import LightningModule, Trainer
from torch import Tensor, nn
from torch.optim import Adam

from sequoia.common.config import Config
from sequoia.common.spaces import Image
from sequoia.methods import Method
from sequoia.settings.assumptions.task_type import ClassificationActions
from sequoia.settings.sl.continual import (
    Actions,
    ContinualSLSetting,
    Observations,
    ObservationSpace,
    Rewards,
)


class Model(LightningModule):
    """Example Pytorch Lightning model used for continual image classification.

    Used by the `ExampleMethod` below.
    """

    @dataclass
    class HParams:
        """Hyper-parameters of our model.

        NOTE: dataclasses are totally optional. This is just much nicer than dicts or
        ugly namespaces.
        """

        # Learning rate.
        learning_rate: float = 1e-3
        # Maximum number of training epochs per task.
        max_epochs_per_task: int = 1

    def __init__(
        self,
        input_space: ObservationSpace,
        output_space: spaces.Discrete,
        hparams: HParams = None,
    ):
        super().__init__()
        hparams = hparams or self.HParams()
        # NOTE: `input_space` is a subclass of `gym.spaces.Dict`. It contains (at least)
        # the `x` key, but can also contain other things, for example the task labels.
        # Doing things this way makes sure that this Model can also be applied to any
        # more specific Setting in the future (any setting with more information given)!
        image_space: Image = input_space.x
        # NOTE: `Image` is just a subclass of `gym.spaces.Box` with a few extra properties

        self.input_dims = image_space.shape
        # NOTE: Can't set the `hparams` attribute in PL, so use hp instead:
        self.hp = hparams
        self.save_hyperparameters({"hparams": asdict(hparams)})
        in_channels: int = image_space.channels
        num_classes: int = output_space.n

        # Imitates the SimpleConvNet from  sequoia.common.models.simple_convnet
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(6),
            nn.ReLU(inplace=True),
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.AdaptiveAvgPool2d(output_size=(8, 8)),  # [16, 8, 8]
            # [32, 6, 6]
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            # [32, 4, 4]
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(32),
            nn.Flatten(),
        )
        # Quick tip: In this case we have a fixed hidden size (thanks to the Adaptive
        # pooling layer above), but you could also use the cool new `nn.LazyLinear` when
        # you don't know the hidden size in advance!
        self.fc = nn.Sequential(
            nn.Flatten(),
            # nn.LazyLinear(out_features=120),
            nn.Linear(512, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, num_classes),
        )
        self.loss = nn.CrossEntropyLoss()
        self.trainer: Trainer

    def forward(self, observations: ContinualSLSetting.Observations) -> Tensor:
        """Returns the logits for the given observation.

        Parameters
        ----------
        observations : ContinualSLSetting.Observations
            dataclass with (at least) the following attributes:
            - "x" (Tensor): the samples (images)
            - "task_labels" (Optional[Tensor]): Task labels, when applicable.

        Returns
        -------
        Tensor
            Classification logits for each class.
        """
        x: Tensor = observations.x
        # Task labels for each sample. We don't use them in this example.
        t: Optional[Tensor] = observations.task_labels
        h_x = self.features(x)
        logits = self.fc(h_x)
        return logits

    def training_step(
        self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int
    ) -> Tensor:
        return self.shared_step(batch=batch, batch_idx=batch_idx, stage="train")

    def validation_step(
        self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int
    ) -> Tensor:
        return self.shared_step(batch=batch, batch_idx=batch_idx, stage="val")

    def test_step(self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int) -> Tensor:
        return self.shared_step(batch=batch, batch_idx=batch_idx, stage="test")

    def shared_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        stage: str,
    ) -> Tensor:
        observations, rewards = batch

        logits = self(observations)
        y_pred = logits.argmax(-1)
        actions = ClassificationActions(y_pred=y_pred, logits=logits)

        if rewards is None:
            environment: ContinualSLSetting.Environment
            # The rewards (image labels) might not be given at the same time as the
            # observations (images), for example during testing, or if we're being
            # evaluated based on our online performance during training!
            #
            # When that is the case, we need to send the "action" (predictions) to the
            # environment using `send()` to get the rewards.
            actions = y_pred
            # Get the current environment / dataloader from the Trainer.
            environment = self.trainer.request_dataloader(self, stage)
            rewards = environment.send(actions)
        y: Tensor = rewards.y

        accuracy = (y_pred == y).int().sum() / len(y)
        self.log(f"{stage}/accuracy", accuracy, prog_bar=True)

        loss = self.loss(logits, y)
        return loss

    def configure_optimizers(self):
        return Adam(self.parameters(), lr=self.hp.learning_rate)


class ExampleMethod(Method, target_setting=ContinualSLSetting):
    """Example method for solving Continual SL Settings with PyTorch-Lightning

    This ExampleMethod declares that it can be applied to any `Setting` that inherits
    from this `ContinualSLSetting`.

    NOTE: Settings in Sequoia are a subclass of `LightningDataModule`, which create
    the training/validation/testing `Environment`s that methods will interact with.
    Each setting defines an `apply` method, which serves as a "main loop", and describes
    when and on what data to train the Method, and how it will be evaluated, according
    to the usual methodology for that setting in the litterature.

    Importantly, settings do NOT describe **how** the method is to be trained, that is
    entirely up to the Method!
    """

    def __init__(self, hparams: Model.HParams = None):
        super().__init__()
        self.hparams = hparams or Model.HParams()
        self.current_task: Optional[int] = None
        # NOTE: These get assigned in `configure` below:
        self.model: Model
        self.trainer: Trainer

    def configure(self, setting: ContinualSLSetting):
        """Called by the Setting so the method can configure itself before training.

        This could be used to, for example, create a model, since the observation space
        (which describes the types and shapes of the data) and the `nb_tasks` can be
        read from the Setting.

        Parameters
        ----------
        setting : ContinualSLSetting
            The research setting that this `Method` will be applied to.
        """
        if not setting.known_task_boundaries_at_train_time:
            # If we're being applied on a Setting where we don't have access to task
            # boundaries, then there is only one training environment that transitions
            # between all tasks and then closes itself.
            # We therefore limit the number of epochs per task to 1 in that case.
            self.hparams.max_epochs_per_task = 1
        self.model = Model(
            input_space=setting.observation_space,
            output_space=setting.action_space,
            hparams=self.hparams,
        )

    def fit(
        self,
        train_env: ContinualSLSetting.Environment,
        valid_env: ContinualSLSetting.Environment,
    ):
        """Called by the Setting to allow the method to train.

        The passed environments inherit from `DataLoader` as well as from `gym.Env`.
        They produce `Observations` (which have an `x` Tensor field, for instance), and
        return `Rewards` when they receive `Actions`.
        This interface is the same between RL and SL, making it easy to create methods
        that can adapt to both domains.

        Parameters
        ----------
        train_env : ContinualSLSetting.Environment
            The Training environment. In the case of a `ContinualSLSetting`, this
            environment will smoothly transition between the different tasks.
            NOTE: Regardless of what exact type of `Setting` this method is being
            applied to, this environment will always be a subclass of
            `ContinualSLSetting.Environment`, and the `Observations`, `Actions`,
            `Rewards` produced by this environment will also always follow this
            hierarchy.
            This is important to note, since it makes it possible to create a Method
            that also works in other settings which add extra information in the
            observations (e.g. task labels)!

        valid_env : ContinualSLSetting.Environment
            The Validation environment.
        """
        # NOTE: Currently have to 'reset' the Trainer for each call to `fit`.
        self.trainer = Trainer(
            gpus=torch.cuda.device_count(),
            max_epochs=self.hparams.max_epochs_per_task,
        )
        self.trainer.fit(self.model, train_dataloader=train_env, val_dataloaders=valid_env)

    def test(self, test_env: ContinualSLSetting.Environment):
        """Called to let the Method handle the test loop by itself.

        The `test_env` will only give back rewards (y) once an action (y_pred) is sent
        to it via its `send` method.

        This test environment keeps track of some metrics of interest for its `Setting`
        (accuracy in this case) and reports them back to the `Setting` once the test
        environment has been exhausted.

        NOTE: The test environment will close itself when done, signifying the end
        of the test period. At that point, `test_env.is_closed()` will return `True`.
        """
        # BUG: There is currently a bug with the test loop with Trainer: on_task_switch
        # doesn't get called properly.
        raise NotImplementedError
        # Use ckpt_path=None to use the current weights, rather than the "best" ones.
        self.trainer.test(self.model, ckpt_path=None, test_dataloaders=test_env)

    def get_actions(self, observations: Observations, action_space: spaces.MultiDiscrete):
        """Called by the Setting to query for individual predictions.

        You currently have to implement this, but if `test` is implemented, it will be
        used instead. Sorry if this isn't super clear.
        """
        self.model.eval()
        with torch.no_grad():
            logits = self.model(observations.to(self.model.device))
            y_pred = logits.argmax(-1)
        return Actions(y_pred=y_pred)

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Can be called by the Setting when a task boundary is reached.

        This will be called if `setting.known_task_boundaries_at_[train/test]_time` is
        True, depending on if this is called during training or during testing.

        If `setting.task_labels_at_[train/test]_time` is True, then `task_id` will be
        the identifyer (index) of the next task. If the value is False, then `task_id`
        will be None.
        """
        if task_id != self.current_task:
            phase = "training" if self.training else "testing"
            print(f"Switching tasks during {phase}: {self.current_task} -> {task_id}")
            self.current_task = task_id


def main():
    """Runs the example: applies the method on a Continual Supervised Learning Setting."""
    # You could use any of the settings in SL, since this example methods targets the
    # most general Continual SL Setting in Sequoia: `ContinualSLSetting`:
    # from sequoia.settings.sl import ClassIncrementalSetting

    # Create the Setting:
    # NOTE: Since our model above uses an adaptive pooling layer, it should work on any
    # dataset!
    setting = ContinualSLSetting(dataset="mnist", monitor_training_performance=True)

    # Create the Method:
    method = ExampleMethod()

    # Create a config for the experiment (just so we can set a few options for this
    # example)
    config = Config(debug=True, log_dir="results/pl_example")

    # Launch the experiment: trains and tests the method according to the chosen
    # setting and returns a Results object.
    results = setting.apply(method, config=config)

    # Print the results, and show some plots!
    print(results.summary())
    for figure_name, figure in results.make_plots().items():
        print("Figure:", figure_name)
        figure.show()
        # figure.waitforbuttonpress(10)


if __name__ == "__main__":
    main()


================================================
FILE: examples/basic/pl_example_packnet.py
================================================
from dataclasses import dataclass
from typing import Optional

import torch
from simple_parsing import mutable_field

from examples.basic.pl_example import ExampleMethod, Model
from sequoia.common import Config
from sequoia.methods import BaseModel
from sequoia.methods.packnet_method import PackNet
from sequoia.methods.trainer import Trainer, TrainerConfig
from sequoia.settings.sl import ContinualSLSetting, TaskIncrementalSLSetting


class ExamplePackNetMethod(ExampleMethod, target_setting=TaskIncrementalSLSetting):
    def __init__(self, hparams: Model.HParams = None, packnet_hparams: PackNet.HParams = None):
        super().__init__(hparams=hparams)
        self.packnet_hparams = packnet_hparams or PackNet.HParams()
        # TODO: Modify `hparams.max_epochs_per_task` to at least be enough so that
        # PackNet will work.
        min_epochs = self.packnet_hparams.train_epochs + self.packnet_hparams.fine_tune_epochs
        if self.hparams.max_epochs_per_task < min_epochs:
            self.hparams.max_epochs_per_task = min_epochs
        self.p_net: PackNet

    def configure(self, setting: TaskIncrementalSLSetting):
        super().configure(setting)
        # TODO: Why does PackNet need access to the number of tasks again?
        self.p_net = PackNet(
            n_tasks=setting.nb_tasks,
            hparams=self.packnet_hparams,
        )
        # TODO: This could be set as default values in the PackNet constructor.
        self.p_net.current_task = -1
        self.p_net.config_instructions()

    def fit(
        self,
        train_env: TaskIncrementalSLSetting.Environment,
        valid_env: TaskIncrementalSLSetting.Environment,
    ):
        # NOTE: PackNet is not compatible with EarlyStopping, thus we set max_epochs==min_epochs
        self.trainer = Trainer(
            gpus=torch.cuda.device_count(),
            min_epochs=self.p_net.total_epochs(),
            max_epochs=self.p_net.total_epochs(),
            callbacks=[self.p_net],
        )

        self.trainer.fit(self.model, train_dataloader=train_env, val_dataloaders=valid_env)

    def on_task_switch(self, task_id: Optional[int]):
        """Called when switching between tasks.

        Args:
            task_id (int, optional): the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.
        """
        super().on_task_switch(task_id=task_id)
        if task_id is not None and len(self.p_net.masks) > task_id:
            self.p_net.load_final_state(model=self.model)
            self.p_net.apply_eval_mask(task_idx=task_id, model=self.model)
        self.p_net.current_task = task_id


def main():
    """Runs the example: applies the method on a Continual Supervised Learning Setting."""
    # You could use any of the settings in SL, since this example methods targets the
    # most general Continual SL Setting in Sequoia: `ContinualSLSetting`:
    # from sequoia.settings.sl import ClassIncrementalSetting

    # Create the Setting:
    # NOTE: Since our model above uses an adaptive pooling layer, it should work on any
    # dataset!
    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )

    # Create the Method:
    method = ExamplePackNetMethod()

    # Create a config for the experiment (just so we can set a few options for this
    # example)
    config = Config(debug=False, log_dir="results/pl_example_packnet")

    # Launch the experiment: trains and tests the method according to the chosen
    # setting and returns a Results object.
    results = setting.apply(method, config=config)

    # Print the results, and show some plots!
    print(results.summary())
    for figure_name, figure in results.make_plots().items():
        print("Figure:", figure_name)
        figure.show()
        # figure.waitforbuttonpress(10)


if __name__ == "__main__":
    main()


================================================
FILE: examples/basic/pl_example_test.py
================================================
""" Unit-tests for the PyTorch-Lightning Example.

Can be run like so:
```console
$ pytest examples/basic/pl_example_test.py
```
"""
from typing import Type

import pytest

from examples.basic.pl_example import ExampleMethod, Model
from sequoia.common.config import Config
from sequoia.common.metrics import ClassificationMetrics
from sequoia.methods import Method
from sequoia.methods.method_test import MethodTests, config, session_config  # type: ignore
from sequoia.settings import Results
from sequoia.settings.sl import ContinualSLSetting, IncrementalSLSetting


class TestPLExample(MethodTests):
    """Tests for this PL Example.

    This `MethodTests` base class generates a `test_debug` test for us.
    """

    Method: Type[Method] = ExampleMethod

    @pytest.fixture()
    def method(self, config: Config):
        """Required fixture, which creates a Method that can be used for quick tests."""
        return ExampleMethod(hparams=Model.HParams(max_epochs_per_task=1))

    def validate_results(
        self, setting: ContinualSLSetting, method: ExampleMethod, results: Results
    ):
        """This gets called by `test_debug` to check that the results make sense for
        the given setting and method.

        """
        # NOTE: This particular example isn't that great: We just check that the average
        # final test accuracy and the average online accuracy are both non-zero.
        # It would be best to do some kind of branching depending on what type of
        # Setting was used, since each setting can produce different types of results.
        print(results.summary())

        average_metrics: ClassificationMetrics
        online_metrics: ClassificationMetrics

        assert setting.monitor_training_performance

        todo = 0.0
        if isinstance(setting, IncrementalSLSetting):
            # The results in this case include the entire nb_tasks x nb_tasks transfer
            # matrix.
            assert isinstance(results, IncrementalSLSetting.Results)
            average_metrics = results.average_final_performance
            online_metrics = results.average_online_performance

            if setting.stationary_context:
                # Example: Should expect better performance if the data is i.i.d!
                assert average_metrics.accuracy > todo
            else:
                assert average_metrics.accuracy > todo

            if setting.monitor_training_performance:
                assert online_metrics.accuracy > todo
        else:
            # In this case, there aren't clear 'tasks' to speak of, so the results are
            # just aggregated metrics for each test batch:
            assert isinstance(results, ContinualSLSetting.Results)
            average_metrics = results.average_metrics
            online_metrics = results.online_performance_metrics

            assert average_metrics.accuracy > todo
            assert online_metrics.accuracy > todo


================================================
FILE: examples/basic/quick_demo.ipynb
================================================
{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python38364bitpy38conda80a8f432976e4e99926307fddceb6e0b",
   "display_name": "Python 3.8.3 64-bit ('py38': conda)",
   "language": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "source": [
    "# Quick Demo (Notebook version)\n",
    "\n",
    "(I hate notebooks.)\n",
    "\n",
    "In this demo, we will create a simple method and apply it to various Continual Learning settings.\n",
    "\n",
    "For the purposes of this demo, we will restrict ourselves to classification problems on the mnist and fashion-mnist datasets."
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports:\n",
    "import sys\n",
    "from dataclasses import dataclass\n",
    "from typing import Dict, Optional, Tuple, Type\n",
    "\n",
    "import gym\n",
    "import torch\n",
    "from gym import spaces\n",
    "from torch import Tensor, nn\n",
    "from simple_parsing import ArgumentParser\n",
    "\n",
    "sys.path.extend([\".\", \"..\"])\n",
    "from sequoia.settings import Method, Setting\n",
    "from sequoia.settings.sl.class_incremental import ClassIncrementalSetting, DomainIncrementalSetting\n",
    "from sequoia.settings.sl.class_incremental.objects import (\n",
    "    Actions,\n",
    "    Environment,\n",
    "    Observations,\n",
    "    PassiveEnvironment,\n",
    "    Results,\n",
    "    Rewards,\n",
    ")"
   ]
  },
  {
   "source": [
    "# Basic Model:"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "class MyModel(nn.Module):\n",
    "    \"\"\" Simple classification model without any CL-related mechanism.\n",
    "\n",
    "    To keep things simple, this demo model is designed for supervised\n",
    "    (classification) settings where observations have shape [3, 28, 28] (ie the\n",
    "    MNIST variants: Mnist, FashionMnist, RotatedMnist, EMnist, etc.)\n",
    "    \"\"\"\n",
    "    def __init__(self,\n",
    "                 observation_space: gym.Space,\n",
    "                 action_space: gym.Space,\n",
    "                 reward_space: gym.Space):\n",
    "        super().__init__()\n",
    "        image_shape = observation_space["x"].shape\n",
    "        assert image_shape == (3, 28, 28)\n",
    "        assert isinstance(action_space, spaces.Discrete)\n",
    "        assert action_space == reward_space\n",
    "        n_classes = action_space.n\n",
    "        image_channels = image_shape[0]\n",
    "\n",
    "        self.encoder = nn.Sequential(\n",
    "            nn.Conv2d(image_channels, 6, 5),\n",
    "            nn.ReLU(),\n",
    "            nn.MaxPool2d(2),\n",
    "            nn.Conv2d(6, 16, 5),\n",
    "            nn.ReLU(),\n",
    "            nn.MaxPool2d(2),\n",
    "        )\n",
    "        self.classifier = nn.Sequential(\n",
    "            nn.Flatten(),\n",
    "            nn.Linear(256, 120),\n",
    "            nn.ReLU(),\n",
    "            nn.Linear(120, 84),\n",
    "            nn.ReLU(),\n",
    "            nn.Linear(84, n_classes),\n",
    "        )\n",
    "        self.loss = nn.CrossEntropyLoss()\n",
    "\n",
    "    def forward(self, observations: Observations) -> Tensor:\n",
    "        # NOTE: here we don't make use of the task labels.\n",
    "        x = observations.x\n",
    "        task_labels = observations.task_labels\n",
    "        features = self.encoder(x)\n",
    "        logits = self.classifier(features)\n",
    "        return logits\n",
    "\n",
    "    def shared_step(\n",
    "        self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment\n",
    "    ) -> Tuple[Tensor, Dict]:\n",
    "        \"\"\"Shared step used for both training and validation.\n",
    "                \n",
    "        Parameters\n",
    "        ----------\n",
    "        batch : Tuple[Observations, Optional[Rewards]]\n",
    "            Batch containing Observations, and optional Rewards. When the Rewards are\n",
    "            None, it means that we'll need to provide the Environment with actions\n",
    "            before we can get the Rewards (e.g. image labels) back.\n",
    "            \n",
    "            This happens for example when being applied in a Setting which cares about\n",
    "            sample efficiency or training performance, for example.\n",
    "            \n",
    "        environment : Environment\n",
    "            The environment we're currently interacting with. Used to provide the\n",
    "            rewards when they aren't already part of the batch (as mentioned above).\n",
    "\n",
    "        Returns\n",
    "        -------\n",
    "        Tuple[Tensor, Dict]\n",
    "            The Loss tensor, and a dict of metrics to be logged.\n",
    "        \"\"\"\n",
    "        # Since we're training on a Passive environment, we will get both observations\n",
    "        # and rewards, unless we're being evaluated based on our training performance,\n",
    "        # in which case we will need to send actions to the environments before we can\n",
    "        # get the corresponding rewards (image labels).\n",
    "        observations: Observations = batch[0]\n",
    "        rewards: Optional[Rewards] = batch[1]\n",
    "        # Get the predictions:\n",
    "        logits = self(observations)\n",
    "        y_pred = logits.argmax(-1)\n",
    "\n",
    "        if rewards is None:\n",
    "            # If the rewards in the batch is None, it means we're expected to give\n",
    "            # actions before we can get rewards back from the environment.\n",
    "            rewards = environment.send(Actions(y_pred))\n",
    "\n",
    "        assert rewards is not None\n",
    "        image_labels = rewards.y\n",
    "\n",
    "        loss = self.loss(logits, image_labels)\n",
    "\n",
    "        accuracy = (y_pred == image_labels).sum().float() / len(image_labels)\n",
    "        metrics_dict = {\"accuracy\": accuracy.item()}\n",
    "        return loss, metrics_dict\n"
   ]
  },
  {
   "source": [
    "## Creating our Method\n",
    "\n",
    "Here by subclassing 'MethodABC' and passing in a target_setting, we indicate that we are creating a new method, and that it will work on any Setting that is an instance of ClassIncrementalSetting or one of its subclasses. "
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "class DemoMethod(Method, target_setting=ClassIncrementalSetting):\n",
    "    \"\"\" Minimal example of a Method targetting the Class-Incremental CL setting.\n",
    "    \n",
    "    For a quick intro to dataclasses, see examples/dataclasses_example.py    \n",
    "    \"\"\"\n",
    "\n",
    "    @dataclass\n",
    "    class HParams:\n",
    "        \"\"\" Hyper-parameters of the demo model. \"\"\"\n",
    "        # Learning rate of the optimizer.\n",
    "        learning_rate: float = 0.001\n",
    "    \n",
    "    def __init__(self, hparams: HParams):\n",
    "        self.hparams: DemoMethod.HParams = hparams\n",
    "        self.max_epochs: int = 1\n",
    "        self.early_stop_patience: int = 2\n",
    "\n",
    "        # We will create those when `configure` will be called, before training.\n",
    "        self.model: MyModel\n",
    "        self.optimizer: torch.optim.Optimizer\n",
    "\n",
    "    def configure(self, setting: ClassIncrementalSetting):\n",
    "        \"\"\" Called before the method is applied on a setting (before training). \n",
    "\n",
    "        You can use this to instantiate your model, for instance, since this is\n",
    "        where you get access to the observation & action spaces.\n",
    "        \"\"\"\n",
    "        self.model = MyModel(\n",
    "            observation_space=setting.observation_space,\n",
    "            action_space=setting.action_space,\n",
    "            reward_space=setting.reward_space,\n",
    "        )\n",
    "        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.hparams.learning_rate)\n",
    "\n",
    "    def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):\n",
    "        # configure() will have been called by the setting before we get here.\n",
    "        import tqdm\n",
    "        from numpy import inf\n",
    "        best_val_loss = inf\n",
    "        best_epoch = 0\n",
    "        for epoch in range(self.max_epochs):\n",
    "            self.model.train()\n",
    "            # Training loop:\n",
    "            with tqdm.tqdm(train_env) as train_pbar:\n",
    "                train_pbar.set_description(f\"Training Epoch {epoch}\")\n",
    "                for i, batch in enumerate(train_pbar):\n",
    "                    loss, metrics_dict = self.model.shared_step(batch, environment=train_env)\n",
    "                    self.optimizer.zero_grad()\n",
    "                    loss.backward()\n",
    "                    self.optimizer.step()\n",
    "                    train_pbar.set_postfix(**metrics_dict)\n",
    "\n",
    "            # Validation loop:\n",
    "            self.model.eval()\n",
    "            torch.set_grad_enabled(False)\n",
    "            with tqdm.tqdm(valid_env) as val_pbar:\n",
    "                val_pbar.set_description(f\"Validation Epoch {epoch}\")\n",
    "                epoch_val_loss = 0.\n",
    "\n",
    "                for i, batch in enumerate(val_pbar):\n",
    "                    batch_val_loss, metrics_dict = self.model.shared_step(batch, environment=valid_env)\n",
    "                    epoch_val_loss += batch_val_loss\n",
    "                    val_pbar.set_postfix(**metrics_dict, val_loss=epoch_val_loss)\n",
    "            torch.set_grad_enabled(True)\n",
    "\n",
    "            if epoch_val_loss < best_val_loss:\n",
    "                best_val_loss = valid_env\n",
    "                best_epoch = epoch\n",
    "            if epoch - best_epoch > self.early_stop_patience:\n",
    "                print(f\"Early stopping at epoch {i}.\")\n",
    "                break\n",
    "\n",
    "    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:\n",
    "        \"\"\" Get a batch of predictions (aka actions) for these observations. \"\"\" \n",
    "        with torch.no_grad():\n",
    "            logits = self.model(observations)\n",
    "        # Get the predicted classes\n",
    "        y_pred = logits.argmax(dim=-1)\n",
    "        return self.target_setting.Actions(y_pred)\n",
    "    \n",
    "    @classmethod\n",
    "    def add_argparse_args(cls, parser: ArgumentParser, dest: str = \"\"):\n",
    "        \"\"\"Adds command-line arguments for this Method to an argument parser.\"\"\"\n",
    "        parser.add_arguments(cls.HParams, \"hparams\")\n",
    "\n",
    "    @classmethod\n",
    "    def from_argparse_args(cls, args, dest: str = \"\"):\n",
    "        \"\"\"Creates an instance of this Method from the parsed arguments.\"\"\"\n",
    "        hparams: cls.HParams = args.hparams\n",
    "        return cls(hparams=hparams)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "2021-02-25:17:29:01,958 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 0.\n",
      "2021-02-25:17:29:01,959 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:148] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:02,13 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:433] Number of train tasks: 5.\n",
      "2021-02-25:17:29:02,14 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:434] Number of test tasks: 5.\n",
      "Training Epoch 0: 100%|██████████| 300/300 [00:04<00:00, 64.17it/s, accuracy=1]\n",
      "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 155.53it/s, accuracy=1, val_loss=tensor(3.1905)]\n",
      "2021-02-25:17:29:07,205 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 0.\n",
      "2021-02-25:17:29:07,246 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:433] Number of train tasks: 5.\n",
      "2021-02-25:17:29:07,246 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:434] Number of test tasks: 5.\n",
      "2021-02-25:17:29:07,274 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
      "Test:   0%|          | 0/312 [00:00<?, ?it/s]2021-02-25:17:29:07,361 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:07,365 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:07,373 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:07,382 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:07,394 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "Test: 100%|██████████| 312/312 [00:01<00:00, 232.18it/s]\n",
      "2021-02-25:17:29:08,713 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.626102\n",
      "2021-02-25:17:29:08,713 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 1.\n",
      "2021-02-25:17:29:08,714 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:148] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "Training Epoch 0: 100%|██████████| 300/300 [00:03<00:00, 79.71it/s, accuracy=0.969]\n",
      "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 170.55it/s, accuracy=0.969, val_loss=tensor(5.7692)]\n",
      "2021-02-25:17:29:12,923 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 1.\n",
      "2021-02-25:17:29:12,926 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
      "Test:   0%|          | 0/312 [00:00<?, ?it/s]2021-02-25:17:29:13,14 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:13,19 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:13,27 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:13,36 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:13,46 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "Test: 100%|██████████| 312/312 [00:01<00:00, 248.27it/s]\n",
      "2021-02-25:17:29:14,276 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.568409\n",
      "2021-02-25:17:29:14,277 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 2.\n",
      "2021-02-25:17:29:14,278 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:148] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "Training Epoch 0: 100%|██████████| 300/300 [00:03<00:00, 86.51it/s, accuracy=1]\n",
      "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 152.03it/s, accuracy=1, val_loss=tensor(0.0980)]\n",
      "2021-02-25:17:29:18,245 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 2.\n",
      "2021-02-25:17:29:18,249 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
      "Test:   0%|          | 0/312 [00:00<?, ?it/s]2021-02-25:17:29:18,339 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:18,343 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:18,356 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:18,362 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:18,371 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "Test: 100%|██████████| 312/312 [00:01<00:00, 243.46it/s]\n",
      "2021-02-25:17:29:19,632 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.757212\n",
      "2021-02-25:17:29:19,632 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 3.\n",
      "2021-02-25:17:29:19,633 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:148] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "Training Epoch 0: 100%|██████████| 300/300 [00:03<00:00, 79.67it/s, accuracy=1]\n",
      "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 140.42it/s, accuracy=1, val_loss=tensor(0.1427)]\n",
      "2021-02-25:17:29:23,940 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 3.\n",
      "2021-02-25:17:29:23,942 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
      "Test:   0%|          | 0/312 [00:00<?, ?it/s]2021-02-25:17:29:24,35 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:24,71 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:24,82 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:24,96 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:24,103 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "Test: 100%|██████████| 312/312 [00:01<00:00, 223.35it/s]\n",
      "2021-02-25:17:29:25,441 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.791366\n",
      "2021-02-25:17:29:25,441 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 4.\n",
      "2021-02-25:17:29:25,442 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:148] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "Training Epoch 0: 100%|██████████| 300/300 [00:03<00:00, 81.25it/s, accuracy=0.969]\n",
      "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 157.25it/s, accuracy=1, val_loss=tensor(0.7817)]\n",
      "2021-02-25:17:29:29,616 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 4.\n",
      "2021-02-25:17:29:29,619 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
      "Test:   0%|          | 0/312 [00:00<?, ?it/s]2021-02-25:17:29:29,706 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:29,710 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:29,719 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:29,727 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "2021-02-25:17:29:29,735 WARNING  [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
      "Test: 100%|██████████| 312/312 [00:01<00:00, 247.82it/s]\n",
      "2021-02-25:17:29:30,971 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.798978\n",
      "2021-02-25:17:29:30,971 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:237] Finished main loop in 30.118470110999997 seconds.\n",
      "2021-02-25:17:29:31,57 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:257] {\n",
      "\t\"Task 0\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.989919\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.666667\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.481351\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.494048\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.5\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 1\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.61744\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.96131\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.422379\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.360119\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.477823\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 2\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.506048\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.564484\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 1.0\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.996528\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.718246\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 3\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.498488\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.502976\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.996472\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 1.0\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.960181\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 4\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.537802\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.549603\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.918851\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.994048\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.995464\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Final/Average Online Performance\": 0,\n",
      "\t\"Final/Average Final Performance\": 0.798978,\n",
      "\t\"Final/Runtime (seconds)\": 30.118470110999997,\n",
      "\t\"Final/CL Score\": 0.6793868\n",
      "}\n",
      "\n",
      "2021-02-25:17:29:31,143 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:395] {\n",
      "\t\"Task 0\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.989919\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.666667\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.481351\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.494048\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.5\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 1\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.61744\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.96131\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.422379\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.360119\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.477823\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 2\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.506048\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.564484\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 1.0\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.996528\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.718246\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 3\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.498488\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.502976\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.996472\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 1.0\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.960181\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 4\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.537802\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.549603\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.918851\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.994048\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.995464\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Final/Average Online Performance\": 0,\n",
      "\t\"Final/Average Final Performance\": 0.798978,\n",
      "\t\"Final/Runtime (seconds)\": 30.118470110999997,\n",
      "\t\"Final/CL Score\": 0.6793868\n",
      "}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "method = DemoMethod(hparams=DemoMethod.HParams())\n",
    "setting = DomainIncrementalSetting(dataset=\"fashionmnist\")\n",
    "\n",
    "results = setting.apply(method)"
   ]
  },
  {
   "source": [
    "## Results:"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "print(results.summary())"
   ],
   "cell_type": "code",
   "metadata": {},
   "execution_count": 5,
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "{\n\t\"Task 0\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.989919\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.666667\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.481351\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.494048\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.5\n\t\t}\n\t},\n\t\"Task 1\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.61744\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.96131\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.422379\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.360119\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.477823\n\t\t}\n\t},\n\t\"Task 2\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.506048\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.564484\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 1.0\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.996528\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.718246\n\t\t}\n\t},\n\t\"Task 3\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.498488\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.502976\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.996472\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 1.0\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.960181\n\t\t}\n\t},\n\t\"Task 4\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.537802\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.549603\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.918851\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.994048\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.995464\n\t\t}\n\t},\n\t\"Final/Average Online Performance\": 0,\n\t\"Final/Average Final Performance\": 0.798978,\n\t\"Final/Runtime (seconds)\": 30.118470110999997,\n\t\"Final/CL Score\": 0.6793868\n}\n\n"
     ]
    }
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "{'task_metrics': <Figure size 432x288 with 1 Axes>}"
      ]
     },
     "metadata": {},
     "execution_count": 6
    },
    {
     "output_type": "display_data",
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n  \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"277.314375pt\" version=\"1.1\" viewBox=\"0 0 385.78125 277.314375\" width=\"385.78125pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <metadata>\n  <rdf:RDF xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n   <cc:Work>\n    <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n    <dc:date>2021-02-25T17:29:31.358397</dc:date>\n    <dc:format>image/svg+xml</dc:format>\n    <dc:creator>\n     <cc:Agent>\n      <dc:title>Matplotlib v3.3.4, https://matplotlib.org/</dc:title>\n     </cc:Agent>\n    </dc:creator>\n   </cc:Work>\n  </rdf:RDF>\n </metadata>\n <defs>\n  <style type=\"text/css\">*{stroke-linecap:butt;stroke-linejoin:round;}</style>\n </defs>\n <g id=\"figure_1\">\n  <g id=\"patch_1\">\n   <path d=\"M 0 277.314375 \nL 385.78125 277.314375 \nL 385.78125 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n  </g>\n  <g id=\"axes_1\">\n   <g id=\"patch_2\">\n    <path d=\"M 43.78125 239.758125 \nL 378.58125 239.758125 \nL 378.58125 22.318125 \nL 43.78125 22.318125 \nz\n\" style=\"fill:#ffffff;\"/>\n   </g>\n   <g id=\"patch_3\">\n    <path clip-path=\"url(#p3f79f8a23b)\" d=\"M 58.999432 239.758125 \nL 109.726705 239.758125 \nL 109.726705 122.818458 \nL 58.999432 122.818458 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_4\">\n    <path clip-path=\"url(#p3f79f8a23b)\" d=\"M 122.408523 239.758125 \nL 173.135795 239.758125 \nL 173.135795 120.252449 \nL 122.408523 120.252449 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_5\">\n    <path clip-path=\"url(#p3f79f8a23b)\" d=\"M 185.817614 239.758125 \nL 236.544886 239.758125 \nL 236.544886 39.963164 \nL 185.817614 39.963164 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_6\">\n    <path clip-path=\"url(#p3f79f8a23b)\" d=\"M 249.226705 239.758125 \nL 299.953977 239.758125 \nL 299.953977 23.612328 \nL 249.226705 23.612328 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_7\">\n    <path clip-path=\"url(#p3f79f8a23b)\" d=\"M 312.635795 239.758125 \nL 363.363068 239.758125 \nL 363.363068 23.304433 \nL 312.635795 23.304433 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"matplotlib.axis_1\">\n    <g id=\"xtick_1\">\n     <g id=\"line2d_1\">\n      <defs>\n       <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"m68c5620304\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n      </defs>\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"84.363068\" xlink:href=\"#m68c5620304\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_1\">\n      <!-- 0 -->\n      <g transform=\"translate(81.181818 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_2\">\n     <g id=\"line2d_2\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"147.772159\" xlink:href=\"#m68c5620304\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_2\">\n      <!-- 1 -->\n      <g transform=\"translate(144.590909 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 12.40625 8.296875 \nL 28.515625 8.296875 \nL 28.515625 63.921875 \nL 10.984375 60.40625 \nL 10.984375 69.390625 \nL 28.421875 72.90625 \nL 38.28125 72.90625 \nL 38.28125 8.296875 \nL 54.390625 8.296875 \nL 54.390625 0 \nL 12.40625 0 \nz\n\" id=\"DejaVuSans-49\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-49\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_3\">\n     <g id=\"line2d_3\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"211.18125\" xlink:href=\"#m68c5620304\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_3\">\n      <!-- 2 -->\n      <g transform=\"translate(208 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 19.1875 8.296875 \nL 53.609375 8.296875 \nL 53.609375 0 \nL 7.328125 0 \nL 7.328125 8.296875 \nQ 12.9375 14.109375 22.625 23.890625 \nQ 32.328125 33.6875 34.8125 36.53125 \nQ 39.546875 41.84375 41.421875 45.53125 \nQ 43.3125 49.21875 43.3125 52.78125 \nQ 43.3125 58.59375 39.234375 62.25 \nQ 35.15625 65.921875 28.609375 65.921875 \nQ 23.96875 65.921875 18.8125 64.3125 \nQ 13.671875 62.703125 7.8125 59.421875 \nL 7.8125 69.390625 \nQ 13.765625 71.78125 18.9375 73 \nQ 24.125 74.21875 28.421875 74.21875 \nQ 39.75 74.21875 46.484375 68.546875 \nQ 53.21875 62.890625 53.21875 53.421875 \nQ 53.21875 48.921875 51.53125 44.890625 \nQ 49.859375 40.875 45.40625 35.40625 \nQ 44.1875 33.984375 37.640625 27.21875 \nQ 31.109375 20.453125 19.1875 8.296875 \nz\n\" id=\"DejaVuSans-50\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-50\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_4\">\n     <g id=\"line2d_4\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"274.590341\" xlink:href=\"#m68c5620304\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_4\">\n      <!-- 3 -->\n      <g transform=\"translate(271.409091 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 40.578125 39.3125 \nQ 47.65625 37.796875 51.625 33 \nQ 55.609375 28.21875 55.609375 21.1875 \nQ 55.609375 10.40625 48.1875 4.484375 \nQ 40.765625 -1.421875 27.09375 -1.421875 \nQ 22.515625 -1.421875 17.65625 -0.515625 \nQ 12.796875 0.390625 7.625 2.203125 \nL 7.625 11.71875 \nQ 11.71875 9.328125 16.59375 8.109375 \nQ 21.484375 6.890625 26.8125 6.890625 \nQ 36.078125 6.890625 40.9375 10.546875 \nQ 45.796875 14.203125 45.796875 21.1875 \nQ 45.796875 27.640625 41.28125 31.265625 \nQ 36.765625 34.90625 28.71875 34.90625 \nL 20.21875 34.90625 \nL 20.21875 43.015625 \nL 29.109375 43.015625 \nQ 36.375 43.015625 40.234375 45.921875 \nQ 44.09375 48.828125 44.09375 54.296875 \nQ 44.09375 59.90625 40.109375 62.90625 \nQ 36.140625 65.921875 28.71875 65.921875 \nQ 24.65625 65.921875 20.015625 65.03125 \nQ 15.375 64.15625 9.8125 62.3125 \nL 9.8125 71.09375 \nQ 15.4375 72.65625 20.34375 73.4375 \nQ 25.25 74.21875 29.59375 74.21875 \nQ 40.828125 74.21875 47.359375 69.109375 \nQ 53.90625 64.015625 53.90625 55.328125 \nQ 53.90625 49.265625 50.4375 45.09375 \nQ 46.96875 40.921875 40.578125 39.3125 \nz\n\" id=\"DejaVuSans-51\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-51\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_5\">\n     <g id=\"line2d_5\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"337.999432\" xlink:href=\"#m68c5620304\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_5\">\n      <!-- 4 -->\n      <g transform=\"translate(334.818182 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 37.796875 64.3125 \nL 12.890625 25.390625 \nL 37.796875 25.390625 \nz\nM 35.203125 72.90625 \nL 47.609375 72.90625 \nL 47.609375 25.390625 \nL 58.015625 25.390625 \nL 58.015625 17.1875 \nL 47.609375 17.1875 \nL 47.609375 0 \nL 37.796875 0 \nL 37.796875 17.1875 \nL 4.890625 17.1875 \nL 4.890625 26.703125 \nz\n\" id=\"DejaVuSans-52\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-52\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"text_6\">\n     <!-- Task -->\n     <g transform=\"translate(200.388281 268.034687)scale(0.1 -0.1)\">\n      <defs>\n       <path d=\"M -0.296875 72.90625 \nL 61.375 72.90625 \nL 61.375 64.59375 \nL 35.5 64.59375 \nL 35.5 0 \nL 25.59375 0 \nL 25.59375 64.59375 \nL -0.296875 64.59375 \nz\n\" id=\"DejaVuSans-84\"/>\n       <path d=\"M 34.28125 27.484375 \nQ 23.390625 27.484375 19.1875 25 \nQ 14.984375 22.515625 14.984375 16.5 \nQ 14.984375 11.71875 18.140625 8.90625 \nQ 21.296875 6.109375 26.703125 6.109375 \nQ 34.1875 6.109375 38.703125 11.40625 \nQ 43.21875 16.703125 43.21875 25.484375 \nL 43.21875 27.484375 \nz\nM 52.203125 31.203125 \nL 52.203125 0 \nL 43.21875 0 \nL 43.21875 8.296875 \nQ 40.140625 3.328125 35.546875 0.953125 \nQ 30.953125 -1.421875 24.3125 -1.421875 \nQ 15.921875 -1.421875 10.953125 3.296875 \nQ 6 8.015625 6 15.921875 \nQ 6 25.140625 12.171875 29.828125 \nQ 18.359375 34.515625 30.609375 34.515625 \nL 43.21875 34.515625 \nL 43.21875 35.40625 \nQ 43.21875 41.609375 39.140625 45 \nQ 35.0625 48.390625 27.6875 48.390625 \nQ 23 48.390625 18.546875 47.265625 \nQ 14.109375 46.140625 10.015625 43.890625 \nL 10.015625 52.203125 \nQ 14.9375 54.109375 19.578125 55.046875 \nQ 24.21875 56 28.609375 56 \nQ 40.484375 56 46.34375 49.84375 \nQ 52.203125 43.703125 52.203125 31.203125 \nz\n\" id=\"DejaVuSans-97\"/>\n       <path d=\"M 44.28125 53.078125 \nL 44.28125 44.578125 \nQ 40.484375 46.53125 36.375 47.5 \nQ 32.28125 48.484375 27.875 48.484375 \nQ 21.1875 48.484375 17.84375 46.4375 \nQ 14.5 44.390625 14.5 40.28125 \nQ 14.5 37.15625 16.890625 35.375 \nQ 19.28125 33.59375 26.515625 31.984375 \nL 29.59375 31.296875 \nQ 39.15625 29.25 43.1875 25.515625 \nQ 47.21875 21.78125 47.21875 15.09375 \nQ 47.21875 7.46875 41.1875 3.015625 \nQ 35.15625 -1.421875 24.609375 -1.421875 \nQ 20.21875 -1.421875 15.453125 -0.5625 \nQ 10.6875 0.296875 5.421875 2 \nL 5.421875 11.28125 \nQ 10.40625 8.6875 15.234375 7.390625 \nQ 20.0625 6.109375 24.8125 6.109375 \nQ 31.15625 6.109375 34.5625 8.28125 \nQ 37.984375 10.453125 37.984375 14.40625 \nQ 37.984375 18.0625 35.515625 20.015625 \nQ 33.0625 21.96875 24.703125 23.78125 \nL 21.578125 24.515625 \nQ 13.234375 26.265625 9.515625 29.90625 \nQ 5.8125 33.546875 5.8125 39.890625 \nQ 5.8125 47.609375 11.28125 51.796875 \nQ 16.75 56 26.8125 56 \nQ 31.78125 56 36.171875 55.265625 \nQ 40.578125 54.546875 44.28125 53.078125 \nz\n\" id=\"DejaVuSans-115\"/>\n       <path d=\"M 9.078125 75.984375 \nL 18.109375 75.984375 \nL 18.109375 31.109375 \nL 44.921875 54.6875 \nL 56.390625 54.6875 \nL 27.390625 29.109375 \nL 57.625 0 \nL 45.90625 0 \nL 18.109375 26.703125 \nL 18.109375 0 \nL 9.078125 0 \nz\n\" id=\"DejaVuSans-107\"/>\n      </defs>\n      <use xlink:href=\"#DejaVuSans-84\"/>\n      <use x=\"44.583984\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"105.863281\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"157.962891\" xlink:href=\"#DejaVuSans-107\"/>\n     </g>\n    </g>\n   </g>\n   <g id=\"matplotlib.axis_2\">\n    <g id=\"ytick_1\">\n     <g id=\"line2d_6\">\n      <defs>\n       <path d=\"M 0 0 \nL -3.5 0 \n\" id=\"m13396888ec\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n      </defs>\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m13396888ec\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_7\">\n      <!-- 0.0 -->\n      <g transform=\"translate(20.878125 243.557344)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 10.6875 12.40625 \nL 21 12.40625 \nL 21 0 \nL 10.6875 0 \nz\n\" id=\"DejaVuSans-46\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_2\">\n     <g id=\"line2d_7\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m13396888ec\" y=\"196.270125\"/>\n      </g>\n     </g>\n     <g id=\"text_8\">\n      <!-- 0.2 -->\n      <g transform=\"translate(20.878125 200.069344)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-50\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_3\">\n     <g id=\"line2d_8\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m13396888ec\" y=\"152.782125\"/>\n      </g>\n     </g>\n     <g id=\"text_9\">\n      <!-- 0.4 -->\n      <g transform=\"translate(20.878125 156.581344)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-52\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_4\">\n     <g id=\"line2d_9\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m13396888ec\" y=\"109.294125\"/>\n      </g>\n     </g>\n     <g id=\"text_10\">\n      <!-- 0.6 -->\n      <g transform=\"translate(20.878125 113.093344)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 33.015625 40.375 \nQ 26.375 40.375 22.484375 35.828125 \nQ 18.609375 31.296875 18.609375 23.390625 \nQ 18.609375 15.53125 22.484375 10.953125 \nQ 26.375 6.390625 33.015625 6.390625 \nQ 39.65625 6.390625 43.53125 10.953125 \nQ 47.40625 15.53125 47.40625 23.390625 \nQ 47.40625 31.296875 43.53125 35.828125 \nQ 39.65625 40.375 33.015625 40.375 \nz\nM 52.59375 71.296875 \nL 52.59375 62.3125 \nQ 48.875 64.0625 45.09375 64.984375 \nQ 41.3125 65.921875 37.59375 65.921875 \nQ 27.828125 65.921875 22.671875 59.328125 \nQ 17.53125 52.734375 16.796875 39.40625 \nQ 19.671875 43.65625 24.015625 45.921875 \nQ 28.375 48.1875 33.59375 48.1875 \nQ 44.578125 48.1875 50.953125 41.515625 \nQ 57.328125 34.859375 57.328125 23.390625 \nQ 57.328125 12.15625 50.6875 5.359375 \nQ 44.046875 -1.421875 33.015625 -1.421875 \nQ 20.359375 -1.421875 13.671875 8.265625 \nQ 6.984375 17.96875 6.984375 36.375 \nQ 6.984375 53.65625 15.1875 63.9375 \nQ 23.390625 74.21875 37.203125 74.21875 \nQ 40.921875 74.21875 44.703125 73.484375 \nQ 48.484375 72.75 52.59375 71.296875 \nz\n\" id=\"DejaVuSans-54\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-54\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_5\">\n     <g id=\"line2d_10\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m13396888ec\" y=\"65.806125\"/>\n      </g>\n     </g>\n     <g id=\"text_11\">\n      <!-- 0.8 -->\n      <g transform=\"translate(20.878125 69.605344)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 31.78125 34.625 \nQ 24.75 34.625 20.71875 30.859375 \nQ 16.703125 27.09375 16.703125 20.515625 \nQ 16.703125 13.921875 20.71875 10.15625 \nQ 24.75 6.390625 31.78125 6.390625 \nQ 38.8125 6.390625 42.859375 10.171875 \nQ 46.921875 13.96875 46.921875 20.515625 \nQ 46.921875 27.09375 42.890625 30.859375 \nQ 38.875 34.625 31.78125 34.625 \nz\nM 21.921875 38.8125 \nQ 15.578125 40.375 12.03125 44.71875 \nQ 8.5 49.078125 8.5 55.328125 \nQ 8.5 64.0625 14.71875 69.140625 \nQ 20.953125 74.21875 31.78125 74.21875 \nQ 42.671875 74.21875 48.875 69.140625 \nQ 55.078125 64.0625 55.078125 55.328125 \nQ 55.078125 49.078125 51.53125 44.71875 \nQ 48 40.375 41.703125 38.8125 \nQ 48.828125 37.15625 52.796875 32.3125 \nQ 56.78125 27.484375 56.78125 20.515625 \nQ 56.78125 9.90625 50.3125 4.234375 \nQ 43.84375 -1.421875 31.78125 -1.421875 \nQ 19.734375 -1.421875 13.25 4.234375 \nQ 6.78125 9.90625 6.78125 20.515625 \nQ 6.78125 27.484375 10.78125 32.3125 \nQ 14.796875 37.15625 21.921875 38.8125 \nz\nM 18.3125 54.390625 \nQ 18.3125 48.734375 21.84375 45.5625 \nQ 25.390625 42.390625 31.78125 42.390625 \nQ 38.140625 42.390625 41.71875 45.5625 \nQ 45.3125 48.734375 45.3125 54.390625 \nQ 45.3125 60.0625 41.71875 63.234375 \nQ 38.140625 66.40625 31.78125 66.40625 \nQ 25.390625 66.40625 21.84375 63.234375 \nQ 18.3125 60.0625 18.3125 54.390625 \nz\n\" id=\"DejaVuSans-56\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-56\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_6\">\n     <g id=\"line2d_11\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m13396888ec\" y=\"22.318125\"/>\n      </g>\n     </g>\n     <g id=\"text_12\">\n      <!-- 1.0 -->\n      <g transform=\"translate(20.878125 26.117344)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-49\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"text_13\">\n     <!-- Accuracy -->\n     <g transform=\"translate(14.798438 153.86625)rotate(-90)scale(0.1 -0.1)\">\n      <defs>\n       <path d=\"M 34.1875 63.1875 \nL 20.796875 26.90625 \nL 47.609375 26.90625 \nz\nM 28.609375 72.90625 \nL 39.796875 72.90625 \nL 67.578125 0 \nL 57.328125 0 \nL 50.6875 18.703125 \nL 17.828125 18.703125 \nL 11.1875 0 \nL 0.78125 0 \nz\n\" id=\"DejaVuSans-65\"/>\n       <path d=\"M 48.78125 52.59375 \nL 48.78125 44.1875 \nQ 44.96875 46.296875 41.140625 47.34375 \nQ 37.3125 48.390625 33.40625 48.390625 \nQ 24.65625 48.390625 19.8125 42.84375 \nQ 14.984375 37.3125 14.984375 27.296875 \nQ 14.984375 17.28125 19.8125 11.734375 \nQ 24.65625 6.203125 33.40625 6.203125 \nQ 37.3125 6.203125 41.140625 7.25 \nQ 44.96875 8.296875 48.78125 10.40625 \nL 48.78125 2.09375 \nQ 45.015625 0.34375 40.984375 -0.53125 \nQ 36.96875 -1.421875 32.421875 -1.421875 \nQ 20.0625 -1.421875 12.78125 6.34375 \nQ 5.515625 14.109375 5.515625 27.296875 \nQ 5.515625 40.671875 12.859375 48.328125 \nQ 20.21875 56 33.015625 56 \nQ 37.15625 56 41.109375 55.140625 \nQ 45.0625 54.296875 48.78125 52.59375 \nz\n\" id=\"DejaVuSans-99\"/>\n       <path d=\"M 8.5 21.578125 \nL 8.5 54.6875 \nL 17.484375 54.6875 \nL 17.484375 21.921875 \nQ 17.484375 14.15625 20.5 10.265625 \nQ 23.53125 6.390625 29.59375 6.390625 \nQ 36.859375 6.390625 41.078125 11.03125 \nQ 45.3125 15.671875 45.3125 23.6875 \nL 45.3125 54.6875 \nL 54.296875 54.6875 \nL 54.296875 0 \nL 45.3125 0 \nL 45.3125 8.40625 \nQ 42.046875 3.421875 37.71875 1 \nQ 33.40625 -1.421875 27.6875 -1.421875 \nQ 18.265625 -1.421875 13.375 4.4375 \nQ 8.5 10.296875 8.5 21.578125 \nz\nM 31.109375 56 \nz\n\" id=\"DejaVuSans-117\"/>\n       <path d=\"M 41.109375 46.296875 \nQ 39.59375 47.171875 37.8125 47.578125 \nQ 36.03125 48 33.890625 48 \nQ 26.265625 48 22.1875 43.046875 \nQ 18.109375 38.09375 18.109375 28.8125 \nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.1875 \nQ 20.953125 51.171875 25.484375 53.578125 \nQ 30.03125 56 36.53125 56 \nQ 37.453125 56 38.578125 55.875 \nQ 39.703125 55.765625 41.0625 55.515625 \nz\n\" id=\"DejaVuSans-114\"/>\n       <path d=\"M 32.171875 -5.078125 \nQ 28.375 -14.84375 24.75 -17.8125 \nQ 21.140625 -20.796875 15.09375 -20.796875 \nL 7.90625 -20.796875 \nL 7.90625 -13.28125 \nL 13.1875 -13.28125 \nQ 16.890625 -13.28125 18.9375 -11.515625 \nQ 21 -9.765625 23.484375 -3.21875 \nL 25.09375 0.875 \nL 2.984375 54.6875 \nL 12.5 54.6875 \nL 29.59375 11.921875 \nL 46.6875 54.6875 \nL 56.203125 54.6875 \nz\n\" id=\"DejaVuSans-121\"/>\n      </defs>\n      <use xlink:href=\"#DejaVuSans-65\"/>\n      <use x=\"66.658203\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"121.638672\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"176.619141\" xlink:href=\"#DejaVuSans-117\"/>\n      <use x=\"239.998047\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"281.111328\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"342.390625\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"397.371094\" xlink:href=\"#DejaVuSans-121\"/>\n     </g>\n    </g>\n   </g>\n   <g id=\"patch_8\">\n    <path d=\"M 43.78125 239.758125 \nL 43.78125 22.318125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_9\">\n    <path d=\"M 378.58125 239.758125 \nL 378.58125 22.318125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_10\">\n    <path d=\"M 43.78125 239.758125 \nL 378.58125 239.758125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_11\">\n    <path d=\"M 43.78125 22.318125 \nL 378.58125 22.318125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"text_14\">\n    <!-- 54% -->\n    <g transform=\"translate(73.249787 117.738771)scale(0.1 -0.1)\">\n     <defs>\n      <path d=\"M 10.796875 72.90625 \nL 49.515625 72.90625 \nL 49.515625 64.59375 \nL 19.828125 64.59375 \nL 19.828125 46.734375 \nQ 21.96875 47.46875 24.109375 47.828125 \nQ 26.265625 48.1875 28.421875 48.1875 \nQ 40.625 48.1875 47.75 41.5 \nQ 54.890625 34.8125 54.890625 23.390625 \nQ 54.890625 11.625 47.5625 5.09375 \nQ 40.234375 -1.421875 26.90625 -1.421875 \nQ 22.3125 -1.421875 17.546875 -0.640625 \nQ 12.796875 0.140625 7.71875 1.703125 \nL 7.71875 11.625 \nQ 12.109375 9.234375 16.796875 8.0625 \nQ 21.484375 6.890625 26.703125 6.890625 \nQ 35.15625 6.890625 40.078125 11.328125 \nQ 45.015625 15.765625 45.015625 23.390625 \nQ 45.015625 31 40.078125 35.4375 \nQ 35.15625 39.890625 26.703125 39.890625 \nQ 22.75 39.890625 18.8125 39.015625 \nQ 14.890625 38.140625 10.796875 36.28125 \nz\n\" id=\"DejaVuSans-53\"/>\n      <path d=\"M 72.703125 32.078125 \nQ 68.453125 32.078125 66.03125 28.46875 \nQ 63.625 24.859375 63.625 18.40625 \nQ 63.625 12.0625 66.03125 8.421875 \nQ 68.453125 4.78125 72.703125 4.78125 \nQ 76.859375 4.78125 79.265625 8.421875 \nQ 81.6875 12.0625 81.6875 18.40625 \nQ 81.6875 24.8125 79.265625 28.4375 \nQ 76.859375 32.078125 72.703125 32.078125 \nz\nM 72.703125 38.28125 \nQ 80.421875 38.28125 84.953125 32.90625 \nQ 89.5 27.546875 89.5 18.40625 \nQ 89.5 9.28125 84.9375 3.921875 \nQ 80.375 -1.421875 72.703125 -1.421875 \nQ 64.890625 -1.421875 60.34375 3.921875 \nQ 55.8125 9.28125 55.8125 18.40625 \nQ 55.8125 27.59375 60.375 32.9375 \nQ 64.9375 38.28125 72.703125 38.28125 \nz\nM 22.3125 68.015625 \nQ 18.109375 68.015625 15.6875 64.375 \nQ 13.28125 60.75 13.28125 54.390625 \nQ 13.28125 47.953125 15.671875 44.328125 \nQ 18.0625 40.71875 22.3125 40.71875 \nQ 26.5625 40.71875 28.96875 44.328125 \nQ 31.390625 47.953125 31.390625 54.390625 \nQ 31.390625 60.6875 28.953125 64.34375 \nQ 26.515625 68.015625 22.3125 68.015625 \nz\nM 66.40625 74.21875 \nL 74.21875 74.21875 \nL 28.609375 -1.421875 \nL 20.796875 -1.421875 \nz\nM 22.3125 74.21875 \nQ 30.03125 74.21875 34.609375 68.875 \nQ 39.203125 63.53125 39.203125 54.390625 \nQ 39.203125 45.171875 34.640625 39.84375 \nQ 30.078125 34.515625 22.3125 34.515625 \nQ 14.546875 34.515625 10.03125 39.859375 \nQ 5.515625 45.21875 5.515625 54.390625 \nQ 5.515625 63.484375 10.046875 68.84375 \nQ 14.59375 74.21875 22.3125 74.21875 \nz\n\" id=\"DejaVuSans-37\"/>\n     </defs>\n     <use xlink:href=\"#DejaVuSans-53\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-52\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_15\">\n    <!-- 55% -->\n    <g transform=\"translate(136.658878 115.172761)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-53\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-53\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_16\">\n    <!-- 92% -->\n    <g transform=\"translate(200.067969 34.883476)scale(0.1 -0.1)\">\n     <defs>\n      <path d=\"M 10.984375 1.515625 \nL 10.984375 10.5 \nQ 14.703125 8.734375 18.5 7.8125 \nQ 22.3125 6.890625 25.984375 6.890625 \nQ 35.75 6.890625 40.890625 13.453125 \nQ 46.046875 20.015625 46.78125 33.40625 \nQ 43.953125 29.203125 39.59375 26.953125 \nQ 35.25 24.703125 29.984375 24.703125 \nQ 19.046875 24.703125 12.671875 31.3125 \nQ 6.296875 37.9375 6.296875 49.421875 \nQ 6.296875 60.640625 12.9375 67.421875 \nQ 19.578125 74.21875 30.609375 74.21875 \nQ 43.265625 74.21875 49.921875 64.515625 \nQ 56.59375 54.828125 56.59375 36.375 \nQ 56.59375 19.140625 48.40625 8.859375 \nQ 40.234375 -1.421875 26.421875 -1.421875 \nQ 22.703125 -1.421875 18.890625 -0.6875 \nQ 15.09375 0.046875 10.984375 1.515625 \nz\nM 30.609375 32.421875 \nQ 37.25 32.421875 41.125 36.953125 \nQ 45.015625 41.5 45.015625 49.421875 \nQ 45.015625 57.28125 41.125 61.84375 \nQ 37.25 66.40625 30.609375 66.40625 \nQ 23.96875 66.40625 20.09375 61.84375 \nQ 16.21875 57.28125 16.21875 49.421875 \nQ 16.21875 41.5 20.09375 36.953125 \nQ 23.96875 32.421875 30.609375 32.421875 \nz\n\" id=\"DejaVuSans-57\"/>\n     </defs>\n     <use xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-50\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_17\">\n    <!-- 99% -->\n    <g transform=\"translate(263.47706 18.53264)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_18\">\n    <!-- 100% -->\n    <g transform=\"translate(323.704901 18.224745)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-49\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-48\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-48\"/>\n     <use x=\"190.869141\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_19\">\n    <!-- Task Accuracy -->\n    <g transform=\"translate(168.929063 16.318125)scale(0.12 -0.12)\">\n     <defs>\n      <path id=\"DejaVuSans-32\"/>\n     </defs>\n     <use xlink:href=\"#DejaVuSans-84\"/>\n     <use x=\"44.583984\" xlink:href=\"#DejaVuSans-97\"/>\n     <use x=\"105.863281\" xlink:href=\"#DejaVuSans-115\"/>\n     <use x=\"157.962891\" xlink:href=\"#DejaVuSans-107\"/>\n     <use x=\"215.873047\" xlink:href=\"#DejaVuSans-32\"/>\n     <use x=\"247.660156\" xlink:href=\"#DejaVuSans-65\"/>\n     <use x=\"314.318359\" xlink:href=\"#DejaVuSans-99\"/>\n     <use x=\"369.298828\" xlink:href=\"#DejaVuSans-99\"/>\n     <use x=\"424.279297\" xlink:href=\"#DejaVuSans-117\"/>\n     <use x=\"487.658203\" xlink:href=\"#DejaVuSans-114\"/>\n     <use x=\"528.771484\" xlink:href=\"#DejaVuSans-97\"/>\n     <use x=\"590.050781\" xlink:href=\"#DejaVuSans-99\"/>\n     <use x=\"645.03125\" xlink:href=\"#DejaVuSans-121\"/>\n    </g>\n   </g>\n  </g>\n </g>\n <defs>\n  <clipPath id=\"p3f79f8a23b\">\n   <rect height=\"217.44\" width=\"334.8\" x=\"43.78125\" y=\"22.318125\"/>\n  </clipPath>\n </defs>\n</svg>\n",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAdO0lEQVR4nO3de7wVdd328c+1YXPLVu+QQCXQSINQCXe6RTuY3CpEaHqTGeKBDj7QCStPBSqmhlooeaRb8cmbNExNyVBRKNuJ8oiAhoqSCUaCmghBHrah6Pf5YwZcbPZhbWDWYu+53q/Xejnzm9+a9Z3lZq41v5k1SxGBmZnlV0W5CzAzs/JyEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CCx3JE2RNL7cdZhtLxwEtt2T9EbB4z1JbxXMn1SiGqZIWi+pWylez6yUHAS23YuInTY8gBeALxS0Tc369SXtCBwH/As4OevXq/fa7Uv5epZPDgJrtST1l/SIpLWSXpZ0raQO6TJJukLSSkmvSXpKUt8G1rGzpFpJV0tSIy91HLAWuAj4Sr3nd5b0v5JekrRG0l0Fy46VtDB9/aWSBqftyyQdWdDvAkm/Sqd7SgpJp0p6Afhj2v4bSf+Q9C9JsyXtV/D8jpImSvp7uvzhtO1eSafVq/dJSUNb8DZbDjgIrDV7Fzgd6AJ8EjgC+Ha6bBDwWaA38AHgy8DqwidL+iDwADAnIr4bjd9v5SvAr4FbgT6SDixYdjNQBewH7Apcka67P3ATcDbQKa1lWQu27TBgH+Bz6fx9QK/0NR4HCo+ELgcOBD4FdAZ+ALwH/JKCIxhJ+wPdgXtbUAeSvidpkaSnJX1/w7rSEH5K0t2S/jNt/3QaNgsk9UrbOkmaJalV7G8k3Zh+gFhU0NZZ0u8lPZf+d5e0XemHiCXpdh+Qtn9M0mNp2yfTtvaS/iCpqjxb1oSI8MOPVvMg2Zke2ciy7wO/TacPB/4KHAJU1Os3BbgRWASc3czr7UmyU61O52cCV6XT3dJluzTwvOuBK4rZBuAC4FfpdE8ggL2aqKlT2ucDJB/m3gL2b6DfDsAaoFc6fznw8xa+333T96kKaA/8AfgoMB84LO3zdeDH6fQ0oAfwGWBiwesOKPffTgu2+bPAAcCigrYJwJh0egzw03R6CElIK/1bezRt/1n6HvQA7kzbTgO+Wu7ta+jRKhLarCGSeku6Jx0yeQ24hOTogIj4I3AtMAlYKWnyhk+tqaOAjsB1zbzMKcDiiFiYzk8FTpRUCewB/DMi1jTwvD2ApVu4aQDLN0xIaifpJ+nw0mu8f2TRJX3s0NBrRcS/gduAk9NP48NJjmBaYh+SnVtdRKwHHgS+SHKkNTvt83uS4TOAd0hCowp4R9LewB4R8acWvm7ZRMRs4J/1mo8lOcIi/e9/F7TfFIm5QKf0goL670Mn4AskR4nbHQeBtWb/A/yF5BPvfwLnkHwyAyAiro6IA4F9SXZcZxc89wbgfmBGejK4MSOAvdKw+QfJJ70uJJ8ElwOd03/k9S0H9m5knW+S7CA22L2BPoXDVCeS7HCOJDkK6Jm2C1gF/LuJ1/olcBLJsFldRDzSSL/GLAIOlfTBdEhjCEnIPZ3WBHB82gZwKcnObixJEF8MnNfC19we7RYRL6fT/wB2S6e7UxDawIq0bRLJ3+MvST6gjAMuiYj3SlNuyzgIrDXbGXgNeENSH+BbGxZIOkjSwekn9zdJdpb1/xGOBp4F7pbUsf7K07HdvYH+QHX66AvcAoxIdwz3AT+XtIukSkmfTZ/+C+Brko6QVCGpe1ojwELghLR/DfClIrZzHck5jiqSHQsA6Y7lRuBnkj6UHj18UtJ/pMsfSbd7Ii0/GiAiFgM/BWaRBOdCknMzXwe+LemxtL630/4LI+KQiPgvYC/gZZKh9Nsk/UrSbg28TKsSyThPk/fvj4gXImJARHwSqCMZIlos6eb0vehdilqLVu6xKT/8aMmDgvF1krHcvwBvAA+RXNXzcLrsCODJdNkqkiGdndJlU4Dx6XQFySfYWcAO9V7rOtLx3Xrt/Ul2zJ3Txy+BV0jG46cV9Bua1vA6sAT4XNq+F/BoWtu9wNVsfo6gfcF6dgJ+l67n7yRHKQF8NF3eEbgSeJHkEtfZQMeC559HM+cdWvD+XwJ8u15bb2BevTal72nn9L3/MMkJ8IvL/TdU5Hb2ZNNzBM8C3dLpbsCz6fT1wPCG+hW03UZyov/i9D34MDC13NtY+FBaqJm1UZJGAKMi4jNb+PxdI2KlpD1Jdu6HAB3StgqSYP1TRNxY8JyvkJxEv1LSb4HvkuxcvxgRp2/dFmVPUk/gnojom85fBqyOiJ9IGgN0jogfSDqK5MhyCHAwcHVE9C9Yz2HAf0fE6ZKuIDmZviztt91cxusvq5i1Yem4/reBn2/Fau5ML7V9B/hORKxNLyn9Trp8GvC/9V7zqySX8EJyXmUGyfDRiVtRR0lI+jUwAOgiaQXwI+AnwO2STiU5Kvty2n0GSQgsIRkC+lrBekRyNDYsbZpMcnTUnoJhzO1BZkcEkm4EjgZWbkjVessFXEXyJtaRXFb1eCbFmOWQpM+R7KT/ABwXyVU/ZpvJ8mTxFGBwE8s/TzJu1gsYRXIFiJltIxExMyJ2jIhjHQLWlMyCIBq+FrdQY9ffmplZCZXzHEFj19++XL+jpFEkRw3suOOOB/bp06d+FzMza8Jjjz22KiK6NrSsVZwsjojJJCdaqKmpiQULFpS5IjNrLXqOadGtlbZry35y1BY/V9LfG1tWziB4kfe/jQjJFy5eLFMtZm1aW9kZbs2O0BpXzm8WTwdGpHfvOwT4V7z/FW4zMyuRzI4IGrkWtxIgIq6jietvzcysdDILgogY3szyAL7TVB8zM8uebzpnVoSrrrqKvn37st9++3HllVcCcPbZZ9OnTx/69evH0KFDWbt2LQBz5syhX79+1NTU8NxzzwGwdu1aBg0axHvvbZc3n7SccxCYNWPRokXccMMNzJs3jyeeeIJ77rmHJUuWMHDgQBYtWsSTTz5J7969ufTSSwGYOHEiM2bM4Morr+S665KfOxg/fjznnHMOFRX+J2fbH/9VmjVj8eLFHHzwwVRVVdG+fXsOO+wwpk2bxqBBg2jfPhldPeSQQ1ixYgUAlZWV1NXVUVdXR2VlJUuXLmX58uUMGDCgjFth1rhW8T0Cs3Lq27cv5557LqtXr6Zjx47MmDGDmpqaTfrceOONDBuW3Fts7NixjBgxgo4dO3LzzTdz1llnMX78+HKUblYUB4FZM/bZZx9++MMfMmjQIHbccUeqq6tp167dxuUXX3wx7du356STTgKgurqauXPnAjB79my6detGRDBs2DAqKyuZOHEiu+3W6n+fxdoQDw2ZFeHUU0/lscceY/bs2eyyyy707p38wNSUKVO45557mDp1KskNdd8XEYwfP55x48Zx4YUXMmHCBEaOHMnVV19djk0wa5SPCMyKsHLlSnbddVdeeOEFpk2bxty5c7n//vuZMGECDz74IFVVVZs956abbmLIkCF07tyZuro6KioqqKiooK6urgxbYNY4B4FZEY477jhWr15NZWUlkyZNolOnTowePZp169YxcOBAIDlhvOEqobq6OqZMmcKsWbMAOOOMMxgyZAgdOnTglltuKdt2mDXEQWBWhIceemiztiVLljTav6qqitra2o3zhx56KE899VQmtZltLZ8jMDPLOQeBmVnOOQjMzHLO5wgsF9rK/fjB9+S3bc9HBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OcyzQIJA2W9KykJZLGNLB8T0m1kv4s6UlJQ7Ksx8zMNpdZEEhqB0wCPg/sCwyXtG+9bucBt0fEJ4ATgJ9nVY+ZmTUsyyOC/sCSiHg+It4GbgWOrdcngP9Mpz8AvJRhPWZm1oAsg6A7sLxgfkXaVugC4GRJK4AZwGkNrUjSKEkLJC149dVXs6jVzCy3yn2yeDgwJSJ6AEOAmyVtVlNETI6Imoio6dq1a8mLNDNry7IMgheBPQrme6RthU4FbgeIiEeAHYAuGdZkZmb1ZBkE84Fekj4iqQPJyeDp9fq8ABwBIGkfkiDw2I+ZWQllFgQRsR4YDcwEFpNcHfS0pIskHZN2OxMYKekJ4NfAVyMisqrJzMw21z7LlUfEDJKTwIVt5xdMPwN8OssazMysaeU+WWxmZmXmIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8u5TINA0mBJz0paImlMI32+LOkZSU9LuiXLeszMbHPts1qxpHbAJGAgsAKYL2l6RDxT0KcXMBb4dESskbRrVvWYmVnDsjwi6A8siYjnI+Jt4Fbg2Hp9RgKTImINQESszLAe20o9e/bk4x//ONXV1dTU1ABwwQUX0L17d6qrq6murmbGjBkAzJkzh379+lFTU8Nzzz0HwNq1axk0aBDvvfde2bbBzDaX2REB0B1YXjC/Aji4Xp/eAJLmAO2ACyLi/vorkjQKGAWw5557ZlKsFae2tpYuXbps0nb66adz1llnbdI2ceJEZsyYwbJly7juuuuYOHEi48eP55xzzqGiwqemzLYn5f4X2R7oBQwAhgM3SOpUv1NETI6Imoio6dq1a2krtC1SWVlJXV0ddXV1VFZWsnTpUpYvX86AAQPKXZqZ1dNsEEj6gqQtCYwXgT0K5nukbYVWANMj4p2I+BvwV5Jg2C41NDSywcSJE5HEqlWrALjzzjvZb7/9OPTQQ1m9ejUAS5cuZdiwYSWve1uRxKBBgzjwwAOZPHnyxvZrr72Wfv368fWvf501a9YAMHbsWEaMGMGll17K6NGjOffccxk/fny5SjezJhSzgx8GPCdpgqQ+LVj3fKCXpI9I6gCcAEyv1+cukqMBJHUhGSp6vgWvUXK1tbUsXLiQBQsWbGxbvnw5s2bN2mTY6pprrmH+/Pl84xvf4JZbkouhzjvvvFa9M3z44Yd5/PHHue+++5g0aRKzZ8/mW9/6FkuXLmXhwoV069aNM888E4Dq6mrmzp1LbW0tzz//PN26dSMiGDZsGCeffDKvvPJKmbfGzDZoNggi4mTgE8BSYIqkRySNkrRzM89bD4wGZgKLgdsj4mlJF0k6Ju02E1gt6RmgFjg7IlZvxfaUxemnn86ECROQtLGtoqKCdevWbRwaeeihh9h9993p1Wu7PeBpVvfu3QHYddddGTp0KPPmzWO33XajXbt2VFRUMHLkSObNm7fJcyKC8ePHM27cOC688EImTJjAyJEjufrqq8uxCWbWgKKGfCLiNeAOkit/ugFDgcclndbM82ZERO+I2DsiLk7bzo+I6el0RMQZEbFvRHw8Im7dqq3JWENDI7/73e/o3r07+++//yZ9x44dy5FHHsndd9/N8OHD+fGPf8y4cePKUfY28eabb/L6669vnJ41axZ9+/bl5Zdf3tjnt7/9LX379t3keTfddBNDhgyhc+fO1NXVUVFRQUVFBXV1dSWt38wa1+xVQ+mn968BHwVuAvpHxEpJVcAzwDXZlrj9ePjhh+nevTsrV65k4MCB9OnTh0suuYRZs2Zt1nfgwIEMHDgQeH9n+Ne//pXLL7+cXXbZhauuuoqqqqpSb8IWe+WVVxg6dCgA69ev58QTT2Tw4MGccsopLFy4EEn07NmT66+/fuNz6urqmDJlysb354wzzmDIkCF06NBh43CZmZVfMZePHgdcERGzCxsjok7SqdmUtX2qPzTy4IMP8re//W3j0cCKFSs44IADmDdvHrvvvjvw/s5w5syZHH300UybNo077riDqVOnMnLkyLJtS0vttddePPHEE5u133zzzY0+p6qqitra2o3zhx56KE899VQm9ZnZlitmaOgCYOPAr6SOknoCRMQD2ZS1/WloaOSggw5i5cqVLFu2jGXLltGjRw8ef/zxjSEAcNlll/Hd736XyspK3nrrLSR5aMTMtivFHBH8BvhUwfy7adtBmVS0nWpsaKQpL730EvPmzeNHP/oRAKeddhoHHXQQnTp14q677sq6ZDOzohQTBO3TW0QAEBFvp5eD5kpjQyOFli1btsn8hz70Ie69996N88cffzzHH398FuWZmW2xYoLgVUnHbLjSR9KxwKpsy7Is9Bxzb/OdWoFlPzmq3CWYtSnFBME3gamSrgVEcv+gEZlWZWZmJdNsEETEUuAQSTul829kXpWZmZVMUXcflXQUsB+ww4Zvz0bERRnWlYm2MjQCHh4xs22nmJvOXUdyv6HTSIaGjgc+nHFdZmZWIsV8j+BTETECWBMRFwKfJP0dATMza/2KCYJ/p/+tk/Qh4B2S+w2ZmVkbUMw5grvTH4u5DHgcCOCGLIsyM7PSaTII0h+keSAi1gJ3SroH2CEi/lWK4szMLHtNDg1FxHvApIL5dQ4BM7O2pZhzBA9IOk6Fv7piZmZtRjFB8A2Sm8ytk/SapNclvZZxXWZmViLFfLO4yZ+kNDOz1q2YXyj7bEPt9X+oxszMWqdiLh89u2B6B6A/8BhweCYVmZlZSRUzNPSFwnlJewBXZlWQmZmVVjEni+tbAeyzrQsxM7PyKOYcwTUk3yaGJDiqSb5hbGZmbUAx5wgWFEyvB34dEXMyqsfMzEqsmCC4A/h3RLwLIKmdpKqIqMu2NDMzK4WivlkMdCyY7wj8IZtyzMys1IoJgh0Kf54yna7KriQzMyulYoLgTUkHbJiRdCDwVnYlmZlZKRVzjuD7wG8kvUTyU5W7k/x0pZmZtQHFfKFsvqQ+wMfSpmcj4p1syzIzs1Ip5sfrvwPsGBGLImIRsJOkb2dfmpmZlUIx5whGpr9QBkBErAFGZlaRmZmVVDFB0K7wR2kktQM6ZFeSmZmVUjEni+8HbpN0fTr/DeC+7EoyM7NSKiYIfgiMAr6Zzj9JcuWQmZm1Ac0ODaU/YP8osIzktwgOBxYXs3JJgyU9K2mJpDFN9DtOUkiqKa5sMzPbVho9IpDUGxiePlYBtwFExH8Vs+L0XMIkYCDJravnS5oeEc/U67cz8D2SsDEzsxJr6ojgLySf/o+OiM9ExDXAuy1Yd39gSUQ8HxFvA7cCxzbQ78fAT4F/t2DdZma2jTQVBF8EXgZqJd0g6QiSbxYXqzuwvGB+Rdq2UXrrij0i4t6mViRplKQFkha8+uqrLSjBzMya02gQRMRdEXEC0AeoJbnVxK6S/kfSoK19YUkVwM+AM5vrGxGTI6ImImq6du26tS9tZmYFijlZ/GZE3JL+dnEP4M8kVxI150Vgj4L5HmnbBjsDfYE/SVoGHAJM9wljM7PSatFvFkfEmvTT+RFFdJ8P9JL0EUkdgBOA6QXr+ldEdImInhHRE5gLHBMRCxpenZmZZWFLfry+KBGxHhgNzCS53PT2iHha0kWSjsnqdc3MrGWK+ULZFouIGcCMem3nN9J3QJa1mJlZwzI7IjAzs9bBQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzmQaBpMGSnpW0RNKYBpafIekZSU9KekDSh7Osx8zMNpdZEEhqB0wCPg/sCwyXtG+9bn8GaiKiH3AHMCGreszMrGFZHhH0B5ZExPMR8TZwK3BsYYeIqI2IunR2LtAjw3rMzKwBWQZBd2B5wfyKtK0xpwL3NbRA0ihJCyQtePXVV7dhiWZmtl2cLJZ0MlADXNbQ8oiYHBE1EVHTtWvX0hZnZtbGtc9w3S8CexTM90jbNiHpSOBc4LCIWJdhPWZm1oAsjwjmA70kfURSB+AEYHphB0mfAK4HjomIlRnWYmZmjcgsCCJiPTAamAksBm6PiKclXSTpmLTbZcBOwG8kLZQ0vZHVmZlZRrIcGiIiZgAz6rWdXzB9ZJavb2ZmzdsuThabmVn5OAjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzmQaBpMGSnpW0RNKYBpb/h6Tb0uWPSuqZZT1mZra5zIJAUjtgEvB5YF9guKR963U7FVgTER8FrgB+mlU9ZmbWsCyPCPoDSyLi+Yh4G7gVOLZen2OBX6bTdwBHSFKGNZmZWT2KiGxWLH0JGBwR/yedPwU4OCJGF/RZlPZZkc4vTfusqreuUcCodPZjwLOZFL3tdAFWNdurbfK251eet781bPuHI6JrQwval7qSLRERk4HJ5a6jWJIWRERNuesoB297Prcd8r39rX3bsxwaehHYo2C+R9rWYB9J7YEPAKszrMnMzOrJMgjmA70kfURSB+AEYHq9PtOBr6TTXwL+GFmNVZmZWYMyGxqKiPWSRgMzgXbAjRHxtKSLgAURMR34BXCzpCXAP0nCoi1oNcNYGfC251eet79Vb3tmJ4vNzKx18DeLzcxyzkFgZpZzDoJtqLlbarRlkm6UtDL9bkiuSNpDUq2kZyQ9Lel75a6pVCTtIGmepCfSbb+w3DWVg6R2kv4s6Z5y17IlHATbSJG31GjLpgCDy11EmawHzoyIfYFDgO/k6P/9OuDwiNgfqAYGSzqkvCWVxfeAxeUuYks5CLadYm6p0WZFxGySK79yJyJejojH0+nXSXYI3ctbVWlE4o10tjJ95OoKFEk9gKOA/1vuWraUg2Db6Q4sL5hfQU52Bva+9A66nwAeLXMpJZMOiywEVgK/j4jcbHvqSuAHwHtlrmOLOQjMthFJOwF3At+PiNfKXU+pRMS7EVFNcveA/pL6lrmkkpF0NLAyIh4rdy1bw0Gw7RRzSw1royRVkoTA1IiYVu56yiEi1gK15Otc0aeBYyQtIxkOPlzSr8pbUss5CLadYm6pYW1Qeuv0XwCLI+Jn5a6nlCR1ldQpne4IDAT+UtaiSigixkZEj4joSfJv/o8RcXKZy2oxB8E2EhHrgQ231FgM3B4RT5e3qtKR9GvgEeBjklZIOrXcNZXQp4FTSD4NLkwfQ8pdVIl0A2olPUnyYej3EdEqL6HMM99iwsws53xEYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOdcqfrzerJwkfRB4IJ3dHXgXeDWd75/eW6qp538VqImI0ZkVabYVHARmzYiI1SR31kTSBcAbEXF5OWsy25Y8NGS2BSSNlDQ/vQ//nZKq0vbjJS1K22c38LyjJD0iqUvpqzZrmIPAbMtMi4iD0vvwLwY2fJP6fOBzafsxhU+QNBQYAwyJiFUlrdasCR4aMtsyfSWNBzoBO5HcWgRgDjBF0u1A4c3nDgdqgEF5ujOptQ4+IjDbMlOA0RHxceBCYAeAiPgmcB7JnWgfS080AywFdgZ6l75Us6Y5CMy2zM7Ay+ntp0/a0Chp74h4NCLOJ7myaMOtyf8OHAfcJGm/kldr1gQHgdmWGUfyK2Rz2PS2y5dJekrSIuD/AU9sWBARfyEJjd9I2ruUxZo1xXcfNTPLOR8RmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZz/x/jOYg2+yx1FwAAAABJRU5ErkJggg==\n"
     },
     "metadata": {
      "needs_background": "light"
     }
    }
   ],
   "source": [
    "results.make_plots()"
   ]
  },
  {
   "source": [
    "As you can see, our model's performance quickly deteriorates as new tasks are learned, a process refered to as \"Catastrophic Forgetting\".\n",
    "Next, we'll try to do something about it.\n"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "## Adding a CL Mechanism\n",
    "\n",
    "First, by taking a look at the logs above, you will notice that we are told that our Method doesn't have an `on_task_switch` method.\n",
    "\n",
    "A Setting would call this `on_task_switch` method during training or evaluation if we are allowed to know when task boundaries occur in that setting. Additionally, if it's allowed in that Setting, we might also receive the index of the new task we are switching to.\n",
    "\n",
    "Using this information, here we will add an EWC-like penalty to our model, which will prevent its weights from changing too much between tasks. We'll use the `on_task_switch` method to update the 'anchor' weights everytime a task boundary is encountered.\n"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from copy import deepcopy\n",
    "from sequoia.utils import dict_intersection\n",
    "\n",
    "class MyImprovedModel(MyModel):\n",
    "    \"\"\" Adds an ewc-like penalty to the demo model. \"\"\"\n",
    "    def __init__(self,\n",
    "                 observation_space: gym.Space,\n",
    "                 action_space: gym.Space,\n",
    "                 reward_space: gym.Space,\n",
    "                 ewc_coefficient: float = 1.0,\n",
    "                 ewc_p_norm: int = 2,\n",
    "                 ):\n",
    "        super().__init__(\n",
    "            observation_space,\n",
    "            action_space,\n",
    "            reward_space,\n",
    "        )\n",
    "        self.ewc_coefficient = ewc_coefficient\n",
    "        self.ewc_p_norm = ewc_p_norm\n",
    "\n",
    "        self.previous_model_weights: Dict[str, Tensor] = {}\n",
    "\n",
    "        self._previous_task: Optional[int] = None\n",
    "        self._n_switches: int = 0\n",
    "\n",
    "    def shared_step(self, batch: Tuple[Observations, Rewards], *args, **kwargs):\n",
    "        base_loss, metrics = super().shared_step(batch, *args, **kwargs)\n",
    "        ewc_loss = self.ewc_coefficient * self.ewc_loss()\n",
    "        metrics[\"ewc_loss\"] = ewc_loss\n",
    "        return base_loss + ewc_loss, metrics\n",
    "\n",
    "    def on_task_switch(self, task_id: Optional[int])-> None:\n",
    "        \"\"\" Executed when the task switches (to either a known or unknown task).\n",
    "        \"\"\"\n",
    "        if self._previous_task is None and self._n_switches == 0:\n",
    "            print(\"Starting the first task, no EWC update.\")\n",
    "        elif task_id is None or task_id != self._previous_task:\n",
    "            # NOTE: We also switch between unknown tasks.\n",
    "            print(f\"Switching tasks: {self._previous_task} -> {task_id}: \")\n",
    "            print(f\"Updating the EWC 'anchor' weights.\")\n",
    "            self._previous_task = task_id\n",
    "            self.previous_model_weights.clear()\n",
    "            self.previous_model_weights.update(deepcopy({\n",
    "                k: v.detach() for k, v in self.named_parameters()\n",
    "            }))\n",
    "        self._n_switches += 1\n",
    "\n",
    "    def ewc_loss(self) -> Tensor:\n",
    "        \"\"\"Gets an 'ewc-like' regularization loss.\n",
    "\n",
    "        NOTE: This is a simplified version of EWC where the loss is the P-norm\n",
    "        between the current weights and the weights as they were on the begining\n",
    "        of the task.\n",
    "        \"\"\"\n",
    "        if self._previous_task is None:\n",
    "            # We're in the first task: do nothing.\n",
    "            return 0.\n",
    "\n",
    "        old_weights: Dict[str, Tensor] = self.previous_model_weights\n",
    "        new_weights: Dict[str, Tensor] = dict(self.named_parameters())\n",
    "\n",
    "        loss = 0.\n",
    "        for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights):\n",
    "            loss += torch.dist(new_w, old_w.type_as(new_w), p=self.ewc_p_norm)\n",
    "        return loss\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "class ImprovedDemoMethod(DemoMethod):\n",
    "    \"\"\" Improved version of the demo method, that adds an ewc-like regularizer.\n",
    "    \"\"\"\n",
    "    # Name of this method:    \n",
    "    @dataclass\n",
    "    class HParams(DemoMethod.HParams):\n",
    "        \"\"\" Hyperparameters of this new improved method. (Adds ewc params).\"\"\"\n",
    "        # Coefficient of the ewc-like loss.\n",
    "        ewc_coefficient: float = 1.0\n",
    "        # Distance norm used in the ewc loss.\n",
    "        ewc_p_norm: int = 2\n",
    "\n",
    "    def __init__(self, hparams: HParams):\n",
    "        super().__init__(hparams=hparams)\n",
    "    \n",
    "    def configure(self, setting: ClassIncrementalSetting):\n",
    "        # Use the improved model, with the added EWC-like term.\n",
    "        self.model = MyImprovedModel(\n",
    "            observation_space=setting.observation_space,\n",
    "            action_space=setting.action_space,\n",
    "            reward_space=setting.reward_space,\n",
    "            ewc_coefficient=self.hparams.ewc_coefficient,\n",
    "            ewc_p_norm = self.hparams.ewc_p_norm,\n",
    "        )\n",
    "        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.hparams.learning_rate)\n",
    "\n",
    "    def on_task_switch(self, task_id: Optional[int]):\n",
    "        self.model.on_task_switch(task_id)"
   ]
  },
  {
   "source": [
    "## Running the \"Improved\" method"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "2021-02-25:17:29:31,526 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 0.\n",
      "2021-02-25:17:29:31,580 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:433] Number of train tasks: 5.\n",
      "2021-02-25:17:29:31,581 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:434] Number of test tasks: 5.\n",
      "Training Epoch 0:   0%|          | 0/300 [00:00<?, ?it/s]Starting the first task, no EWC update.\n",
      "Training Epoch 0: 100%|██████████| 300/300 [00:03<00:00, 79.82it/s, accuracy=1, ewc_loss=0]\n",
      "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 147.76it/s, accuracy=1, ewc_loss=0, val_loss=tensor(3.3188)]\n",
      "2021-02-25:17:29:35,880 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 0.\n",
      "2021-02-25:17:29:35,921 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:433] Number of train tasks: 5.\n",
      "2021-02-25:17:29:35,921 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:434] Number of test tasks: 5.\n",
      "2021-02-25:17:29:35,950 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
      "Test:  14%|█▍        | 43/312 [00:00<00:01, 211.59it/s]Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Test: 100%|██████████| 312/312 [00:01<00:00, 239.22it/s]\n",
      "2021-02-25:17:29:37,352 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.690505\n",
      "2021-02-25:17:29:37,353 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 1.\n",
      "Training Epoch 0:   0%|          | 0/300 [00:00<?, ?it/s]Switching tasks: None -> 1: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Training Epoch 0: 100%|██████████| 300/300 [00:05<00:00, 59.70it/s, accuracy=0.875, ewc_loss=tensor(0.2296, grad_fn=<MulBackward0>)]\n",
      "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 143.94it/s, accuracy=0.969, ewc_loss=tensor(0.2221), val_loss=tensor(33.0478)]\n",
      "2021-02-25:17:29:42,905 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 1.\n",
      "2021-02-25:17:29:42,909 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
      "Test:  12%|█▎        | 39/312 [00:00<00:01, 190.68it/s]Switching tasks: 1 -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Test: 100%|██████████| 312/312 [00:01<00:00, 218.28it/s]\n",
      "2021-02-25:17:29:44,441 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.745092\n",
      "2021-02-25:17:29:44,442 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 2.\n",
      "Training Epoch 0:   0%|          | 0/300 [00:00<?, ?it/s]Switching tasks: None -> 2: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Training Epoch 0: 100%|██████████| 300/300 [00:05<00:00, 54.67it/s, accuracy=0.906, ewc_loss=tensor(0.3728, grad_fn=<MulBackward0>)]\n",
      "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 162.51it/s, accuracy=0.906, ewc_loss=tensor(0.3689), val_loss=tensor(43.5458)]\n",
      "2021-02-25:17:29:50,398 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 2.\n",
      "2021-02-25:17:29:50,402 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
      "Test:  15%|█▍        | 46/312 [00:00<00:01, 231.12it/s]Switching tasks: 2 -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Test: 100%|██████████| 312/312 [00:01<00:00, 239.81it/s]\n",
      "2021-02-25:17:29:51,801 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.915665\n",
      "2021-02-25:17:29:51,801 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 3.\n",
      "Training Epoch 0:   0%|          | 0/300 [00:00<?, ?it/s]Switching tasks: None -> 3: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Training Epoch 0: 100%|██████████| 300/300 [00:05<00:00, 54.25it/s, accuracy=1, ewc_loss=tensor(0.0175, grad_fn=<MulBackward0>)]\n",
      "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 144.31it/s, accuracy=0.969, ewc_loss=tensor(0.0182), val_loss=tensor(8.4141)]\n",
      "2021-02-25:17:29:57,857 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 3.\n",
      "2021-02-25:17:29:57,861 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
      "Test:  13%|█▎        | 42/312 [00:00<00:01, 211.24it/s]Switching tasks: 3 -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Test: 100%|██████████| 312/312 [00:01<00:00, 231.53it/s]\n",
      "2021-02-25:17:29:59,316 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.917368\n",
      "2021-02-25:17:29:59,317 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 4.\n",
      "Training Epoch 0:   0%|          | 0/300 [00:00<?, ?it/s]Switching tasks: None -> 4: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Training Epoch 0: 100%|██████████| 300/300 [00:05<00:00, 55.17it/s, accuracy=1, ewc_loss=tensor(0.0487, grad_fn=<MulBackward0>)]\n",
      "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 147.18it/s, accuracy=0.938, ewc_loss=tensor(0.0635), val_loss=tensor(14.3717)]\n",
      "2021-02-25:17:30:05,271 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 4.\n",
      "2021-02-25:17:30:05,276 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
      "Test:  14%|█▍        | 45/312 [00:00<00:01, 219.80it/s]Switching tasks: 4 -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Switching tasks: None -> None: \n",
      "Updating the EWC 'anchor' weights.\n",
      "Test: 100%|██████████| 312/312 [00:01<00:00, 219.23it/s]\n",
      "2021-02-25:17:30:06,803 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.90605\n",
      "2021-02-25:17:30:06,804 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:237] Finished main loop in 36.293361921000006 seconds.\n",
      "2021-02-25:17:30:06,894 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:257] {\n",
      "\t\"Task 0\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.981351\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.752976\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.53125\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.640377\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.546371\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 1\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.927419\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.896825\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.457157\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.700397\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.741935\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 2\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.970766\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.780258\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.94254\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.990079\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.895665\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 3\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.972278\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.770833\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.939516\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.990575\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.914819\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 4\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.970766\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.708333\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.88004\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.989583\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.983367\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Final/Average Online Performance\": 0,\n",
      "\t\"Final/Average Final Performance\": 0.90605,\n",
      "\t\"Final/Runtime (seconds)\": 36.293361921000006,\n",
      "\t\"Final/CL Score\": 0.74363\n",
      "}\n",
      "\n",
      "2021-02-25:17:30:06,997 INFO     [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:395] {\n",
      "\t\"Task 0\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.981351\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.752976\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.53125\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.640377\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.546371\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 1\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.927419\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.896825\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.457157\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.700397\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.741935\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 2\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.970766\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.780258\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.94254\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.990079\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.895665\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 3\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.972278\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.770833\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.939516\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.990575\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.914819\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Task 4\": {\n",
      "\t\t\"Task 0\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.970766\n",
      "\t\t},\n",
      "\t\t\"Task 1\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.708333\n",
      "\t\t},\n",
      "\t\t\"Task 2\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.88004\n",
      "\t\t},\n",
      "\t\t\"Task 3\": {\n",
      "\t\t\t\"n_samples\": 2016,\n",
      "\t\t\t\"accuracy\": 0.989583\n",
      "\t\t},\n",
      "\t\t\"Task 4\": {\n",
      "\t\t\t\"n_samples\": 1984,\n",
      "\t\t\t\"accuracy\": 0.983367\n",
      "\t\t}\n",
      "\t},\n",
      "\t\"Final/Average Online Performance\": 0,\n",
      "\t\"Final/Average Final Performance\": 0.90605,\n",
      "\t\"Final/Runtime (seconds)\": 36.293361921000006,\n",
      "\t\"Final/CL Score\": 0.74363\n",
      "}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "improved_method = ImprovedDemoMethod(hparams=ImprovedDemoMethod.HParams())\n",
    "setting = DomainIncrementalSetting(dataset=\"fashionmnist\")\n",
    "improved_results = setting.apply(improved_method)"
   ]
  },
  {
   "source": [
    "## Improved Results"
   ],
   "cell_type": "code",
   "metadata": {},
   "execution_count": 10,
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "{\n\t\"Task 0\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.981351\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.752976\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.53125\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.640377\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.546371\n\t\t}\n\t},\n\t\"Task 1\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.927419\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.896825\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.457157\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.700397\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.741935\n\t\t}\n\t},\n\t\"Task 2\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.970766\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.780258\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.94254\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.990079\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.895665\n\t\t}\n\t},\n\t\"Task 3\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.972278\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.770833\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.939516\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.990575\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.914819\n\t\t}\n\t},\n\t\"Task 4\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.970766\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.708333\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.88004\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.989583\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.983367\n\t\t}\n\t},\n\t\"Final/Average Online Performance\": 0,\n\t\"Final/Average Final Performance\": 0.90605,\n\t\"Final/Runtime (seconds)\": 36.293361921000006,\n\t\"Final/CL Score\": 0.74363\n}\n\n"
     ]
    }
   ],
   "source": [
    "print(improved_results.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "{'task_metrics': <Figure size 432x288 with 1 Axes>}"
      ]
     },
     "metadata": {},
     "execution_count": 12
    },
    {
     "output_type": "display_data",
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n  \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"277.314375pt\" version=\"1.1\" viewBox=\"0 0 385.78125 277.314375\" width=\"385.78125pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <metadata>\n  <rdf:RDF xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n   <cc:Work>\n    <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n    <dc:date>2021-02-25T17:30:07.306773</dc:date>\n    <dc:format>image/svg+xml</dc:format>\n    <dc:creator>\n     <cc:Agent>\n      <dc:title>Matplotlib v3.3.4, https://matplotlib.org/</dc:title>\n     </cc:Agent>\n    </dc:creator>\n   </cc:Work>\n  </rdf:RDF>\n </metadata>\n <defs>\n  <style type=\"text/css\">*{stroke-linecap:butt;stroke-linejoin:round;}</style>\n </defs>\n <g id=\"figure_1\">\n  <g id=\"patch_1\">\n   <path d=\"M 0 277.314375 \nL 385.78125 277.314375 \nL 385.78125 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n  </g>\n  <g id=\"axes_1\">\n   <g id=\"patch_2\">\n    <path d=\"M 43.78125 239.758125 \nL 378.58125 239.758125 \nL 378.58125 22.318125 \nL 43.78125 22.318125 \nz\n\" style=\"fill:#ffffff;\"/>\n   </g>\n   <g id=\"patch_3\">\n    <path clip-path=\"url(#p41c9b441b6)\" d=\"M 58.999432 239.758125 \nL 109.726705 239.758125 \nL 109.726705 28.674766 \nL 58.999432 28.674766 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_4\">\n    <path clip-path=\"url(#p41c9b441b6)\" d=\"M 122.408523 239.758125 \nL 173.135795 239.758125 \nL 173.135795 85.738197 \nL 122.408523 85.738197 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_5\">\n    <path clip-path=\"url(#p41c9b441b6)\" d=\"M 185.817614 239.758125 \nL 236.544886 239.758125 \nL 236.544886 48.402227 \nL 185.817614 48.402227 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_6\">\n    <path clip-path=\"url(#p41c9b441b6)\" d=\"M 249.226705 239.758125 \nL 299.953977 239.758125 \nL 299.953977 24.583197 \nL 249.226705 24.583197 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_7\">\n    <path clip-path=\"url(#p41c9b441b6)\" d=\"M 312.635795 239.758125 \nL 363.363068 239.758125 \nL 363.363068 25.934805 \nL 312.635795 25.934805 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"matplotlib.axis_1\">\n    <g id=\"xtick_1\">\n     <g id=\"line2d_1\">\n      <defs>\n       <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"me6157de1af\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n      </defs>\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"84.363068\" xlink:href=\"#me6157de1af\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_1\">\n      <!-- 0 -->\n      <g transform=\"translate(81.181818 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_2\">\n     <g id=\"line2d_2\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"147.772159\" xlink:href=\"#me6157de1af\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_2\">\n      <!-- 1 -->\n      <g transform=\"translate(144.590909 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 12.40625 8.296875 \nL 28.515625 8.296875 \nL 28.515625 63.921875 \nL 10.984375 60.40625 \nL 10.984375 69.390625 \nL 28.421875 72.90625 \nL 38.28125 72.90625 \nL 38.28125 8.296875 \nL 54.390625 8.296875 \nL 54.390625 0 \nL 12.40625 0 \nz\n\" id=\"DejaVuSans-49\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-49\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_3\">\n     <g id=\"line2d_3\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"211.18125\" xlink:href=\"#me6157de1af\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_3\">\n      <!-- 2 -->\n      <g transform=\"translate(208 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 19.1875 8.296875 \nL 53.609375 8.296875 \nL 53.609375 0 \nL 7.328125 0 \nL 7.328125 8.296875 \nQ 12.9375 14.109375 22.625 23.890625 \nQ 32.328125 33.6875 34.8125 36.53125 \nQ 39.546875 41.84375 41.421875 45.53125 \nQ 43.3125 49.21875 43.3125 52.78125 \nQ 43.3125 58.59375 39.234375 62.25 \nQ 35.15625 65.921875 28.609375 65.921875 \nQ 23.96875 65.921875 18.8125 64.3125 \nQ 13.671875 62.703125 7.8125 59.421875 \nL 7.8125 69.390625 \nQ 13.765625 71.78125 18.9375 73 \nQ 24.125 74.21875 28.421875 74.21875 \nQ 39.75 74.21875 46.484375 68.546875 \nQ 53.21875 62.890625 53.21875 53.421875 \nQ 53.21875 48.921875 51.53125 44.890625 \nQ 49.859375 40.875 45.40625 35.40625 \nQ 44.1875 33.984375 37.640625 27.21875 \nQ 31.109375 20.453125 19.1875 8.296875 \nz\n\" id=\"DejaVuSans-50\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-50\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_4\">\n     <g id=\"line2d_4\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"274.590341\" xlink:href=\"#me6157de1af\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_4\">\n      <!-- 3 -->\n      <g transform=\"translate(271.409091 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 40.578125 39.3125 \nQ 47.65625 37.796875 51.625 33 \nQ 55.609375 28.21875 55.609375 21.1875 \nQ 55.609375 10.40625 48.1875 4.484375 \nQ 40.765625 -1.421875 27.09375 -1.421875 \nQ 22.515625 -1.421875 17.65625 -0.515625 \nQ 12.796875 0.390625 7.625 2.203125 \nL 7.625 11.71875 \nQ 11.71875 9.328125 16.59375 8.109375 \nQ 21.484375 6.890625 26.8125 6.890625 \nQ 36.078125 6.890625 40.9375 10.546875 \nQ 45.796875 14.203125 45.796875 21.1875 \nQ 45.796875 27.640625 41.28125 31.265625 \nQ 36.765625 34.90625 28.71875 34.90625 \nL 20.21875 34.90625 \nL 20.21875 43.015625 \nL 29.109375 43.015625 \nQ 36.375 43.015625 40.234375 45.921875 \nQ 44.09375 48.828125 44.09375 54.296875 \nQ 44.09375 59.90625 40.109375 62.90625 \nQ 36.140625 65.921875 28.71875 65.921875 \nQ 24.65625 65.921875 20.015625 65.03125 \nQ 15.375 64.15625 9.8125 62.3125 \nL 9.8125 71.09375 \nQ 15.4375 72.65625 20.34375 73.4375 \nQ 25.25 74.21875 29.59375 74.21875 \nQ 40.828125 74.21875 47.359375 69.109375 \nQ 53.90625 64.015625 53.90625 55.328125 \nQ 53.90625 49.265625 50.4375 45.09375 \nQ 46.96875 40.921875 40.578125 39.3125 \nz\n\" id=\"DejaVuSans-51\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-51\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_5\">\n     <g id=\"line2d_5\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"337.999432\" xlink:href=\"#me6157de1af\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_5\">\n      <!-- 4 -->\n      <g transform=\"translate(334.818182 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 37.796875 64.3125 \nL 12.890625 25.390625 \nL 37.796875 25.390625 \nz\nM 35.203125 72.90625 \nL 47.609375 72.90625 \nL 47.609375 25.390625 \nL 58.015625 25.390625 \nL 58.015625 17.1875 \nL 47.609375 17.1875 \nL 47.609375 0 \nL 37.796875 0 \nL 37.796875 17.1875 \nL 4.890625 17.1875 \nL 4.890625 26.703125 \nz\n\" id=\"DejaVuSans-52\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-52\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"text_6\">\n     <!-- Task -->\n     <g transform=\"translate(200.388281 268.034687)scale(0.1 -0.1)\">\n      <defs>\n       <path d=\"M -0.296875 72.90625 \nL 61.375 72.90625 \nL 61.375 64.59375 \nL 35.5 64.59375 \nL 35.5 0 \nL 25.59375 0 \nL 25.59375 64.59375 \nL -0.296875 64.59375 \nz\n\" id=\"DejaVuSans-84\"/>\n       <path d=\"M 34.28125 27.484375 \nQ 23.390625 27.484375 19.1875 25 \nQ 14.984375 22.515625 14.984375 16.5 \nQ 14.984375 11.71875 18.140625 8.90625 \nQ 21.296875 6.109375 26.703125 6.109375 \nQ 34.1875 6.109375 38.703125 11.40625 \nQ 43.21875 16.703125 43.21875 25.484375 \nL 43.21875 27.484375 \nz\nM 52.203125 31.203125 \nL 52.203125 0 \nL 43.21875 0 \nL 43.21875 8.296875 \nQ 40.140625 3.328125 35.546875 0.953125 \nQ 30.953125 -1.421875 24.3125 -1.421875 \nQ 15.921875 -1.421875 10.953125 3.296875 \nQ 6 8.015625 6 15.921875 \nQ 6 25.140625 12.171875 29.828125 \nQ 18.359375 34.515625 30.609375 34.515625 \nL 43.21875 34.515625 \nL 43.21875 35.40625 \nQ 43.21875 41.609375 39.140625 45 \nQ 35.0625 48.390625 27.6875 48.390625 \nQ 23 48.390625 18.546875 47.265625 \nQ 14.109375 46.140625 10.015625 43.890625 \nL 10.015625 52.203125 \nQ 14.9375 54.109375 19.578125 55.046875 \nQ 24.21875 56 28.609375 56 \nQ 40.484375 56 46.34375 49.84375 \nQ 52.203125 43.703125 52.203125 31.203125 \nz\n\" id=\"DejaVuSans-97\"/>\n       <path d=\"M 44.28125 53.078125 \nL 44.28125 44.578125 \nQ 40.484375 46.53125 36.375 47.5 \nQ 32.28125 48.484375 27.875 48.484375 \nQ 21.1875 48.484375 17.84375 46.4375 \nQ 14.5 44.390625 14.5 40.28125 \nQ 14.5 37.15625 16.890625 35.375 \nQ 19.28125 33.59375 26.515625 31.984375 \nL 29.59375 31.296875 \nQ 39.15625 29.25 43.1875 25.515625 \nQ 47.21875 21.78125 47.21875 15.09375 \nQ 47.21875 7.46875 41.1875 3.015625 \nQ 35.15625 -1.421875 24.609375 -1.421875 \nQ 20.21875 -1.421875 15.453125 -0.5625 \nQ 10.6875 0.296875 5.421875 2 \nL 5.421875 11.28125 \nQ 10.40625 8.6875 15.234375 7.390625 \nQ 20.0625 6.109375 24.8125 6.109375 \nQ 31.15625 6.109375 34.5625 8.28125 \nQ 37.984375 10.453125 37.984375 14.40625 \nQ 37.984375 18.0625 35.515625 20.015625 \nQ 33.0625 21.96875 24.703125 23.78125 \nL 21.578125 24.515625 \nQ 13.234375 26.265625 9.515625 29.90625 \nQ 5.8125 33.546875 5.8125 39.890625 \nQ 5.8125 47.609375 11.28125 51.796875 \nQ 16.75 56 26.8125 56 \nQ 31.78125 56 36.171875 55.265625 \nQ 40.578125 54.546875 44.28125 53.078125 \nz\n\" id=\"DejaVuSans-115\"/>\n       <path d=\"M 9.078125 75.984375 \nL 18.109375 75.984375 \nL 18.109375 31.109375 \nL 44.921875 54.6875 \nL 56.390625 54.6875 \nL 27.390625 29.109375 \nL 57.625 0 \nL 45.90625 0 \nL 18.109375 26.703125 \nL 18.109375 0 \nL 9.078125 0 \nz\n\" id=\"DejaVuSans-107\"/>\n      </defs>\n      <use xlink:href=\"#DejaVuSans-84\"/>\n      <use x=\"44.583984\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"105.863281\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"157.962891\" xlink:href=\"#DejaVuSans-107\"/>\n     </g>\n    </g>\n   </g>\n   <g id=\"matplotlib.axis_2\">\n    <g id=\"ytick_1\">\n     <g id=\"line2d_6\">\n      <defs>\n       <path d=\"M 0 0 \nL -3.5 0 \n\" id=\"m0e5382894a\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n      </defs>\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m0e5382894a\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_7\">\n      <!-- 0.0 -->\n      <g transform=\"translate(20.878125 243.557344)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 10.6875 12.40625 \nL 21 12.40625 \nL 21 0 \nL 10.6875 0 \nz\n\" id=\"DejaVuSans-46\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_2\">\n     <g id=\"line2d_7\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m0e5382894a\" y=\"196.270125\"/>\n      </g>\n     </g>\n     <g id=\"text_8\">\n      <!-- 0.2 -->\n      <g transform=\"translate(20.878125 200.069344)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-50\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_3\">\n     <g id=\"line2d_8\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m0e5382894a\" y=\"152.782125\"/>\n      </g>\n     </g>\n     <g id=\"text_9\">\n      <!-- 0.4 -->\n      <g transform=\"translate(20.878125 156.581344)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-52\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_4\">\n     <g id=\"line2d_9\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m0e5382894a\" y=\"109.294125\"/>\n      </g>\n     </g>\n     <g id=\"text_10\">\n      <!-- 0.6 -->\n      <g transform=\"translate(20.878125 113.093344)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 33.015625 40.375 \nQ 26.375 40.375 22.484375 35.828125 \nQ 18.609375 31.296875 18.609375 23.390625 \nQ 18.609375 15.53125 22.484375 10.953125 \nQ 26.375 6.390625 33.015625 6.390625 \nQ 39.65625 6.390625 43.53125 10.953125 \nQ 47.40625 15.53125 47.40625 23.390625 \nQ 47.40625 31.296875 43.53125 35.828125 \nQ 39.65625 40.375 33.015625 40.375 \nz\nM 52.59375 71.296875 \nL 52.59375 62.3125 \nQ 48.875 64.0625 45.09375 64.984375 \nQ 41.3125 65.921875 37.59375 65.921875 \nQ 27.828125 65.921875 22.671875 59.328125 \nQ 17.53125 52.734375 16.796875 39.40625 \nQ 19.671875 43.65625 24.015625 45.921875 \nQ 28.375 48.1875 33.59375 48.1875 \nQ 44.578125 48.1875 50.953125 41.515625 \nQ 57.328125 34.859375 57.328125 23.390625 \nQ 57.328125 12.15625 50.6875 5.359375 \nQ 44.046875 -1.421875 33.015625 -1.421875 \nQ 20.359375 -1.421875 13.671875 8.265625 \nQ 6.984375 17.96875 6.984375 36.375 \nQ 6.984375 53.65625 15.1875 63.9375 \nQ 23.390625 74.21875 37.203125 74.21875 \nQ 40.921875 74.21875 44.703125 73.484375 \nQ 48.484375 72.75 52.59375 71.296875 \nz\n\" id=\"DejaVuSans-54\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-54\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_5\">\n     <g id=\"line2d_10\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m0e5382894a\" y=\"65.806125\"/>\n      </g>\n     </g>\n     <g id=\"text_11\">\n      <!-- 0.8 -->\n      <g transform=\"translate(20.878125 69.605344)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 31.78125 34.625 \nQ 24.75 34.625 20.71875 30.859375 \nQ 16.703125 27.09375 16.703125 20.515625 \nQ 16.703125 13.921875 20.71875 10.15625 \nQ 24.75 6.390625 31.78125 6.390625 \nQ 38.8125 6.390625 42.859375 10.171875 \nQ 46.921875 13.96875 46.921875 20.515625 \nQ 46.921875 27.09375 42.890625 30.859375 \nQ 38.875 34.625 31.78125 34.625 \nz\nM 21.921875 38.8125 \nQ 15.578125 40.375 12.03125 44.71875 \nQ 8.5 49.078125 8.5 55.328125 \nQ 8.5 64.0625 14.71875 69.140625 \nQ 20.953125 74.21875 31.78125 74.21875 \nQ 42.671875 74.21875 48.875 69.140625 \nQ 55.078125 64.0625 55.078125 55.328125 \nQ 55.078125 49.078125 51.53125 44.71875 \nQ 48 40.375 41.703125 38.8125 \nQ 48.828125 37.15625 52.796875 32.3125 \nQ 56.78125 27.484375 56.78125 20.515625 \nQ 56.78125 9.90625 50.3125 4.234375 \nQ 43.84375 -1.421875 31.78125 -1.421875 \nQ 19.734375 -1.421875 13.25 4.234375 \nQ 6.78125 9.90625 6.78125 20.515625 \nQ 6.78125 27.484375 10.78125 32.3125 \nQ 14.796875 37.15625 21.921875 38.8125 \nz\nM 18.3125 54.390625 \nQ 18.3125 48.734375 21.84375 45.5625 \nQ 25.390625 42.390625 31.78125 42.390625 \nQ 38.140625 42.390625 41.71875 45.5625 \nQ 45.3125 48.734375 45.3125 54.390625 \nQ 45.3125 60.0625 41.71875 63.234375 \nQ 38.140625 66.40625 31.78125 66.40625 \nQ 25.390625 66.40625 21.84375 63.234375 \nQ 18.3125 60.0625 18.3125 54.390625 \nz\n\" id=\"DejaVuSans-56\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-56\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_6\">\n     <g id=\"line2d_11\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m0e5382894a\" y=\"22.318125\"/>\n      </g>\n     </g>\n     <g id=\"text_12\">\n      <!-- 1.0 -->\n      <g transform=\"translate(20.878125 26.117344)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-49\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"text_13\">\n     <!-- Accuracy -->\n     <g transform=\"translate(14.798438 153.86625)rotate(-90)scale(0.1 -0.1)\">\n      <defs>\n       <path d=\"M 34.1875 63.1875 \nL 20.796875 26.90625 \nL 47.609375 26.90625 \nz\nM 28.609375 72.90625 \nL 39.796875 72.90625 \nL 67.578125 0 \nL 57.328125 0 \nL 50.6875 18.703125 \nL 17.828125 18.703125 \nL 11.1875 0 \nL 0.78125 0 \nz\n\" id=\"DejaVuSans-65\"/>\n       <path d=\"M 48.78125 52.59375 \nL 48.78125 44.1875 \nQ 44.96875 46.296875 41.140625 47.34375 \nQ 37.3125 48.390625 33.40625 48.390625 \nQ 24.65625 48.390625 19.8125 42.84375 \nQ 14.984375 37.3125 14.984375 27.296875 \nQ 14.984375 17.28125 19.8125 11.734375 \nQ 24.65625 6.203125 33.40625 6.203125 \nQ 37.3125 6.203125 41.140625 7.25 \nQ 44.96875 8.296875 48.78125 10.40625 \nL 48.78125 2.09375 \nQ 45.015625 0.34375 40.984375 -0.53125 \nQ 36.96875 -1.421875 32.421875 -1.421875 \nQ 20.0625 -1.421875 12.78125 6.34375 \nQ 5.515625 14.109375 5.515625 27.296875 \nQ 5.515625 40.671875 12.859375 48.328125 \nQ 20.21875 56 33.015625 56 \nQ 37.15625 56 41.109375 55.140625 \nQ 45.0625 54.296875 48.78125 52.59375 \nz\n\" id=\"DejaVuSans-99\"/>\n       <path d=\"M 8.5 21.578125 \nL 8.5 54.6875 \nL 17.484375 54.6875 \nL 17.484375 21.921875 \nQ 17.484375 14.15625 20.5 10.265625 \nQ 23.53125 6.390625 29.59375 6.390625 \nQ 36.859375 6.390625 41.078125 11.03125 \nQ 45.3125 15.671875 45.3125 23.6875 \nL 45.3125 54.6875 \nL 54.296875 54.6875 \nL 54.296875 0 \nL 45.3125 0 \nL 45.3125 8.40625 \nQ 42.046875 3.421875 37.71875 1 \nQ 33.40625 -1.421875 27.6875 -1.421875 \nQ 18.265625 -1.421875 13.375 4.4375 \nQ 8.5 10.296875 8.5 21.578125 \nz\nM 31.109375 56 \nz\n\" id=\"DejaVuSans-117\"/>\n       <path d=\"M 41.109375 46.296875 \nQ 39.59375 47.171875 37.8125 47.578125 \nQ 36.03125 48 33.890625 48 \nQ 26.265625 48 22.1875 43.046875 \nQ 18.109375 38.09375 18.109375 28.8125 \nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.1875 \nQ 20.953125 51.171875 25.484375 53.578125 \nQ 30.03125 56 36.53125 56 \nQ 37.453125 56 38.578125 55.875 \nQ 39.703125 55.765625 41.0625 55.515625 \nz\n\" id=\"DejaVuSans-114\"/>\n       <path d=\"M 32.171875 -5.078125 \nQ 28.375 -14.84375 24.75 -17.8125 \nQ 21.140625 -20.796875 15.09375 -20.796875 \nL 7.90625 -20.796875 \nL 7.90625 -13.28125 \nL 13.1875 -13.28125 \nQ 16.890625 -13.28125 18.9375 -11.515625 \nQ 21 -9.765625 23.484375 -3.21875 \nL 25.09375 0.875 \nL 2.984375 54.6875 \nL 12.5 54.6875 \nL 29.59375 11.921875 \nL 46.6875 54.6875 \nL 56.203125 54.6875 \nz\n\" id=\"DejaVuSans-121\"/>\n      </defs>\n      <use xlink:href=\"#DejaVuSans-65\"/>\n      <use x=\"66.658203\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"121.638672\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"176.619141\" xlink:href=\"#DejaVuSans-117\"/>\n      <use x=\"239.998047\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"281.111328\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"342.390625\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"397.371094\" xlink:href=\"#DejaVuSans-121\"/>\n     </g>\n    </g>\n   </g>\n   <g id=\"patch_8\">\n    <path d=\"M 43.78125 239.758125 \nL 43.78125 22.318125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_9\">\n    <path d=\"M 378.58125 239.758125 \nL 378.58125 22.318125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_10\">\n    <path d=\"M 43.78125 239.758125 \nL 378.58125 239.758125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_11\">\n    <path d=\"M 43.78125 22.318125 \nL 378.58125 22.318125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"text_14\">\n    <!-- 97% -->\n    <g transform=\"translate(73.249787 23.595078)scale(0.1 -0.1)\">\n     <defs>\n      <path d=\"M 10.984375 1.515625 \nL 10.984375 10.5 \nQ 14.703125 8.734375 18.5 7.8125 \nQ 22.3125 6.890625 25.984375 6.890625 \nQ 35.75 6.890625 40.890625 13.453125 \nQ 46.046875 20.015625 46.78125 33.40625 \nQ 43.953125 29.203125 39.59375 26.953125 \nQ 35.25 24.703125 29.984375 24.703125 \nQ 19.046875 24.703125 12.671875 31.3125 \nQ 6.296875 37.9375 6.296875 49.421875 \nQ 6.296875 60.640625 12.9375 67.421875 \nQ 19.578125 74.21875 30.609375 74.21875 \nQ 43.265625 74.21875 49.921875 64.515625 \nQ 56.59375 54.828125 56.59375 36.375 \nQ 56.59375 19.140625 48.40625 8.859375 \nQ 40.234375 -1.421875 26.421875 -1.421875 \nQ 22.703125 -1.421875 18.890625 -0.6875 \nQ 15.09375 0.046875 10.984375 1.515625 \nz\nM 30.609375 32.421875 \nQ 37.25 32.421875 41.125 36.953125 \nQ 45.015625 41.5 45.015625 49.421875 \nQ 45.015625 57.28125 41.125 61.84375 \nQ 37.25 66.40625 30.609375 66.40625 \nQ 23.96875 66.40625 20.09375 61.84375 \nQ 16.21875 57.28125 16.21875 49.421875 \nQ 16.21875 41.5 20.09375 36.953125 \nQ 23.96875 32.421875 30.609375 32.421875 \nz\n\" id=\"DejaVuSans-57\"/>\n      <path d=\"M 8.203125 72.90625 \nL 55.078125 72.90625 \nL 55.078125 68.703125 \nL 28.609375 0 \nL 18.3125 0 \nL 43.21875 64.59375 \nL 8.203125 64.59375 \nz\n\" id=\"DejaVuSans-55\"/>\n      <path d=\"M 72.703125 32.078125 \nQ 68.453125 32.078125 66.03125 28.46875 \nQ 63.625 24.859375 63.625 18.40625 \nQ 63.625 12.0625 66.03125 8.421875 \nQ 68.453125 4.78125 72.703125 4.78125 \nQ 76.859375 4.78125 79.265625 8.421875 \nQ 81.6875 12.0625 81.6875 18.40625 \nQ 81.6875 24.8125 79.265625 28.4375 \nQ 76.859375 32.078125 72.703125 32.078125 \nz\nM 72.703125 38.28125 \nQ 80.421875 38.28125 84.953125 32.90625 \nQ 89.5 27.546875 89.5 18.40625 \nQ 89.5 9.28125 84.9375 3.921875 \nQ 80.375 -1.421875 72.703125 -1.421875 \nQ 64.890625 -1.421875 60.34375 3.921875 \nQ 55.8125 9.28125 55.8125 18.40625 \nQ 55.8125 27.59375 60.375 32.9375 \nQ 64.9375 38.28125 72.703125 38.28125 \nz\nM 22.3125 68.015625 \nQ 18.109375 68.015625 15.6875 64.375 \nQ 13.28125 60.75 13.28125 54.390625 \nQ 13.28125 47.953125 15.671875 44.328125 \nQ 18.0625 40.71875 22.3125 40.71875 \nQ 26.5625 40.71875 28.96875 44.328125 \nQ 31.390625 47.953125 31.390625 54.390625 \nQ 31.390625 60.6875 28.953125 64.34375 \nQ 26.515625 68.015625 22.3125 68.015625 \nz\nM 66.40625 74.21875 \nL 74.21875 74.21875 \nL 28.609375 -1.421875 \nL 20.796875 -1.421875 \nz\nM 22.3125 74.21875 \nQ 30.03125 74.21875 34.609375 68.875 \nQ 39.203125 63.53125 39.203125 54.390625 \nQ 39.203125 45.171875 34.640625 39.84375 \nQ 30.078125 34.515625 22.3125 34.515625 \nQ 14.546875 34.515625 10.03125 39.859375 \nQ 5.515625 45.21875 5.515625 54.390625 \nQ 5.515625 63.484375 10.046875 68.84375 \nQ 14.59375 74.21875 22.3125 74.21875 \nz\n\" id=\"DejaVuSans-37\"/>\n     </defs>\n     <use xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-55\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_15\">\n    <!-- 71% -->\n    <g transform=\"translate(136.658878 80.65851)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-55\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-49\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_16\">\n    <!-- 88% -->\n    <g transform=\"translate(200.067969 43.32254)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-56\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-56\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_17\">\n    <!-- 99% -->\n    <g transform=\"translate(263.47706 19.50351)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_18\">\n    <!-- 98% -->\n    <g transform=\"translate(326.886151 20.855117)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-56\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_19\">\n    <!-- Task Accuracy -->\n    <g transform=\"translate(168.929063 16.318125)scale(0.12 -0.12)\">\n     <defs>\n      <path id=\"DejaVuSans-32\"/>\n     </defs>\n     <use xlink:href=\"#DejaVuSans-84\"/>\n     <use x=\"44.583984\" xlink:href=\"#DejaVuSans-97\"/>\n     <use x=\"105.863281\" xlink:href=\"#DejaVuSans-115\"/>\n     <use x=\"157.962891\" xlink:href=\"#DejaVuSans-107\"/>\n     <use x=\"215.873047\" xlink:href=\"#DejaVuSans-32\"/>\n     <use x=\"247.660156\" xlink:href=\"#DejaVuSans-65\"/>\n     <use x=\"314.318359\" xlink:href=\"#DejaVuSans-99\"/>\n     <use x=\"369.298828\" xlink:href=\"#DejaVuSans-99\"/>\n     <use x=\"424.279297\" xlink:href=\"#DejaVuSans-117\"/>\n     <use x=\"487.658203\" xlink:href=\"#DejaVuSans-114\"/>\n     <use x=\"528.771484\" xlink:href=\"#DejaVuSans-97\"/>\n     <use x=\"590.050781\" xlink:href=\"#DejaVuSans-99\"/>\n     <use x=\"645.03125\" xlink:href=\"#DejaVuSans-121\"/>\n    </g>\n   </g>\n  </g>\n </g>\n <defs>\n  <clipPath id=\"p41c9b441b6\">\n   <rect height=\"217.44\" width=\"334.8\" x=\"43.78125\" y=\"22.318125\"/>\n  </clipPath>\n </defs>\n</svg>\n",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAcv0lEQVR4nO3de7xUdb3/8dd76ybBS0RCyUUxDyoXE3GHpNmxLNJtiYimmFodf2IXTEXzaL/0qGEXO4QHo6NmHryDphUZikSURxJ1k4ggoWgkFwskhGRUbp/zx1rosNmX2ciaYe/1fj4e83DWmu+s9VkI857v97vWGkUEZmaWX1WVLsDMzCrLQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnILDckTRB0uhK12G2s3AQ2E5P0utFj82S3iha/kKZapggaaOkfcqxP7NychDYTi8i9tjyAF4GPle07q6s9y9pd2AYsAY4M+v91dv3ruXcn+WTg8BaLUkDJT0u6TVJr0j6saR26WuSNFbSCklrJT0rqV8D29hT0gxJ4ySpkV0NA14DrgG+WO/9nST9j6TlklZL+mXRa0MkzUn3/6Kk49L1iyV9qqjdVZLuTJ/3lBSSzpH0MvC7dP19kv4maY2kRyX1LXp/e0ljJP01ff2xdN1vJJ1fr965koa24I/ZcsBBYK3ZJuAiYG/go8CxwNfS1wYDHwcOBN4LfB5YVfxmSe8HpgMzI+Ib0fj9Vr4I3ANMBA6WdHjRa3cAHYC+QBdgbLrtgcDtwDeBjmkti1twbP8K9AY+ky4/BPRK9/EnoLgn9J/A4cCRQCfgUmAzcBtFPRhJhwLdgN+0oA7LAQeBtVoRMTsiZkXExohYDNxE8gEKsAHYEzgYUEQsiIhXit7eFfgDcF9EfLuxfUjaF/gEcHdE/J0kOM5OX9sHOB74SkSsjogNEfGH9K3nALdGxLSI2BwRyyLizy04vKsiYl1EvJEe660R8c+IeAu4CjhU0nslVQH/BlyQ7mNTRPwxbTcZOFBSr3SbZwGTImJ9C+pA0gWS5kmaL+nCdN2haW/sWUm/lrRXuv6otNdRt2W/kjpKeiSt1XZC/h9jrZakAyU9mA6ZrAW+S9I7ICJ+B/wYGA+skHTzlg+r1AlAe+DGZnZzFrAgIuaky3cBZ0iqBnoA/4iI1Q28rwfw4nYeGsCSLU8k7SLp++nw0lre6VnsnT52a2hfEfEmMAk4M/0QHk7SgylZOpx2LjAQOBT4rKR/AW4BLouIQ4BfkPR8AC4GaoELga+k674NfDciNrdk31Y+DgJrzf4b+DPQKyL2Ar4FvD3OHxHjIuJwoA/JENE3i977U+BhYEo6GdyYs4EPpWHzN+BHJB++tSQf1p0kdWzgfUuAAxrZ5jqS4aQtPthAm+JhqjOAIcCnSIa5eqbrBbwKvNnEvm4DvkAybFaIiMcbadeY3sATEVGIiI0kvaiTSf48H03bTCOZR4GkJ9YhfWyQdADQIyJ+38L97jQa6RH1lzQrnQOqS4cCkTQsbfe/6dAjkg6QNKmCh9AsB4G1ZnsCa4HXJR0MfHXLC5I+IumI9Jv7OpIPy/rfSEcCC4FfS2pff+OSPkryATsQ6J8++gF3A2enQ00PAT+R9D5J1ZI+nr79Z8CXJR0rqUpSt7RGgDnA6Wn7GuCUEo7zLZI5jg4kPR8A0m/ZtwI/ktQ17T18VNJ70tcfT497DC3sDaTmAUdLer+kDiQB2AOYTxJOAKem6wC+RzI3cjlJj+xakh5Bq9REj+g64OqI6A9cmS4DnA98hGSY8ox03Wh28j8DB4G1ZpeQ/GP7J8k3/OJvXXul61YDfyX5EP1h8ZvTyeERwFLgV5J2q7f9LwK/iohnI+JvWx7Af5F8IHQiGTraQNIzWUEyJEJEPAl8mWTyeA3JN+n90u1eQRIwq4GrSYKlKbenx7AMeA6Y1cCfw7PAU8A/gB+w9b/t24FDgDub2c82ImJBur1HSHpQc0gm6f8N+Jqk2SRBtT5tPyciBkXEJ4APAa+QnMQ1SdKdkj7Q0hoqrLEeUZD8HYOkl7Y8fb4ZeA/v9IiOBv4WES+Ut+wWigg/SnwAF5B8Q5oPXJium0Tyj2MOydjtnHT9UcBcoI5k6AKSs0ceAaoqfSx+5OdBMrz12A7a1neBr9VbdyDwZL11Sv+udyKZV9mPZCL/2kr/ebTweHsDzwPvJ/lwfxy4IV3/MskQ4DJgv7T9p4HZwK9JAuIRoFOlj6O5hy9WKVG9LuJ64GFJD0bEaUVtxpB8+4N3Js16kkyaXYwnzazM0uGcrwE/eRfb6BIRK9IzqE4GBhWtqyL5e11/0v1sYEpE/COtYXP66EArEhELJG3pEa3jnR7RV4GLIuJ+SZ8nGQr8VERMI5kzQdLZwBSSM7cuIekBXhARhfIfSdMyGxqSdKuSi3nmNfK6lFzEsyg93WxAVrXsII11EYHkeEjOVb8nXdXmJs2sdZH0GWAl8HeaH35qyv2SniP5lvv1iHgNGC7peZIhseXA/xTttwPwJZIztiCZYJ8CXE/zZ2ntdCLiZxFxeER8nOTD/HmSYcMH0ib3kXxBfFu9P4Or0/aPkUzc73wy7FJ9HBgAzGvk9VqSiTYBg0g+ZCveRWppF7He8dYVLfcnGcudAXQnuRipV6WPww8//GjZA+iS/ndfkuDrCCwAjknXHwvMrvee/wBOSp8/mn5mnEXSI6j4MdV/ZDY0FBGPSurZRJMhwO2R/EnNSi862Se2vuhnpxGNdxG3GM47vQEiOe98EEB6Jsnbk2YkvYWLI7lAycx2bvenp4JuIO0RSToX+C8l94J6k+SkAwAkdQUGRsTV6aobSCbyXwNOKmfhpVKaWNlsPAmCByOioXu8PAh8PyIeS5enA/8eEXUNtB1B+ge9++67H37wwQfXb1J2y5Yto7q6mi5duhARzJ07l969e9OuXbut2kUEL7zwAh/60IdYsmQJXbt2Zf369axdu5Zu3bpVqHozy5vZs2e/GhGdG3qtVUwWR8TNwM0ANTU1UVe3TVaUxYoVK+jSpQsvv/wygwcPZtasWXTs2JGHH36Y733ve/zhD3/Y5j233XYbq1ev5sILL2To0KGMGzeOxYsX88ADDzB27NgKHIWZ5ZGkvzb2WiWDYBnvXIQCyTj6sgrVUpJhw4axatUqqqurGT9+PB07dgRg4sSJDB8+fJv2hUKBCRMm8MgjjwAwatQoamtradeuHXff/W7m7szMdpxKDg2dQHJlZy1wBDAuIgbWb1dfJXsEZq1Vz8vaxg1HF3//hEqX0GpJmh0RNQ29llmPQNI9wDHA3pKWksyiVwNExI0kp5PVAouAAslVmGZmO1RbCUHILgizPGto27GSrV8P4OtZ7d/MzErjew2ZmeVcqzhraEdxF9HMbFvuEZiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYFaCsWPH0rdvX/r168fw4cN58803mT59OgMGDKB///587GMfY9GiRQDccMMN9OvXj9raWtavXw/AY489xkUXXVTJQzBrlIPArBnLli1j3Lhx1NXVMW/ePDZt2sTEiRP56le/yl133cWcOXM444wzGD16NAB33XUXc+fO5cgjj2Tq1KlEBN/5zne44oorKnwkZg1zEJiVYOPGjbzxxhts3LiRQqFA165dkcTatWsBWLNmDV27dgWS36DYsGEDhUKB6upq7rzzTo4//ng6depUyUMwa1Suriw22x7dunXjkksuYd9996V9+/YMHjyYwYMHc8stt1BbW0v79u3Za6+9mDVrFgAjR45k0KBB9O3bl6OOOoohQ4YwderUCh+FWePcIzBrxurVq/nVr37FX/7yF5YvX866deu48847GTt2LFOmTGHp0qV8+ctfZtSoUQCcddZZPP3002+3+cY3vsFDDz3EKaecwkUXXcTmzZsrfERmW3MQmDXjt7/9Lfvvvz+dO3emurqak08+mZkzZ/LMM89wxBFHAHDaaafxxz/+cav3LV++nCeffJKTTjqJMWPGMGnSJDp27Mj06dMrcRhmjXIQmDVj3333ZdasWRQKBSKC6dOn06dPH9asWcPzzz8PwLRp0+jdu/dW77viiiu45pprAHjjjTeQRFVVFYVCoezHYNYUzxGYNeOII47glFNOYcCAAey6664cdthhjBgxgu7duzNs2DCqqqp43/vex6233vr2e55++mkABgwYAMAZZ5zBIYccQo8ePbj00ksrchxmjcn0pyqz8G5+qtK3oba8ait/97fn731bOXZ4d//um/qpSg8NmZnlnIPAzCznHARmZjnnyWLLBY8TmzXOPQIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOZRoEko6TtFDSIkmXNfD6vpJmSHpa0lxJtVnWY2Zm28osCCTtAowHjgf6AMMl9anX7NvAvRFxGHA68JOs6jEzs4Zl2SMYCCyKiJciYj0wERhSr00Ae6XP3wssz7AeMzNrQJZB0A1YUrS8NF1X7CrgTElLgSnA+Q1tSNIISXWS6lauXJlFrWZmuVXpyeLhwISI6A7UAndI2qamiLg5ImoioqZz585lL9LMrC3LMgiWAT2Klrun64qdA9wLEBGPA7sBe2dYk5mZ1ZNlEDwF9JK0v6R2JJPBk+u1eRk4FkBSb5Ig8NiPmVkZZRYEEbERGAlMBRaQnB00X9I1kk5Mm10MnCvpGeAe4EsREVnVZGZm29o1y41HxBSSSeDidVcWPX8OOCrLGszMrGmVniw2M7MKcxCYmeWcg8BKsnDhQvr37//2Y6+99uL666/nvvvuo2/fvlRVVVFXV/d2+5kzZ/LhD3+YmpoaXnjhBQBee+01Bg8ezObNmyt1GGbWgEznCKztOOigg5gzZw4AmzZtolu3bgwdOpRCocADDzzAeeedt1X7MWPGMGXKFBYvXsyNN97ImDFjGD16NN/61reoqvL3D7OdiYPAWmz69OkccMAB7Lfffo22qa6uplAoUCgUqK6u5sUXX2TJkiUcc8wx5SvUzEriILAWmzhxIsOHD2+yzeWXX87ZZ59N+/btueOOO7jkkksYPXp0mSo0s5ZwH91aZP369UyePJlTTz21yXb9+/dn1qxZzJgxg5deeol99tmHiOC0007jzDPP5O9//3uZKjaz5rhHYC3y0EMPMWDAAD7wgQ+U1D4iGD16NBMnTuT888/nuuuuY/HixYwbN45rr70242rNrBTuEViL3HPPPc0OCxW7/fbbqa2tpVOnThQKBaqqqqiqqqJQKGRYpZm1hHsEVrJ169Yxbdo0brrpprfX/eIXv+D8889n5cqVnHDCCfTv35+pU6cCUCgUmDBhAo888ggAo0aNora2lnbt2nH33XdX5BjMbFsOAivZ7rvvzqpVq7ZaN3ToUIYOHdpg+w4dOjBjxoy3l48++mieffbZTGs0s5bz0JCZWc45CMzMcs5BYGaWc54jyJGel/2m0iXsEIu/f0KlSzBrU9wjMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMci7TIJB0nKSFkhZJuqyRNp+X9Jyk+ZLuzrIeMzPbVmY/Xi9pF2A88GlgKfCUpMkR8VxRm17A5cBREbFaUpes6jEzs4Zl2SMYCCyKiJciYj0wERhSr825wPiIWA0QESsyrMfMzBqQZRB0A5YULS9N1xU7EDhQ0kxJsyQd19CGJI2QVCepbuXKlRmVa2aWT5WeLN4V6AUcAwwHfiqpY/1GEXFzRNRERE3nzp3LW6GZWRvXbBBI+pyk7QmMZUCPouXu6bpiS4HJEbEhIv4CPE8SDGZmVialfMCfBrwg6TpJB7dg208BvSTtL6kdcDowuV6bX5L0BpC0N8lQ0Ust2IeZmb1LzQZBRJwJHAa8CEyQ9Hg6Zr9nM+/bCIwEpgILgHsjYr6kaySdmDabCqyS9BwwA/hmRKx6F8djZmYtVNLpoxGxVtLPgfbAhcBQ4JuSxkXEDU28bwowpd66K4ueBzAqfZiZWQWUMkdwoqRfAL8HqoGBEXE8cChwcbblmZlZ1krpEQwDxkbEo8UrI6Ig6ZxsyjIzs3IpJQiuAl7ZsiCpPfCBiFgcEdOzKszMzMqjlLOG7gM2Fy1vSteZmVkbUEoQ7JreIgKA9Hm77EoyM7NyKiUIVhad7omkIcCr2ZVkZmblVMocwVeAuyT9GBDJ/YPOzrQqMzMrm2aDICJeBAZJ2iNdfj3zqszMrGxKuqBM0glAX2A3SQBExDUZ1mVmZmVSygVlN5Lcb+h8kqGhU4H9Mq7LzMzKpJTJ4iMj4mxgdURcDXyU5OZwZmbWBpQSBG+m/y1I6gpsAPbJriQzMyunUuYIfp3+WMwPgT8BAfw0y6LMzKx8mgyC9AdppkfEa8D9kh4EdouINeUozszMstfk0FBEbAbGFy2/5RAwM2tbSpkjmC5pmLacN2pmZm1KKUFwHslN5t6StFbSPyWtzbguMzMrk1KuLG7yJynNzKx1azYIJH28ofX1f6jGzMxap1JOH/1m0fPdgIHAbOCTmVRkZmZlVcrQ0OeKlyX1AK7PqiAzMyuvUiaL61sK9N7RhZiZWWWUMkdwA8nVxJAER3+SK4zNzKwNKGWOoK7o+UbgnoiYmVE9ZmZWZqUEwc+BNyNiE4CkXSR1iIhCtqWZmVk5lHRlMdC+aLk98NtsyjEzs3IrJQh2K/55yvR5h+xKMjOzciolCNZJGrBlQdLhwBvZlWRmZuVUyhzBhcB9kpaT/FTlB0l+utLMzNqAUi4oe0rSwcBB6aqFEbEh27LMzKxcSvnx+q8Du0fEvIiYB+wh6WvZl2ZmZuVQyhzBuekvlAEQEauBczOryMzMyqqUINil+EdpJO0CtMuuJDMzK6dSJosfBiZJuildPg94KLuSzMysnEoJgn8HRgBfSZfnkpw5ZGZmbUCzQ0PpD9g/ASwm+S2CTwILStm4pOMkLZS0SNJlTbQbJikk1ZRWtpmZ7SiN9ggkHQgMTx+vApMAIuITpWw4nUsYD3ya5NbVT0maHBHP1Wu3J3ABSdiYmVmZNdUj+DPJt//PRsTHIuIGYFMLtj0QWBQRL0XEemAiMKSBdt8BfgC82YJtm5nZDtJUEJwMvALMkPRTSceSXFlcqm7AkqLlpem6t6W3rugREb9pakOSRkiqk1S3cuXKFpRgZmbNaTQIIuKXEXE6cDAwg+RWE10k/bekwe92x5KqgB8BFzfXNiJujoiaiKjp3Lnzu921mZkVKWWyeF1E3J3+dnF34GmSM4maswzoUbTcPV23xZ5AP+D3khYDg4DJnjA2MyuvFv1mcUSsTr+dH1tC86eAXpL2l9QOOB2YXLStNRGxd0T0jIiewCzgxIioa3hzZmaWhe358fqSRMRGYCQwleR003sjYr6kaySdmNV+zcysZUq5oGy7RcQUYEq9dVc20vaYLGsxM7OGZdYjMDOz1sFBYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnOZBoGk4yQtlLRI0mUNvD5K0nOS5kqaLmm/LOsxM7NtZRYEknYBxgPHA32A4ZL61Gv2NFATER8Gfg5cl1U9ZmbWsCx7BAOBRRHxUkSsByYCQ4obRMSMiCiki7OA7hnWY2ZmDcgyCLoBS4qWl6brGnMO8FBDL0gaIalOUt3KlSt3YIlmZrZTTBZLOhOoAX7Y0OsRcXNE1ERETefOnctbnJlZG7drhtteBvQoWu6ertuKpE8B/x/414h4K8N6zMysAVn2CJ4CeknaX1I74HRgcnEDSYcBNwEnRsSKDGsxM7NGZBYEEbERGAlMBRYA90bEfEnXSDoxbfZDYA/gPklzJE1uZHNmZpaRLIeGiIgpwJR6664sev6pLPdvZmbN2ykmi83MrHIcBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzy7lMg0DScZIWSlok6bIGXn+PpEnp609I6pllPWZmtq3MgkDSLsB44HigDzBcUp96zc4BVkfEvwBjgR9kVY+ZmTUsyx7BQGBRRLwUEeuBicCQem2GALelz38OHCtJGdZkZmb1KCKy2bB0CnBcRPy/dPks4IiIGFnUZl7aZmm6/GLa5tV62xoBjEgXDwIWZlL0jrM38GqzrdomH3t+5fn4W8Ox7xcRnRt6YddyV7I9IuJm4OZK11EqSXURUVPpOirBx57PY4d8H39rP/Ysh4aWAT2Klrun6xpsI2lX4L3AqgxrMjOzerIMgqeAXpL2l9QOOB2YXK/NZOCL6fNTgN9FVmNVZmbWoMyGhiJio6SRwFRgF+DWiJgv6RqgLiImAz8D7pC0CPgHSVi0Ba1mGCsDPvb8yvPxt+pjz2yy2MzMWgdfWWxmlnMOAjOznHMQ7EDN3VKjLZN0q6QV6bUhuSKph6QZkp6TNF/SBZWuqVwk7SbpSUnPpMd+daVrqgRJu0h6WtKDla5lezgIdpASb6nRlk0Ajqt0ERWyEbg4IvoAg4Cv5+j//VvAJyPiUKA/cJykQZUtqSIuABZUuojt5SDYcUq5pUabFRGPkpz5lTsR8UpE/Cl9/k+SD4Rula2qPCLxerpYnT5ydQaKpO7ACcAtla5lezkIdpxuwJKi5aXk5MPA3pHeQfcw4IkKl1I26bDIHGAFMC0icnPsqeuBS4HNFa5juzkIzHYQSXsA9wMXRsTaStdTLhGxKSL6k9w9YKCkfhUuqWwkfRZYERGzK13Lu+Eg2HFKuaWGtVGSqklC4K6IeKDS9VRCRLwGzCBfc0VHASdKWkwyHPxJSXdWtqSWcxDsOKXcUsPaoPTW6T8DFkTEjypdTzlJ6iypY/q8PfBp4M8VLaqMIuLyiOgeET1J/s3/LiLOrHBZLeYg2EEiYiOw5ZYaC4B7I2J+ZasqH0n3AI8DB0laKumcStdURkcBZ5F8G5yTPmorXVSZ7APMkDSX5MvQtIholadQ5plvMWFmlnPuEZiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc61ih+vN6skSe8HpqeLHwQ2ASvT5YHpvaWaev+XgJqIGJlZkWbvgoPArBkRsYrkzppIugp4PSL+s5I1me1IHhoy2w6SzpX0VHof/vsldUjXnyppXrr+0Qbed4KkxyXtXf6qzRrmIDDbPg9ExEfS+/AvALZcSX0l8Jl0/YnFb5A0FLgMqI2IV8tarVkTPDRktn36SRoNdAT2ILm1CMBMYIKke4Him899EqgBBufpzqTWOrhHYLZ9JgAjI+IQ4GpgN4CI+ArwbZI70c5OJ5oBXgT2BA4sf6lmTXMQmG2fPYFX0ttPf2HLSkkHRMQTEXElyZlFW25N/ldgGHC7pL5lr9asCQ4Cs+1zBcmvkM1k69su/1DSs5LmAX8EntnyQkT8mSQ07pN0QDmLNWuK7z5qZpZz7hGYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnP/B0iPrwaXcQuCAAAAAElFTkSuQmCC\n"
     },
     "metadata": {
      "needs_background": "light"
     }
    }
   ],
   "source": [
    "improved_results.make_plots()"
   ]
  },
  {
   "source": [
    "## Final Results\n"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "{'task_metrics': <Figure size 432x288 with 1 Axes>}"
      ]
     },
     "metadata": {},
     "execution_count": 13
    },
    {
     "output_type": "display_data",
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n  \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"277.314375pt\" version=\"1.1\" viewBox=\"0 0 385.78125 277.314375\" width=\"385.78125pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <metadata>\n  <rdf:RDF xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n   <cc:Work>\n    <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n    <dc:date>2021-02-25T17:30:07.489874</dc:date>\n    <dc:format>image/svg+xml</dc:format>\n    <dc:creator>\n     <cc:Agent>\n      <dc:title>Matplotlib v3.3.4, https://matplotlib.org/</dc:title>\n     </cc:Agent>\n    </dc:creator>\n   </cc:Work>\n  </rdf:RDF>\n </metadata>\n <defs>\n  <style type=\"text/css\">*{stroke-linecap:butt;stroke-linejoin:round;}</style>\n </defs>\n <g id=\"figure_1\">\n  <g id=\"patch_1\">\n   <path d=\"M 0 277.314375 \nL 385.78125 277.314375 \nL 385.78125 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n  </g>\n  <g id=\"axes_1\">\n   <g id=\"patch_2\">\n    <path d=\"M 43.78125 239.758125 \nL 378.58125 239.758125 \nL 378.58125 22.318125 \nL 43.78125 22.318125 \nz\n\" style=\"fill:#ffffff;\"/>\n   </g>\n   <g id=\"patch_3\">\n    <path clip-path=\"url(#p7ae5f5802d)\" d=\"M 58.999432 239.758125 \nL 109.726705 239.758125 \nL 109.726705 122.818458 \nL 58.999432 122.818458 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_4\">\n    <path clip-path=\"url(#p7ae5f5802d)\" d=\"M 122.408523 239.758125 \nL 173.135795 239.758125 \nL 173.135795 120.252449 \nL 122.408523 120.252449 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_5\">\n    <path clip-path=\"url(#p7ae5f5802d)\" d=\"M 185.817614 239.758125 \nL 236.544886 239.758125 \nL 236.544886 39.963164 \nL 185.817614 39.963164 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_6\">\n    <path clip-path=\"url(#p7ae5f5802d)\" d=\"M 249.226705 239.758125 \nL 299.953977 239.758125 \nL 299.953977 23.612328 \nL 249.226705 23.612328 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_7\">\n    <path clip-path=\"url(#p7ae5f5802d)\" d=\"M 312.635795 239.758125 \nL 363.363068 239.758125 \nL 363.363068 23.304433 \nL 312.635795 23.304433 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"matplotlib.axis_1\">\n    <g id=\"xtick_1\">\n     <g id=\"line2d_1\">\n      <defs>\n       <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"m7725b068bf\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n      </defs>\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"84.363068\" xlink:href=\"#m7725b068bf\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_1\">\n      <!-- 0 -->\n      <g transform=\"translate(81.181818 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_2\">\n     <g id=\"line2d_2\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"147.772159\" xlink:href=\"#m7725b068bf\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_2\">\n      <!-- 1 -->\n      <g transform=\"translate(144.590909 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 12.40625 8.296875 \nL 28.515625 8.296875 \nL 28.515625 63.921875 \nL 10.984375 60.40625 \nL 10.984375 69.390625 \nL 28.421875 72.90625 \nL 38.28125 72.90625 \nL 38.28125 8.296875 \nL 54.390625 8.296875 \nL 54.390625 0 \nL 12.40625 0 \nz\n\" id=\"DejaVuSans-49\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-49\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_3\">\n     <g id=\"line2d_3\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"211.18125\" xlink:href=\"#m7725b068bf\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_3\">\n      <!-- 2 -->\n      <g transform=\"translate(208 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 19.1875 8.296875 \nL 53.609375 8.296875 \nL 53.609375 0 \nL 7.328125 0 \nL 7.328125 8.296875 \nQ 12.9375 14.109375 22.625 23.890625 \nQ 32.328125 33.6875 34.8125 36.53125 \nQ 39.546875 41.84375 41.421875 45.53125 \nQ 43.3125 49.21875 43.3125 52.78125 \nQ 43.3125 58.59375 39.234375 62.25 \nQ 35.15625 65.921875 28.609375 65.921875 \nQ 23.96875 65.921875 18.8125 64.3125 \nQ 13.671875 62.703125 7.8125 59.421875 \nL 7.8125 69.390625 \nQ 13.765625 71.78125 18.9375 73 \nQ 24.125 74.21875 28.421875 74.21875 \nQ 39.75 74.21875 46.484375 68.546875 \nQ 53.21875 62.890625 53.21875 53.421875 \nQ 53.21875 48.921875 51.53125 44.890625 \nQ 49.859375 40.875 45.40625 35.40625 \nQ 44.1875 33.984375 37.640625 27.21875 \nQ 31.109375 20.453125 19.1875 8.296875 \nz\n\" id=\"DejaVuSans-50\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-50\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_4\">\n     <g id=\"line2d_4\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"274.590341\" xlink:href=\"#m7725b068bf\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_4\">\n      <!-- 3 -->\n      <g transform=\"translate(271.409091 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 40.578125 39.3125 \nQ 47.65625 37.796875 51.625 33 \nQ 55.609375 28.21875 55.609375 21.1875 \nQ 55.609375 10.40625 48.1875 4.484375 \nQ 40.765625 -1.421875 27.09375 -1.421875 \nQ 22.515625 -1.421875 17.65625 -0.515625 \nQ 12.796875 0.390625 7.625 2.203125 \nL 7.625 11.71875 \nQ 11.71875 9.328125 16.59375 8.109375 \nQ 21.484375 6.890625 26.8125 6.890625 \nQ 36.078125 6.890625 40.9375 10.546875 \nQ 45.796875 14.203125 45.796875 21.1875 \nQ 45.796875 27.640625 41.28125 31.265625 \nQ 36.765625 34.90625 28.71875 34.90625 \nL 20.21875 34.90625 \nL 20.21875 43.015625 \nL 29.109375 43.015625 \nQ 36.375 43.015625 40.234375 45.921875 \nQ 44.09375 48.828125 44.09375 54.296875 \nQ 44.09375 59.90625 40.109375 62.90625 \nQ 36.140625 65.921875 28.71875 65.921875 \nQ 24.65625 65.921875 20.015625 65.03125 \nQ 15.375 64.15625 9.8125 62.3125 \nL 9.8125 71.09375 \nQ 15.4375 72.65625 20.34375 73.4375 \nQ 25.25 74.21875 29.59375 74.21875 \nQ 40.828125 74.21875 47.359375 69.109375 \nQ 53.90625 64.015625 53.90625 55.328125 \nQ 53.90625 49.265625 50.4375 45.09375 \nQ 46.96875 40.921875 40.578125 39.3125 \nz\n\" id=\"DejaVuSans-51\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-51\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_5\">\n     <g id=\"line2d_5\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"337.999432\" xlink:href=\"#m7725b068bf\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_5\">\n      <!-- 4 -->\n      <g transform=\"translate(334.818182 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 37.796875 64.3125 \nL 12.890625 25.390625 \nL 37.796875 25.390625 \nz\nM 35.203125 72.90625 \nL 47.609375 72.90625 \nL 47.609375 25.390625 \nL 58.015625 25.390625 \nL 58.015625 17.1875 \nL 47.609375 17.1875 \nL 47.609375 0 \nL 37.796875 0 \nL 37.796875 17.1875 \nL 4.890625 17.1875 \nL 4.890625 26.703125 \nz\n\" id=\"DejaVuSans-52\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-52\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"text_6\">\n     <!-- Task -->\n     <g transform=\"translate(200.388281 268.034687)scale(0.1 -0.1)\">\n      <defs>\n       <path d=\"M -0.296875 72.90625 \nL 61.375 72.90625 \nL 61.375 64.59375 \nL 35.5 64.59375 \nL 35.5 0 \nL 25.59375 0 \nL 25.59375 64.59375 \nL -0.296875 64.59375 \nz\n\" id=\"DejaVuSans-84\"/>\n       <path d=\"M 34.28125 27.484375 \nQ 23.390625 27.484375 19.1875 25 \nQ 14.984375 22.515625 14.984375 16.5 \nQ 14.984375 11.71875 18.140625 8.90625 \nQ 21.296875 6.109375 26.703125 6.109375 \nQ 34.1875 6.109375 38.703125 11.40625 \nQ 43.21875 16.703125 43.21875 25.484375 \nL 43.21875 27.484375 \nz\nM 52.203125 31.203125 \nL 52.203125 0 \nL 43.21875 0 \nL 43.21875 8.296875 \nQ 40.140625 3.328125 35.546875 0.953125 \nQ 30.953125 -1.421875 24.3125 -1.421875 \nQ 15.921875 -1.421875 10.953125 3.296875 \nQ 6 8.015625 6 15.921875 \nQ 6 25.140625 12.171875 29.828125 \nQ 18.359375 34.515625 30.609375 34.515625 \nL 43.21875 34.515625 \nL 43.21875 35.40625 \nQ 43.21875 41.609375 39.140625 45 \nQ 35.0625 48.390625 27.6875 48.390625 \nQ 23 48.390625 18.546875 47.265625 \nQ 14.109375 46.140625 10.015625 43.890625 \nL 10.015625 52.203125 \nQ 14.9375 54.109375 19.578125 55.046875 \nQ 24.21875 56 28.609375 56 \nQ 40.484375 56 46.34375 49.84375 \nQ 52.203125 43.703125 52.203125 31.203125 \nz\n\" id=\"DejaVuSans-97\"/>\n       <path d=\"M 44.28125 53.078125 \nL 44.28125 44.578125 \nQ 40.484375 46.53125 36.375 47.5 \nQ 32.28125 48.484375 27.875 48.484375 \nQ 21.1875 48.484375 17.84375 46.4375 \nQ 14.5 44.390625 14.5 40.28125 \nQ 14.5 37.15625 16.890625 35.375 \nQ 19.28125 33.59375 26.515625 31.984375 \nL 29.59375 31.296875 \nQ 39.15625 29.25 43.1875 25.515625 \nQ 47.21875 21.78125 47.21875 15.09375 \nQ 47.21875 7.46875 41.1875 3.015625 \nQ 35.15625 -1.421875 24.609375 -1.421875 \nQ 20.21875 -1.421875 15.453125 -0.5625 \nQ 10.6875 0.296875 5.421875 2 \nL 5.421875 11.28125 \nQ 10.40625 8.6875 15.234375 7.390625 \nQ 20.0625 6.109375 24.8125 6.109375 \nQ 31.15625 6.109375 34.5625 8.28125 \nQ 37.984375 10.453125 37.984375 14.40625 \nQ 37.984375 18.0625 35.515625 20.015625 \nQ 33.0625 21.96875 24.703125 23.78125 \nL 21.578125 24.515625 \nQ 13.234375 26.265625 9.515625 29.90625 \nQ 5.8125 33.546875 5.8125 39.890625 \nQ 5.8125 47.609375 11.28125 51.796875 \nQ 16.75 56 26.8125 56 \nQ 31.78125 56 36.171875 55.265625 \nQ 40.578125 54.546875 44.28125 53.078125 \nz\n\" id=\"DejaVuSans-115\"/>\n       <path d=\"M 9.078125 75.984375 \nL 18.109375 75.984375 \nL 18.109375 31.109375 \nL 44.921875 54.6875 \nL 56.390625 54.6875 \nL 27.390625 29.109375 \nL 57.625 0 \nL 45.90625 0 \nL 18.109375 26.703125 \nL 18.109375 0 \nL 9.078125 0 \nz\n\" id=\"DejaVuSans-107\"/>\n      </defs>\n      <use xlink:href=\"#DejaVuSans-84\"/>\n      <use x=\"44.583984\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"105.863281\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"157.962891\" xlink:href=\"#DejaVuSans-107\"/>\n     </g>\n    </g>\n   </g>\n   <g id=\"matplotlib.axis_2\">\n    <g id=\"ytick_1\">\n     <g id=\"line2d_6\">\n      <defs>\n       <path d=\"M 0 0 \nL -3.5 0 \n\" id=\"m41ff687a35\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n      </defs>\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m41ff687a35\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_7\">\n      <!-- 0.0 -->\n      <g transform=\"translate(20.878125 243.557344)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 10.6875 12.40625 \nL 21 12.40625 \nL 21 0 \nL 10.6875 0 \nz\n\" id=\"DejaVuSans-46\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_2\">\n     <g id=\"line2d_7\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m41ff687a35\" y=\"196.270125\"/>\n      </g>\n     </g>\n     <g id=\"text_8\">\n      <!-- 0.2 -->\n      <g transform=\"translate(20.878125 200.069344)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-50\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_3\">\n     <g id=\"line2d_8\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m41ff687a35\" y=\"152.782125\"/>\n      </g>\n     </g>\n     <g id=\"text_9\">\n      <!-- 0.4 -->\n      <g transform=\"translate(20.878125 156.581344)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-52\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_4\">\n     <g id=\"line2d_9\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m41ff687a35\" y=\"109.294125\"/>\n      </g>\n     </g>\n     <g id=\"text_10\">\n      <!-- 0.6 -->\n      <g transform=\"translate(20.878125 113.093344)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 33.015625 40.375 \nQ 26.375 40.375 22.484375 35.828125 \nQ 18.609375 31.296875 18.609375 23.390625 \nQ 18.609375 15.53125 22.484375 10.953125 \nQ 26.375 6.390625 33.015625 6.390625 \nQ 39.65625 6.390625 43.53125 10.953125 \nQ 47.40625 15.53125 47.40625 23.390625 \nQ 47.40625 31.296875 43.53125 35.828125 \nQ 39.65625 40.375 33.015625 40.375 \nz\nM 52.59375 71.296875 \nL 52.59375 62.3125 \nQ 48.875 64.0625 45.09375 64.984375 \nQ 41.3125 65.921875 37.59375 65.921875 \nQ 27.828125 65.921875 22.671875 59.328125 \nQ 17.53125 52.734375 16.796875 39.40625 \nQ 19.671875 43.65625 24.015625 45.921875 \nQ 28.375 48.1875 33.59375 48.1875 \nQ 44.578125 48.1875 50.953125 41.515625 \nQ 57.328125 34.859375 57.328125 23.390625 \nQ 57.328125 12.15625 50.6875 5.359375 \nQ 44.046875 -1.421875 33.015625 -1.421875 \nQ 20.359375 -1.421875 13.671875 8.265625 \nQ 6.984375 17.96875 6.984375 36.375 \nQ 6.984375 53.65625 15.1875 63.9375 \nQ 23.390625 74.21875 37.203125 74.21875 \nQ 40.921875 74.21875 44.703125 73.484375 \nQ 48.484375 72.75 52.59375 71.296875 \nz\n\" id=\"DejaVuSans-54\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-54\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_5\">\n     <g id=\"line2d_10\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m41ff687a35\" y=\"65.806125\"/>\n      </g>\n     </g>\n     <g id=\"text_11\">\n      <!-- 0.8 -->\n      <g transform=\"translate(20.878125 69.605344)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 31.78125 34.625 \nQ 24.75 34.625 20.71875 30.859375 \nQ 16.703125 27.09375 16.703125 20.515625 \nQ 16.703125 13.921875 20.71875 10.15625 \nQ 24.75 6.390625 31.78125 6.390625 \nQ 38.8125 6.390625 42.859375 10.171875 \nQ 46.921875 13.96875 46.921875 20.515625 \nQ 46.921875 27.09375 42.890625 30.859375 \nQ 38.875 34.625 31.78125 34.625 \nz\nM 21.921875 38.8125 \nQ 15.578125 40.375 12.03125 44.71875 \nQ 8.5 49.078125 8.5 55.328125 \nQ 8.5 64.0625 14.71875 69.140625 \nQ 20.953125 74.21875 31.78125 74.21875 \nQ 42.671875 74.21875 48.875 69.140625 \nQ 55.078125 64.0625 55.078125 55.328125 \nQ 55.078125 49.078125 51.53125 44.71875 \nQ 48 40.375 41.703125 38.8125 \nQ 48.828125 37.15625 52.796875 32.3125 \nQ 56.78125 27.484375 56.78125 20.515625 \nQ 56.78125 9.90625 50.3125 4.234375 \nQ 43.84375 -1.421875 31.78125 -1.421875 \nQ 19.734375 -1.421875 13.25 4.234375 \nQ 6.78125 9.90625 6.78125 20.515625 \nQ 6.78125 27.484375 10.78125 32.3125 \nQ 14.796875 37.15625 21.921875 38.8125 \nz\nM 18.3125 54.390625 \nQ 18.3125 48.734375 21.84375 45.5625 \nQ 25.390625 42.390625 31.78125 42.390625 \nQ 38.140625 42.390625 41.71875 45.5625 \nQ 45.3125 48.734375 45.3125 54.390625 \nQ 45.3125 60.0625 41.71875 63.234375 \nQ 38.140625 66.40625 31.78125 66.40625 \nQ 25.390625 66.40625 21.84375 63.234375 \nQ 18.3125 60.0625 18.3125 54.390625 \nz\n\" id=\"DejaVuSans-56\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-56\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_6\">\n     <g id=\"line2d_11\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m41ff687a35\" y=\"22.318125\"/>\n      </g>\n     </g>\n     <g id=\"text_12\">\n      <!-- 1.0 -->\n      <g transform=\"translate(20.878125 26.117344)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-49\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"text_13\">\n     <!-- Accuracy -->\n     <g transform=\"translate(14.798438 153.86625)rotate(-90)scale(0.1 -0.1)\">\n      <defs>\n       <path d=\"M 34.1875 63.1875 \nL 20.796875 26.90625 \nL 47.609375 26.90625 \nz\nM 28.609375 72.90625 \nL 39.796875 72.90625 \nL 67.578125 0 \nL 57.328125 0 \nL 50.6875 18.703125 \nL 17.828125 18.703125 \nL 11.1875 0 \nL 0.78125 0 \nz\n\" id=\"DejaVuSans-65\"/>\n       <path d=\"M 48.78125 52.59375 \nL 48.78125 44.1875 \nQ 44.96875 46.296875 41.140625 47.34375 \nQ 37.3125 48.390625 33.40625 48.390625 \nQ 24.65625 48.390625 19.8125 42.84375 \nQ 14.984375 37.3125 14.984375 27.296875 \nQ 14.984375 17.28125 19.8125 11.734375 \nQ 24.65625 6.203125 33.40625 6.203125 \nQ 37.3125 6.203125 41.140625 7.25 \nQ 44.96875 8.296875 48.78125 10.40625 \nL 48.78125 2.09375 \nQ 45.015625 0.34375 40.984375 -0.53125 \nQ 36.96875 -1.421875 32.421875 -1.421875 \nQ 20.0625 -1.421875 12.78125 6.34375 \nQ 5.515625 14.109375 5.515625 27.296875 \nQ 5.515625 40.671875 12.859375 48.328125 \nQ 20.21875 56 33.015625 56 \nQ 37.15625 56 41.109375 55.140625 \nQ 45.0625 54.296875 48.78125 52.59375 \nz\n\" id=\"DejaVuSans-99\"/>\n       <path d=\"M 8.5 21.578125 \nL 8.5 54.6875 \nL 17.484375 54.6875 \nL 17.484375 21.921875 \nQ 17.484375 14.15625 20.5 10.265625 \nQ 23.53125 6.390625 29.59375 6.390625 \nQ 36.859375 6.390625 41.078125 11.03125 \nQ 45.3125 15.671875 45.3125 23.6875 \nL 45.3125 54.6875 \nL 54.296875 54.6875 \nL 54.296875 0 \nL 45.3125 0 \nL 45.3125 8.40625 \nQ 42.046875 3.421875 37.71875 1 \nQ 33.40625 -1.421875 27.6875 -1.421875 \nQ 18.265625 -1.421875 13.375 4.4375 \nQ 8.5 10.296875 8.5 21.578125 \nz\nM 31.109375 56 \nz\n\" id=\"DejaVuSans-117\"/>\n       <path d=\"M 41.109375 46.296875 \nQ 39.59375 47.171875 37.8125 47.578125 \nQ 36.03125 48 33.890625 48 \nQ 26.265625 48 22.1875 43.046875 \nQ 18.109375 38.09375 18.109375 28.8125 \nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.1875 \nQ 20.953125 51.171875 25.484375 53.578125 \nQ 30.03125 56 36.53125 56 \nQ 37.453125 56 38.578125 55.875 \nQ 39.703125 55.765625 41.0625 55.515625 \nz\n\" id=\"DejaVuSans-114\"/>\n       <path d=\"M 32.171875 -5.078125 \nQ 28.375 -14.84375 24.75 -17.8125 \nQ 21.140625 -20.796875 15.09375 -20.796875 \nL 7.90625 -20.796875 \nL 7.90625 -13.28125 \nL 13.1875 -13.28125 \nQ 16.890625 -13.28125 18.9375 -11.515625 \nQ 21 -9.765625 23.484375 -3.21875 \nL 25.09375 0.875 \nL 2.984375 54.6875 \nL 12.5 54.6875 \nL 29.59375 11.921875 \nL 46.6875 54.6875 \nL 56.203125 54.6875 \nz\n\" id=\"DejaVuSans-121\"/>\n      </defs>\n      <use xlink:href=\"#DejaVuSans-65\"/>\n      <use x=\"66.658203\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"121.638672\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"176.619141\" xlink:href=\"#DejaVuSans-117\"/>\n      <use x=\"239.998047\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"281.111328\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"342.390625\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"397.371094\" xlink:href=\"#DejaVuSans-121\"/>\n     </g>\n    </g>\n   </g>\n   <g id=\"patch_8\">\n    <path d=\"M 43.78125 239.758125 \nL 43.78125 22.318125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_9\">\n    <path d=\"M 378.58125 239.758125 \nL 378.58125 22.318125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_10\">\n    <path d=\"M 43.78125 239.758125 \nL 378.58125 239.758125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_11\">\n    <path d=\"M 43.78125 22.318125 \nL 378.58125 22.318125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"text_14\">\n    <!-- 54% -->\n    <g transform=\"translate(73.249787 117.738771)scale(0.1 -0.1)\">\n     <defs>\n      <path d=\"M 10.796875 72.90625 \nL 49.515625 72.90625 \nL 49.515625 64.59375 \nL 19.828125 64.59375 \nL 19.828125 46.734375 \nQ 21.96875 47.46875 24.109375 47.828125 \nQ 26.265625 48.1875 28.421875 48.1875 \nQ 40.625 48.1875 47.75 41.5 \nQ 54.890625 34.8125 54.890625 23.390625 \nQ 54.890625 11.625 47.5625 5.09375 \nQ 40.234375 -1.421875 26.90625 -1.421875 \nQ 22.3125 -1.421875 17.546875 -0.640625 \nQ 12.796875 0.140625 7.71875 1.703125 \nL 7.71875 11.625 \nQ 12.109375 9.234375 16.796875 8.0625 \nQ 21.484375 6.890625 26.703125 6.890625 \nQ 35.15625 6.890625 40.078125 11.328125 \nQ 45.015625 15.765625 45.015625 23.390625 \nQ 45.015625 31 40.078125 35.4375 \nQ 35.15625 39.890625 26.703125 39.890625 \nQ 22.75 39.890625 18.8125 39.015625 \nQ 14.890625 38.140625 10.796875 36.28125 \nz\n\" id=\"DejaVuSans-53\"/>\n      <path d=\"M 72.703125 32.078125 \nQ 68.453125 32.078125 66.03125 28.46875 \nQ 63.625 24.859375 63.625 18.40625 \nQ 63.625 12.0625 66.03125 8.421875 \nQ 68.453125 4.78125 72.703125 4.78125 \nQ 76.859375 4.78125 79.265625 8.421875 \nQ 81.6875 12.0625 81.6875 18.40625 \nQ 81.6875 24.8125 79.265625 28.4375 \nQ 76.859375 32.078125 72.703125 32.078125 \nz\nM 72.703125 38.28125 \nQ 80.421875 38.28125 84.953125 32.90625 \nQ 89.5 27.546875 89.5 18.40625 \nQ 89.5 9.28125 84.9375 3.921875 \nQ 80.375 -1.421875 72.703125 -1.421875 \nQ 64.890625 -1.421875 60.34375 3.921875 \nQ 55.8125 9.28125 55.8125 18.40625 \nQ 55.8125 27.59375 60.375 32.9375 \nQ 64.9375 38.28125 72.703125 38.28125 \nz\nM 22.3125 68.015625 \nQ 18.109375 68.015625 15.6875 64.375 \nQ 13.28125 60.75 13.28125 54.390625 \nQ 13.28125 47.953125 15.671875 44.328125 \nQ 18.0625 40.71875 22.3125 40.71875 \nQ 26.5625 40.71875 28.96875 44.328125 \nQ 31.390625 47.953125 31.390625 54.390625 \nQ 31.390625 60.6875 28.953125 64.34375 \nQ 26.515625 68.015625 22.3125 68.015625 \nz\nM 66.40625 74.21875 \nL 74.21875 74.21875 \nL 28.609375 -1.421875 \nL 20.796875 -1.421875 \nz\nM 22.3125 74.21875 \nQ 30.03125 74.21875 34.609375 68.875 \nQ 39.203125 63.53125 39.203125 54.390625 \nQ 39.203125 45.171875 34.640625 39.84375 \nQ 30.078125 34.515625 22.3125 34.515625 \nQ 14.546875 34.515625 10.03125 39.859375 \nQ 5.515625 45.21875 5.515625 54.390625 \nQ 5.515625 63.484375 10.046875 68.84375 \nQ 14.59375 74.21875 22.3125 74.21875 \nz\n\" id=\"DejaVuSans-37\"/>\n     </defs>\n     <use xlink:href=\"#DejaVuSans-53\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-52\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_15\">\n    <!-- 55% -->\n    <g transform=\"translate(136.658878 115.172761)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-53\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-53\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_16\">\n    <!-- 92% -->\n    <g transform=\"translate(200.067969 34.883476)scale(0.1 -0.1)\">\n     <defs>\n      <path d=\"M 10.984375 1.515625 \nL 10.984375 10.5 \nQ 14.703125 8.734375 18.5 7.8125 \nQ 22.3125 6.890625 25.984375 6.890625 \nQ 35.75 6.890625 40.890625 13.453125 \nQ 46.046875 20.015625 46.78125 33.40625 \nQ 43.953125 29.203125 39.59375 26.953125 \nQ 35.25 24.703125 29.984375 24.703125 \nQ 19.046875 24.703125 12.671875 31.3125 \nQ 6.296875 37.9375 6.296875 49.421875 \nQ 6.296875 60.640625 12.9375 67.421875 \nQ 19.578125 74.21875 30.609375 74.21875 \nQ 43.265625 74.21875 49.921875 64.515625 \nQ 56.59375 54.828125 56.59375 36.375 \nQ 56.59375 19.140625 48.40625 8.859375 \nQ 40.234375 -1.421875 26.421875 -1.421875 \nQ 22.703125 -1.421875 18.890625 -0.6875 \nQ 15.09375 0.046875 10.984375 1.515625 \nz\nM 30.609375 32.421875 \nQ 37.25 32.421875 41.125 36.953125 \nQ 45.015625 41.5 45.015625 49.421875 \nQ 45.015625 57.28125 41.125 61.84375 \nQ 37.25 66.40625 30.609375 66.40625 \nQ 23.96875 66.40625 20.09375 61.84375 \nQ 16.21875 57.28125 16.21875 49.421875 \nQ 16.21875 41.5 20.09375 36.953125 \nQ 23.96875 32.421875 30.609375 32.421875 \nz\n\" id=\"DejaVuSans-57\"/>\n     </defs>\n     <use xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-50\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_17\">\n    <!-- 99% -->\n    <g transform=\"translate(263.47706 18.53264)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_18\">\n    <!-- 100% -->\n    <g transform=\"translate(323.704901 18.224745)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-49\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-48\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-48\"/>\n     <use x=\"190.869141\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_19\">\n    <!-- Task Accuracy -->\n    <g transform=\"translate(168.929063 16.318125)scale(0.12 -0.12)\">\n     <defs>\n      <path id=\"DejaVuSans-32\"/>\n     </defs>\n     <use xlink:href=\"#DejaVuSans-84\"/>\n     <use x=\"44.583984\" xlink:href=\"#DejaVuSans-97\"/>\n     <use x=\"105.863281\" xlink:href=\"#DejaVuSans-115\"/>\n     <use x=\"157.962891\" xlink:href=\"#DejaVuSans-107\"/>\n     <use x=\"215.873047\" xlink:href=\"#DejaVuSans-32\"/>\n     <use x=\"247.660156\" xlink:href=\"#DejaVuSans-65\"/>\n     <use x=\"314.318359\" xlink:href=\"#DejaVuSans-99\"/>\n     <use x=\"369.298828\" xlink:href=\"#DejaVuSans-99\"/>\n     <use x=\"424.279297\" xlink:href=\"#DejaVuSans-117\"/>\n     <use x=\"487.658203\" xlink:href=\"#DejaVuSans-114\"/>\n     <use x=\"528.771484\" xlink:href=\"#DejaVuSans-97\"/>\n     <use x=\"590.050781\" xlink:href=\"#DejaVuSans-99\"/>\n     <use x=\"645.03125\" xlink:href=\"#DejaVuSans-121\"/>\n    </g>\n   </g>\n  </g>\n </g>\n <defs>\n  <clipPath id=\"p7ae5f5802d\">\n   <rect height=\"217.44\" width=\"334.8\" x=\"43.78125\" y=\"22.318125\"/>\n  </clipPath>\n </defs>\n</svg>\n",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAdO0lEQVR4nO3de7wVdd328c+1YXPLVu+QQCXQSINQCXe6RTuY3CpEaHqTGeKBDj7QCStPBSqmhlooeaRb8cmbNExNyVBRKNuJ8oiAhoqSCUaCmghBHrah6Pf5YwZcbPZhbWDWYu+53q/Xejnzm9+a9Z3lZq41v5k1SxGBmZnlV0W5CzAzs/JyEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CCx3JE2RNL7cdZhtLxwEtt2T9EbB4z1JbxXMn1SiGqZIWi+pWylez6yUHAS23YuInTY8gBeALxS0Tc369SXtCBwH/As4OevXq/fa7Uv5epZPDgJrtST1l/SIpLWSXpZ0raQO6TJJukLSSkmvSXpKUt8G1rGzpFpJV0tSIy91HLAWuAj4Sr3nd5b0v5JekrRG0l0Fy46VtDB9/aWSBqftyyQdWdDvAkm/Sqd7SgpJp0p6Afhj2v4bSf+Q9C9JsyXtV/D8jpImSvp7uvzhtO1eSafVq/dJSUNb8DZbDjgIrDV7Fzgd6AJ8EjgC+Ha6bBDwWaA38AHgy8DqwidL+iDwADAnIr4bjd9v5SvAr4FbgT6SDixYdjNQBewH7Apcka67P3ATcDbQKa1lWQu27TBgH+Bz6fx9QK/0NR4HCo+ELgcOBD4FdAZ+ALwH/JKCIxhJ+wPdgXtbUAeSvidpkaSnJX1/w7rSEH5K0t2S/jNt/3QaNgsk9UrbOkmaJalV7G8k3Zh+gFhU0NZZ0u8lPZf+d5e0XemHiCXpdh+Qtn9M0mNp2yfTtvaS/iCpqjxb1oSI8MOPVvMg2Zke2ciy7wO/TacPB/4KHAJU1Os3BbgRWASc3czr7UmyU61O52cCV6XT3dJluzTwvOuBK4rZBuAC4FfpdE8ggL2aqKlT2ucDJB/m3gL2b6DfDsAaoFc6fznw8xa+333T96kKaA/8AfgoMB84LO3zdeDH6fQ0oAfwGWBiwesOKPffTgu2+bPAAcCigrYJwJh0egzw03R6CElIK/1bezRt/1n6HvQA7kzbTgO+Wu7ta+jRKhLarCGSeku6Jx0yeQ24hOTogIj4I3AtMAlYKWnyhk+tqaOAjsB1zbzMKcDiiFiYzk8FTpRUCewB/DMi1jTwvD2ApVu4aQDLN0xIaifpJ+nw0mu8f2TRJX3s0NBrRcS/gduAk9NP48NJjmBaYh+SnVtdRKwHHgS+SHKkNTvt83uS4TOAd0hCowp4R9LewB4R8acWvm7ZRMRs4J/1mo8lOcIi/e9/F7TfFIm5QKf0goL670Mn4AskR4nbHQeBtWb/A/yF5BPvfwLnkHwyAyAiro6IA4F9SXZcZxc89wbgfmBGejK4MSOAvdKw+QfJJ70uJJ8ElwOd03/k9S0H9m5knW+S7CA22L2BPoXDVCeS7HCOJDkK6Jm2C1gF/LuJ1/olcBLJsFldRDzSSL/GLAIOlfTBdEhjCEnIPZ3WBHB82gZwKcnObixJEF8MnNfC19we7RYRL6fT/wB2S6e7UxDawIq0bRLJ3+MvST6gjAMuiYj3SlNuyzgIrDXbGXgNeENSH+BbGxZIOkjSwekn9zdJdpb1/xGOBp4F7pbUsf7K07HdvYH+QHX66AvcAoxIdwz3AT+XtIukSkmfTZ/+C+Brko6QVCGpe1ojwELghLR/DfClIrZzHck5jiqSHQsA6Y7lRuBnkj6UHj18UtJ/pMsfSbd7Ii0/GiAiFgM/BWaRBOdCknMzXwe+LemxtL630/4LI+KQiPgvYC/gZZKh9Nsk/UrSbg28TKsSyThPk/fvj4gXImJARHwSqCMZIlos6eb0vehdilqLVu6xKT/8aMmDgvF1krHcvwBvAA+RXNXzcLrsCODJdNkqkiGdndJlU4Dx6XQFySfYWcAO9V7rOtLx3Xrt/Ul2zJ3Txy+BV0jG46cV9Bua1vA6sAT4XNq+F/BoWtu9wNVsfo6gfcF6dgJ+l67n7yRHKQF8NF3eEbgSeJHkEtfZQMeC559HM+cdWvD+XwJ8u15bb2BevTal72nn9L3/MMkJ8IvL/TdU5Hb2ZNNzBM8C3dLpbsCz6fT1wPCG+hW03UZyov/i9D34MDC13NtY+FBaqJm1UZJGAKMi4jNb+PxdI2KlpD1Jdu6HAB3StgqSYP1TRNxY8JyvkJxEv1LSb4HvkuxcvxgRp2/dFmVPUk/gnojom85fBqyOiJ9IGgN0jogfSDqK5MhyCHAwcHVE9C9Yz2HAf0fE6ZKuIDmZviztt91cxusvq5i1Yem4/reBn2/Fau5ML7V9B/hORKxNLyn9Trp8GvC/9V7zqySX8EJyXmUGyfDRiVtRR0lI+jUwAOgiaQXwI+AnwO2STiU5Kvty2n0GSQgsIRkC+lrBekRyNDYsbZpMcnTUnoJhzO1BZkcEkm4EjgZWbkjVessFXEXyJtaRXFb1eCbFmOWQpM+R7KT/ABwXyVU/ZpvJ8mTxFGBwE8s/TzJu1gsYRXIFiJltIxExMyJ2jIhjHQLWlMyCIBq+FrdQY9ffmplZCZXzHEFj19++XL+jpFEkRw3suOOOB/bp06d+FzMza8Jjjz22KiK6NrSsVZwsjojJJCdaqKmpiQULFpS5IjNrLXqOadGtlbZry35y1BY/V9LfG1tWziB4kfe/jQjJFy5eLFMtZm1aW9kZbs2O0BpXzm8WTwdGpHfvOwT4V7z/FW4zMyuRzI4IGrkWtxIgIq6jietvzcysdDILgogY3szyAL7TVB8zM8uebzpnVoSrrrqKvn37st9++3HllVcCcPbZZ9OnTx/69evH0KFDWbt2LQBz5syhX79+1NTU8NxzzwGwdu1aBg0axHvvbZc3n7SccxCYNWPRokXccMMNzJs3jyeeeIJ77rmHJUuWMHDgQBYtWsSTTz5J7969ufTSSwGYOHEiM2bM4Morr+S665KfOxg/fjznnHMOFRX+J2fbH/9VmjVj8eLFHHzwwVRVVdG+fXsOO+wwpk2bxqBBg2jfPhldPeSQQ1ixYgUAlZWV1NXVUVdXR2VlJUuXLmX58uUMGDCgjFth1rhW8T0Cs3Lq27cv5557LqtXr6Zjx47MmDGDmpqaTfrceOONDBuW3Fts7NixjBgxgo4dO3LzzTdz1llnMX78+HKUblYUB4FZM/bZZx9++MMfMmjQIHbccUeqq6tp167dxuUXX3wx7du356STTgKgurqauXPnAjB79my6detGRDBs2DAqKyuZOHEiu+3W6n+fxdoQDw2ZFeHUU0/lscceY/bs2eyyyy707p38wNSUKVO45557mDp1KskNdd8XEYwfP55x48Zx4YUXMmHCBEaOHMnVV19djk0wa5SPCMyKsHLlSnbddVdeeOEFpk2bxty5c7n//vuZMGECDz74IFVVVZs956abbmLIkCF07tyZuro6KioqqKiooK6urgxbYNY4B4FZEY477jhWr15NZWUlkyZNolOnTowePZp169YxcOBAIDlhvOEqobq6OqZMmcKsWbMAOOOMMxgyZAgdOnTglltuKdt2mDXEQWBWhIceemiztiVLljTav6qqitra2o3zhx56KE899VQmtZltLZ8jMDPLOQeBmVnOOQjMzHLO5wgsF9rK/fjB9+S3bc9HBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OcyzQIJA2W9KykJZLGNLB8T0m1kv4s6UlJQ7Ksx8zMNpdZEEhqB0wCPg/sCwyXtG+9bucBt0fEJ4ATgJ9nVY+ZmTUsyyOC/sCSiHg+It4GbgWOrdcngP9Mpz8AvJRhPWZm1oAsg6A7sLxgfkXaVugC4GRJK4AZwGkNrUjSKEkLJC149dVXs6jVzCy3yn2yeDgwJSJ6AEOAmyVtVlNETI6Imoio6dq1a8mLNDNry7IMgheBPQrme6RthU4FbgeIiEeAHYAuGdZkZmb1ZBkE84Fekj4iqQPJyeDp9fq8ABwBIGkfkiDw2I+ZWQllFgQRsR4YDcwEFpNcHfS0pIskHZN2OxMYKekJ4NfAVyMisqrJzMw21z7LlUfEDJKTwIVt5xdMPwN8OssazMysaeU+WWxmZmXmIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8u5TINA0mBJz0paImlMI32+LOkZSU9LuiXLeszMbHPts1qxpHbAJGAgsAKYL2l6RDxT0KcXMBb4dESskbRrVvWYmVnDsjwi6A8siYjnI+Jt4Fbg2Hp9RgKTImINQESszLAe20o9e/bk4x//ONXV1dTU1ABwwQUX0L17d6qrq6murmbGjBkAzJkzh379+lFTU8Nzzz0HwNq1axk0aBDvvfde2bbBzDaX2REB0B1YXjC/Aji4Xp/eAJLmAO2ACyLi/vorkjQKGAWw5557ZlKsFae2tpYuXbps0nb66adz1llnbdI2ceJEZsyYwbJly7juuuuYOHEi48eP55xzzqGiwqemzLYn5f4X2R7oBQwAhgM3SOpUv1NETI6Imoio6dq1a2krtC1SWVlJXV0ddXV1VFZWsnTpUpYvX86AAQPKXZqZ1dNsEEj6gqQtCYwXgT0K5nukbYVWANMj4p2I+BvwV5Jg2C41NDSywcSJE5HEqlWrALjzzjvZb7/9OPTQQ1m9ejUAS5cuZdiwYSWve1uRxKBBgzjwwAOZPHnyxvZrr72Wfv368fWvf501a9YAMHbsWEaMGMGll17K6NGjOffccxk/fny5SjezJhSzgx8GPCdpgqQ+LVj3fKCXpI9I6gCcAEyv1+cukqMBJHUhGSp6vgWvUXK1tbUsXLiQBQsWbGxbvnw5s2bN2mTY6pprrmH+/Pl84xvf4JZbkouhzjvvvFa9M3z44Yd5/PHHue+++5g0aRKzZ8/mW9/6FkuXLmXhwoV069aNM888E4Dq6mrmzp1LbW0tzz//PN26dSMiGDZsGCeffDKvvPJKmbfGzDZoNggi4mTgE8BSYIqkRySNkrRzM89bD4wGZgKLgdsj4mlJF0k6Ju02E1gt6RmgFjg7IlZvxfaUxemnn86ECROQtLGtoqKCdevWbRwaeeihh9h9993p1Wu7PeBpVvfu3QHYddddGTp0KPPmzWO33XajXbt2VFRUMHLkSObNm7fJcyKC8ePHM27cOC688EImTJjAyJEjufrqq8uxCWbWgKKGfCLiNeAOkit/ugFDgcclndbM82ZERO+I2DsiLk7bzo+I6el0RMQZEbFvRHw8Im7dqq3JWENDI7/73e/o3r07+++//yZ9x44dy5FHHsndd9/N8OHD+fGPf8y4cePKUfY28eabb/L6669vnJ41axZ9+/bl5Zdf3tjnt7/9LX379t3keTfddBNDhgyhc+fO1NXVUVFRQUVFBXV1dSWt38wa1+xVQ+mn968BHwVuAvpHxEpJVcAzwDXZlrj9ePjhh+nevTsrV65k4MCB9OnTh0suuYRZs2Zt1nfgwIEMHDgQeH9n+Ne//pXLL7+cXXbZhauuuoqqqqpSb8IWe+WVVxg6dCgA69ev58QTT2Tw4MGccsopLFy4EEn07NmT66+/fuNz6urqmDJlysb354wzzmDIkCF06NBh43CZmZVfMZePHgdcERGzCxsjok7SqdmUtX2qPzTy4IMP8re//W3j0cCKFSs44IADmDdvHrvvvjvw/s5w5syZHH300UybNo077riDqVOnMnLkyLJtS0vttddePPHEE5u133zzzY0+p6qqitra2o3zhx56KE899VQm9ZnZlitmaOgCYOPAr6SOknoCRMQD2ZS1/WloaOSggw5i5cqVLFu2jGXLltGjRw8ef/zxjSEAcNlll/Hd736XyspK3nrrLSR5aMTMtivFHBH8BvhUwfy7adtBmVS0nWpsaKQpL730EvPmzeNHP/oRAKeddhoHHXQQnTp14q677sq6ZDOzohQTBO3TW0QAEBFvp5eD5kpjQyOFli1btsn8hz70Ie69996N88cffzzHH398FuWZmW2xYoLgVUnHbLjSR9KxwKpsy7Is9Bxzb/OdWoFlPzmq3CWYtSnFBME3gamSrgVEcv+gEZlWZWZmJdNsEETEUuAQSTul829kXpWZmZVMUXcflXQUsB+ww4Zvz0bERRnWlYm2MjQCHh4xs22nmJvOXUdyv6HTSIaGjgc+nHFdZmZWIsV8j+BTETECWBMRFwKfJP0dATMza/2KCYJ/p/+tk/Qh4B2S+w2ZmVkbUMw5grvTH4u5DHgcCOCGLIsyM7PSaTII0h+keSAi1gJ3SroH2CEi/lWK4szMLHtNDg1FxHvApIL5dQ4BM7O2pZhzBA9IOk6Fv7piZmZtRjFB8A2Sm8ytk/SapNclvZZxXWZmViLFfLO4yZ+kNDOz1q2YXyj7bEPt9X+oxszMWqdiLh89u2B6B6A/8BhweCYVmZlZSRUzNPSFwnlJewBXZlWQmZmVVjEni+tbAeyzrQsxM7PyKOYcwTUk3yaGJDiqSb5hbGZmbUAx5wgWFEyvB34dEXMyqsfMzEqsmCC4A/h3RLwLIKmdpKqIqMu2NDMzK4WivlkMdCyY7wj8IZtyzMys1IoJgh0Kf54yna7KriQzMyulYoLgTUkHbJiRdCDwVnYlmZlZKRVzjuD7wG8kvUTyU5W7k/x0pZmZtQHFfKFsvqQ+wMfSpmcj4p1syzIzs1Ip5sfrvwPsGBGLImIRsJOkb2dfmpmZlUIx5whGpr9QBkBErAFGZlaRmZmVVDFB0K7wR2kktQM6ZFeSmZmVUjEni+8HbpN0fTr/DeC+7EoyM7NSKiYIfgiMAr6Zzj9JcuWQmZm1Ac0ODaU/YP8osIzktwgOBxYXs3JJgyU9K2mJpDFN9DtOUkiqKa5sMzPbVho9IpDUGxiePlYBtwFExH8Vs+L0XMIkYCDJravnS5oeEc/U67cz8D2SsDEzsxJr6ojgLySf/o+OiM9ExDXAuy1Yd39gSUQ8HxFvA7cCxzbQ78fAT4F/t2DdZma2jTQVBF8EXgZqJd0g6QiSbxYXqzuwvGB+Rdq2UXrrij0i4t6mViRplKQFkha8+uqrLSjBzMya02gQRMRdEXEC0AeoJbnVxK6S/kfSoK19YUkVwM+AM5vrGxGTI6ImImq6du26tS9tZmYFijlZ/GZE3JL+dnEP4M8kVxI150Vgj4L5HmnbBjsDfYE/SVoGHAJM9wljM7PSatFvFkfEmvTT+RFFdJ8P9JL0EUkdgBOA6QXr+ldEdImInhHRE5gLHBMRCxpenZmZZWFLfry+KBGxHhgNzCS53PT2iHha0kWSjsnqdc3MrGWK+ULZFouIGcCMem3nN9J3QJa1mJlZwzI7IjAzs9bBQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzmQaBpMGSnpW0RNKYBpafIekZSU9KekDSh7Osx8zMNpdZEEhqB0wCPg/sCwyXtG+9bn8GaiKiH3AHMCGreszMrGFZHhH0B5ZExPMR8TZwK3BsYYeIqI2IunR2LtAjw3rMzKwBWQZBd2B5wfyKtK0xpwL3NbRA0ihJCyQtePXVV7dhiWZmtl2cLJZ0MlADXNbQ8oiYHBE1EVHTtWvX0hZnZtbGtc9w3S8CexTM90jbNiHpSOBc4LCIWJdhPWZm1oAsjwjmA70kfURSB+AEYHphB0mfAK4HjomIlRnWYmZmjcgsCCJiPTAamAksBm6PiKclXSTpmLTbZcBOwG8kLZQ0vZHVmZlZRrIcGiIiZgAz6rWdXzB9ZJavb2ZmzdsuThabmVn5OAjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzmQaBpMGSnpW0RNKYBpb/h6Tb0uWPSuqZZT1mZra5zIJAUjtgEvB5YF9guKR963U7FVgTER8FrgB+mlU9ZmbWsCyPCPoDSyLi+Yh4G7gVOLZen2OBX6bTdwBHSFKGNZmZWT2KiGxWLH0JGBwR/yedPwU4OCJGF/RZlPZZkc4vTfusqreuUcCodPZjwLOZFL3tdAFWNdurbfK251eet781bPuHI6JrQwval7qSLRERk4HJ5a6jWJIWRERNuesoB297Prcd8r39rX3bsxwaehHYo2C+R9rWYB9J7YEPAKszrMnMzOrJMgjmA70kfURSB+AEYHq9PtOBr6TTXwL+GFmNVZmZWYMyGxqKiPWSRgMzgXbAjRHxtKSLgAURMR34BXCzpCXAP0nCoi1oNcNYGfC251eet79Vb3tmJ4vNzKx18DeLzcxyzkFgZpZzDoJtqLlbarRlkm6UtDL9bkiuSNpDUq2kZyQ9Lel75a6pVCTtIGmepCfSbb+w3DWVg6R2kv4s6Z5y17IlHATbSJG31GjLpgCDy11EmawHzoyIfYFDgO/k6P/9OuDwiNgfqAYGSzqkvCWVxfeAxeUuYks5CLadYm6p0WZFxGySK79yJyJejojH0+nXSXYI3ctbVWlE4o10tjJ95OoKFEk9gKOA/1vuWraUg2Db6Q4sL5hfQU52Bva+9A66nwAeLXMpJZMOiywEVgK/j4jcbHvqSuAHwHtlrmOLOQjMthFJOwF3At+PiNfKXU+pRMS7EVFNcveA/pL6lrmkkpF0NLAyIh4rdy1bw0Gw7RRzSw1royRVkoTA1IiYVu56yiEi1gK15Otc0aeBYyQtIxkOPlzSr8pbUss5CLadYm6pYW1Qeuv0XwCLI+Jn5a6nlCR1ldQpne4IDAT+UtaiSigixkZEj4joSfJv/o8RcXKZy2oxB8E2EhHrgQ231FgM3B4RT5e3qtKR9GvgEeBjklZIOrXcNZXQp4FTSD4NLkwfQ8pdVIl0A2olPUnyYej3EdEqL6HMM99iwsws53xEYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOdcqfrzerJwkfRB4IJ3dHXgXeDWd75/eW6qp538VqImI0ZkVabYVHARmzYiI1SR31kTSBcAbEXF5OWsy25Y8NGS2BSSNlDQ/vQ//nZKq0vbjJS1K22c38LyjJD0iqUvpqzZrmIPAbMtMi4iD0vvwLwY2fJP6fOBzafsxhU+QNBQYAwyJiFUlrdasCR4aMtsyfSWNBzoBO5HcWgRgDjBF0u1A4c3nDgdqgEF5ujOptQ4+IjDbMlOA0RHxceBCYAeAiPgmcB7JnWgfS080AywFdgZ6l75Us6Y5CMy2zM7Ay+ntp0/a0Chp74h4NCLOJ7myaMOtyf8OHAfcJGm/kldr1gQHgdmWGUfyK2Rz2PS2y5dJekrSIuD/AU9sWBARfyEJjd9I2ruUxZo1xXcfNTPLOR8RmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZz/x/jOYg2+yx1FwAAAABJRU5ErkJggg==\n"
     },
     "metadata": {
      "needs_background": "light"
     }
    },
    {
     "output_type": "display_data",
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n  \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"277.314375pt\" version=\"1.1\" viewBox=\"0 0 385.78125 277.314375\" width=\"385.78125pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <metadata>\n  <rdf:RDF xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n   <cc:Work>\n    <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n    <dc:date>2021-02-25T17:30:07.601652</dc:date>\n    <dc:format>image/svg+xml</dc:format>\n    <dc:creator>\n     <cc:Agent>\n      <dc:title>Matplotlib v3.3.4, https://matplotlib.org/</dc:title>\n     </cc:Agent>\n    </dc:creator>\n   </cc:Work>\n  </rdf:RDF>\n </metadata>\n <defs>\n  <style type=\"text/css\">*{stroke-linecap:butt;stroke-linejoin:round;}</style>\n </defs>\n <g id=\"figure_1\">\n  <g id=\"patch_1\">\n   <path d=\"M 0 277.314375 \nL 385.78125 277.314375 \nL 385.78125 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n  </g>\n  <g id=\"axes_1\">\n   <g id=\"patch_2\">\n    <path d=\"M 43.78125 239.758125 \nL 378.58125 239.758125 \nL 378.58125 22.318125 \nL 43.78125 22.318125 \nz\n\" style=\"fill:#ffffff;\"/>\n   </g>\n   <g id=\"patch_3\">\n    <path clip-path=\"url(#p5b6ae91fee)\" d=\"M 58.999432 239.758125 \nL 109.726705 239.758125 \nL 109.726705 28.674766 \nL 58.999432 28.674766 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_4\">\n    <path clip-path=\"url(#p5b6ae91fee)\" d=\"M 122.408523 239.758125 \nL 173.135795 239.758125 \nL 173.135795 85.738197 \nL 122.408523 85.738197 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_5\">\n    <path clip-path=\"url(#p5b6ae91fee)\" d=\"M 185.817614 239.758125 \nL 236.544886 239.758125 \nL 236.544886 48.402227 \nL 185.817614 48.402227 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_6\">\n    <path clip-path=\"url(#p5b6ae91fee)\" d=\"M 249.226705 239.758125 \nL 299.953977 239.758125 \nL 299.953977 24.583197 \nL 249.226705 24.583197 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"patch_7\">\n    <path clip-path=\"url(#p5b6ae91fee)\" d=\"M 312.635795 239.758125 \nL 363.363068 239.758125 \nL 363.363068 25.934805 \nL 312.635795 25.934805 \nz\n\" style=\"fill:#1f77b4;\"/>\n   </g>\n   <g id=\"matplotlib.axis_1\">\n    <g id=\"xtick_1\">\n     <g id=\"line2d_1\">\n      <defs>\n       <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"m80570c8eec\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n      </defs>\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"84.363068\" xlink:href=\"#m80570c8eec\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_1\">\n      <!-- 0 -->\n      <g transform=\"translate(81.181818 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_2\">\n     <g id=\"line2d_2\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"147.772159\" xlink:href=\"#m80570c8eec\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_2\">\n      <!-- 1 -->\n      <g transform=\"translate(144.590909 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 12.40625 8.296875 \nL 28.515625 8.296875 \nL 28.515625 63.921875 \nL 10.984375 60.40625 \nL 10.984375 69.390625 \nL 28.421875 72.90625 \nL 38.28125 72.90625 \nL 38.28125 8.296875 \nL 54.390625 8.296875 \nL 54.390625 0 \nL 12.40625 0 \nz\n\" id=\"DejaVuSans-49\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-49\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_3\">\n     <g id=\"line2d_3\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"211.18125\" xlink:href=\"#m80570c8eec\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_3\">\n      <!-- 2 -->\n      <g transform=\"translate(208 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 19.1875 8.296875 \nL 53.609375 8.296875 \nL 53.609375 0 \nL 7.328125 0 \nL 7.328125 8.296875 \nQ 12.9375 14.109375 22.625 23.890625 \nQ 32.328125 33.6875 34.8125 36.53125 \nQ 39.546875 41.84375 41.421875 45.53125 \nQ 43.3125 49.21875 43.3125 52.78125 \nQ 43.3125 58.59375 39.234375 62.25 \nQ 35.15625 65.921875 28.609375 65.921875 \nQ 23.96875 65.921875 18.8125 64.3125 \nQ 13.671875 62.703125 7.8125 59.421875 \nL 7.8125 69.390625 \nQ 13.765625 71.78125 18.9375 73 \nQ 24.125 74.21875 28.421875 74.21875 \nQ 39.75 74.21875 46.484375 68.546875 \nQ 53.21875 62.890625 53.21875 53.421875 \nQ 53.21875 48.921875 51.53125 44.890625 \nQ 49.859375 40.875 45.40625 35.40625 \nQ 44.1875 33.984375 37.640625 27.21875 \nQ 31.109375 20.453125 19.1875 8.296875 \nz\n\" id=\"DejaVuSans-50\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-50\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_4\">\n     <g id=\"line2d_4\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"274.590341\" xlink:href=\"#m80570c8eec\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_4\">\n      <!-- 3 -->\n      <g transform=\"translate(271.409091 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 40.578125 39.3125 \nQ 47.65625 37.796875 51.625 33 \nQ 55.609375 28.21875 55.609375 21.1875 \nQ 55.609375 10.40625 48.1875 4.484375 \nQ 40.765625 -1.421875 27.09375 -1.421875 \nQ 22.515625 -1.421875 17.65625 -0.515625 \nQ 12.796875 0.390625 7.625 2.203125 \nL 7.625 11.71875 \nQ 11.71875 9.328125 16.59375 8.109375 \nQ 21.484375 6.890625 26.8125 6.890625 \nQ 36.078125 6.890625 40.9375 10.546875 \nQ 45.796875 14.203125 45.796875 21.1875 \nQ 45.796875 27.640625 41.28125 31.265625 \nQ 36.765625 34.90625 28.71875 34.90625 \nL 20.21875 34.90625 \nL 20.21875 43.015625 \nL 29.109375 43.015625 \nQ 36.375 43.015625 40.234375 45.921875 \nQ 44.09375 48.828125 44.09375 54.296875 \nQ 44.09375 59.90625 40.109375 62.90625 \nQ 36.140625 65.921875 28.71875 65.921875 \nQ 24.65625 65.921875 20.015625 65.03125 \nQ 15.375 64.15625 9.8125 62.3125 \nL 9.8125 71.09375 \nQ 15.4375 72.65625 20.34375 73.4375 \nQ 25.25 74.21875 29.59375 74.21875 \nQ 40.828125 74.21875 47.359375 69.109375 \nQ 53.90625 64.015625 53.90625 55.328125 \nQ 53.90625 49.265625 50.4375 45.09375 \nQ 46.96875 40.921875 40.578125 39.3125 \nz\n\" id=\"DejaVuSans-51\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-51\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_5\">\n     <g id=\"line2d_5\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"337.999432\" xlink:href=\"#m80570c8eec\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_5\">\n      <!-- 4 -->\n      <g transform=\"translate(334.818182 254.356562)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 37.796875 64.3125 \nL 12.890625 25.390625 \nL 37.796875 25.390625 \nz\nM 35.203125 72.90625 \nL 47.609375 72.90625 \nL 47.609375 25.390625 \nL 58.015625 25.390625 \nL 58.015625 17.1875 \nL 47.609375 17.1875 \nL 47.609375 0 \nL 37.796875 0 \nL 37.796875 17.1875 \nL 4.890625 17.1875 \nL 4.890625 26.703125 \nz\n\" id=\"DejaVuSans-52\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-52\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"text_6\">\n     <!-- Task -->\n     <g transform=\"translate(200.388281 268.034687)scale(0.1 -0.1)\">\n      <defs>\n       <path d=\"M -0.296875 72.90625 \nL 61.375 72.90625 \nL 61.375 64.59375 \nL 35.5 64.59375 \nL 35.5 0 \nL 25.59375 0 \nL 25.59375 64.59375 \nL -0.296875 64.59375 \nz\n\" id=\"DejaVuSans-84\"/>\n       <path d=\"M 34.28125 27.484375 \nQ 23.390625 27.484375 19.1875 25 \nQ 14.984375 22.515625 14.984375 16.5 \nQ 14.984375 11.71875 18.140625 8.90625 \nQ 21.296875 6.109375 26.703125 6.109375 \nQ 34.1875 6.109375 38.703125 11.40625 \nQ 43.21875 16.703125 43.21875 25.484375 \nL 43.21875 27.484375 \nz\nM 52.203125 31.203125 \nL 52.203125 0 \nL 43.21875 0 \nL 43.21875 8.296875 \nQ 40.140625 3.328125 35.546875 0.953125 \nQ 30.953125 -1.421875 24.3125 -1.421875 \nQ 15.921875 -1.421875 10.953125 3.296875 \nQ 6 8.015625 6 15.921875 \nQ 6 25.140625 12.171875 29.828125 \nQ 18.359375 34.515625 30.609375 34.515625 \nL 43.21875 34.515625 \nL 43.21875 35.40625 \nQ 43.21875 41.609375 39.140625 45 \nQ 35.0625 48.390625 27.6875 48.390625 \nQ 23 48.390625 18.546875 47.265625 \nQ 14.109375 46.140625 10.015625 43.890625 \nL 10.015625 52.203125 \nQ 14.9375 54.109375 19.578125 55.046875 \nQ 24.21875 56 28.609375 56 \nQ 40.484375 56 46.34375 49.84375 \nQ 52.203125 43.703125 52.203125 31.203125 \nz\n\" id=\"DejaVuSans-97\"/>\n       <path d=\"M 44.28125 53.078125 \nL 44.28125 44.578125 \nQ 40.484375 46.53125 36.375 47.5 \nQ 32.28125 48.484375 27.875 48.484375 \nQ 21.1875 48.484375 17.84375 46.4375 \nQ 14.5 44.390625 14.5 40.28125 \nQ 14.5 37.15625 16.890625 35.375 \nQ 19.28125 33.59375 26.515625 31.984375 \nL 29.59375 31.296875 \nQ 39.15625 29.25 43.1875 25.515625 \nQ 47.21875 21.78125 47.21875 15.09375 \nQ 47.21875 7.46875 41.1875 3.015625 \nQ 35.15625 -1.421875 24.609375 -1.421875 \nQ 20.21875 -1.421875 15.453125 -0.5625 \nQ 10.6875 0.296875 5.421875 2 \nL 5.421875 11.28125 \nQ 10.40625 8.6875 15.234375 7.390625 \nQ 20.0625 6.109375 24.8125 6.109375 \nQ 31.15625 6.109375 34.5625 8.28125 \nQ 37.984375 10.453125 37.984375 14.40625 \nQ 37.984375 18.0625 35.515625 20.015625 \nQ 33.0625 21.96875 24.703125 23.78125 \nL 21.578125 24.515625 \nQ 13.234375 26.265625 9.515625 29.90625 \nQ 5.8125 33.546875 5.8125 39.890625 \nQ 5.8125 47.609375 11.28125 51.796875 \nQ 16.75 56 26.8125 56 \nQ 31.78125 56 36.171875 55.265625 \nQ 40.578125 54.546875 44.28125 53.078125 \nz\n\" id=\"DejaVuSans-115\"/>\n       <path d=\"M 9.078125 75.984375 \nL 18.109375 75.984375 \nL 18.109375 31.109375 \nL 44.921875 54.6875 \nL 56.390625 54.6875 \nL 27.390625 29.109375 \nL 57.625 0 \nL 45.90625 0 \nL 18.109375 26.703125 \nL 18.109375 0 \nL 9.078125 0 \nz\n\" id=\"DejaVuSans-107\"/>\n      </defs>\n      <use xlink:href=\"#DejaVuSans-84\"/>\n      <use x=\"44.583984\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"105.863281\" xlink:href=\"#DejaVuSans-115\"/>\n      <use x=\"157.962891\" xlink:href=\"#DejaVuSans-107\"/>\n     </g>\n    </g>\n   </g>\n   <g id=\"matplotlib.axis_2\">\n    <g id=\"ytick_1\">\n     <g id=\"line2d_6\">\n      <defs>\n       <path d=\"M 0 0 \nL -3.5 0 \n\" id=\"m2a0ac35f9d\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n      </defs>\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m2a0ac35f9d\" y=\"239.758125\"/>\n      </g>\n     </g>\n     <g id=\"text_7\">\n      <!-- 0.0 -->\n      <g transform=\"translate(20.878125 243.557344)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 10.6875 12.40625 \nL 21 12.40625 \nL 21 0 \nL 10.6875 0 \nz\n\" id=\"DejaVuSans-46\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_2\">\n     <g id=\"line2d_7\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m2a0ac35f9d\" y=\"196.270125\"/>\n      </g>\n     </g>\n     <g id=\"text_8\">\n      <!-- 0.2 -->\n      <g transform=\"translate(20.878125 200.069344)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-50\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_3\">\n     <g id=\"line2d_8\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m2a0ac35f9d\" y=\"152.782125\"/>\n      </g>\n     </g>\n     <g id=\"text_9\">\n      <!-- 0.4 -->\n      <g transform=\"translate(20.878125 156.581344)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-52\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_4\">\n     <g id=\"line2d_9\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m2a0ac35f9d\" y=\"109.294125\"/>\n      </g>\n     </g>\n     <g id=\"text_10\">\n      <!-- 0.6 -->\n      <g transform=\"translate(20.878125 113.093344)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 33.015625 40.375 \nQ 26.375 40.375 22.484375 35.828125 \nQ 18.609375 31.296875 18.609375 23.390625 \nQ 18.609375 15.53125 22.484375 10.953125 \nQ 26.375 6.390625 33.015625 6.390625 \nQ 39.65625 6.390625 43.53125 10.953125 \nQ 47.40625 15.53125 47.40625 23.390625 \nQ 47.40625 31.296875 43.53125 35.828125 \nQ 39.65625 40.375 33.015625 40.375 \nz\nM 52.59375 71.296875 \nL 52.59375 62.3125 \nQ 48.875 64.0625 45.09375 64.984375 \nQ 41.3125 65.921875 37.59375 65.921875 \nQ 27.828125 65.921875 22.671875 59.328125 \nQ 17.53125 52.734375 16.796875 39.40625 \nQ 19.671875 43.65625 24.015625 45.921875 \nQ 28.375 48.1875 33.59375 48.1875 \nQ 44.578125 48.1875 50.953125 41.515625 \nQ 57.328125 34.859375 57.328125 23.390625 \nQ 57.328125 12.15625 50.6875 5.359375 \nQ 44.046875 -1.421875 33.015625 -1.421875 \nQ 20.359375 -1.421875 13.671875 8.265625 \nQ 6.984375 17.96875 6.984375 36.375 \nQ 6.984375 53.65625 15.1875 63.9375 \nQ 23.390625 74.21875 37.203125 74.21875 \nQ 40.921875 74.21875 44.703125 73.484375 \nQ 48.484375 72.75 52.59375 71.296875 \nz\n\" id=\"DejaVuSans-54\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-54\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_5\">\n     <g id=\"line2d_10\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m2a0ac35f9d\" y=\"65.806125\"/>\n      </g>\n     </g>\n     <g id=\"text_11\">\n      <!-- 0.8 -->\n      <g transform=\"translate(20.878125 69.605344)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 31.78125 34.625 \nQ 24.75 34.625 20.71875 30.859375 \nQ 16.703125 27.09375 16.703125 20.515625 \nQ 16.703125 13.921875 20.71875 10.15625 \nQ 24.75 6.390625 31.78125 6.390625 \nQ 38.8125 6.390625 42.859375 10.171875 \nQ 46.921875 13.96875 46.921875 20.515625 \nQ 46.921875 27.09375 42.890625 30.859375 \nQ 38.875 34.625 31.78125 34.625 \nz\nM 21.921875 38.8125 \nQ 15.578125 40.375 12.03125 44.71875 \nQ 8.5 49.078125 8.5 55.328125 \nQ 8.5 64.0625 14.71875 69.140625 \nQ 20.953125 74.21875 31.78125 74.21875 \nQ 42.671875 74.21875 48.875 69.140625 \nQ 55.078125 64.0625 55.078125 55.328125 \nQ 55.078125 49.078125 51.53125 44.71875 \nQ 48 40.375 41.703125 38.8125 \nQ 48.828125 37.15625 52.796875 32.3125 \nQ 56.78125 27.484375 56.78125 20.515625 \nQ 56.78125 9.90625 50.3125 4.234375 \nQ 43.84375 -1.421875 31.78125 -1.421875 \nQ 19.734375 -1.421875 13.25 4.234375 \nQ 6.78125 9.90625 6.78125 20.515625 \nQ 6.78125 27.484375 10.78125 32.3125 \nQ 14.796875 37.15625 21.921875 38.8125 \nz\nM 18.3125 54.390625 \nQ 18.3125 48.734375 21.84375 45.5625 \nQ 25.390625 42.390625 31.78125 42.390625 \nQ 38.140625 42.390625 41.71875 45.5625 \nQ 45.3125 48.734375 45.3125 54.390625 \nQ 45.3125 60.0625 41.71875 63.234375 \nQ 38.140625 66.40625 31.78125 66.40625 \nQ 25.390625 66.40625 21.84375 63.234375 \nQ 18.3125 60.0625 18.3125 54.390625 \nz\n\" id=\"DejaVuSans-56\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-56\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_6\">\n     <g id=\"line2d_11\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"43.78125\" xlink:href=\"#m2a0ac35f9d\" y=\"22.318125\"/>\n      </g>\n     </g>\n     <g id=\"text_12\">\n      <!-- 1.0 -->\n      <g transform=\"translate(20.878125 26.117344)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-49\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"text_13\">\n     <!-- Accuracy -->\n     <g transform=\"translate(14.798438 153.86625)rotate(-90)scale(0.1 -0.1)\">\n      <defs>\n       <path d=\"M 34.1875 63.1875 \nL 20.796875 26.90625 \nL 47.609375 26.90625 \nz\nM 28.609375 72.90625 \nL 39.796875 72.90625 \nL 67.578125 0 \nL 57.328125 0 \nL 50.6875 18.703125 \nL 17.828125 18.703125 \nL 11.1875 0 \nL 0.78125 0 \nz\n\" id=\"DejaVuSans-65\"/>\n       <path d=\"M 48.78125 52.59375 \nL 48.78125 44.1875 \nQ 44.96875 46.296875 41.140625 47.34375 \nQ 37.3125 48.390625 33.40625 48.390625 \nQ 24.65625 48.390625 19.8125 42.84375 \nQ 14.984375 37.3125 14.984375 27.296875 \nQ 14.984375 17.28125 19.8125 11.734375 \nQ 24.65625 6.203125 33.40625 6.203125 \nQ 37.3125 6.203125 41.140625 7.25 \nQ 44.96875 8.296875 48.78125 10.40625 \nL 48.78125 2.09375 \nQ 45.015625 0.34375 40.984375 -0.53125 \nQ 36.96875 -1.421875 32.421875 -1.421875 \nQ 20.0625 -1.421875 12.78125 6.34375 \nQ 5.515625 14.109375 5.515625 27.296875 \nQ 5.515625 40.671875 12.859375 48.328125 \nQ 20.21875 56 33.015625 56 \nQ 37.15625 56 41.109375 55.140625 \nQ 45.0625 54.296875 48.78125 52.59375 \nz\n\" id=\"DejaVuSans-99\"/>\n       <path d=\"M 8.5 21.578125 \nL 8.5 54.6875 \nL 17.484375 54.6875 \nL 17.484375 21.921875 \nQ 17.484375 14.15625 20.5 10.265625 \nQ 23.53125 6.390625 29.59375 6.390625 \nQ 36.859375 6.390625 41.078125 11.03125 \nQ 45.3125 15.671875 45.3125 23.6875 \nL 45.3125 54.6875 \nL 54.296875 54.6875 \nL 54.296875 0 \nL 45.3125 0 \nL 45.3125 8.40625 \nQ 42.046875 3.421875 37.71875 1 \nQ 33.40625 -1.421875 27.6875 -1.421875 \nQ 18.265625 -1.421875 13.375 4.4375 \nQ 8.5 10.296875 8.5 21.578125 \nz\nM 31.109375 56 \nz\n\" id=\"DejaVuSans-117\"/>\n       <path d=\"M 41.109375 46.296875 \nQ 39.59375 47.171875 37.8125 47.578125 \nQ 36.03125 48 33.890625 48 \nQ 26.265625 48 22.1875 43.046875 \nQ 18.109375 38.09375 18.109375 28.8125 \nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.1875 \nQ 20.953125 51.171875 25.484375 53.578125 \nQ 30.03125 56 36.53125 56 \nQ 37.453125 56 38.578125 55.875 \nQ 39.703125 55.765625 41.0625 55.515625 \nz\n\" id=\"DejaVuSans-114\"/>\n       <path d=\"M 32.171875 -5.078125 \nQ 28.375 -14.84375 24.75 -17.8125 \nQ 21.140625 -20.796875 15.09375 -20.796875 \nL 7.90625 -20.796875 \nL 7.90625 -13.28125 \nL 13.1875 -13.28125 \nQ 16.890625 -13.28125 18.9375 -11.515625 \nQ 21 -9.765625 23.484375 -3.21875 \nL 25.09375 0.875 \nL 2.984375 54.6875 \nL 12.5 54.6875 \nL 29.59375 11.921875 \nL 46.6875 54.6875 \nL 56.203125 54.6875 \nz\n\" id=\"DejaVuSans-121\"/>\n      </defs>\n      <use xlink:href=\"#DejaVuSans-65\"/>\n      <use x=\"66.658203\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"121.638672\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"176.619141\" xlink:href=\"#DejaVuSans-117\"/>\n      <use x=\"239.998047\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"281.111328\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"342.390625\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"397.371094\" xlink:href=\"#DejaVuSans-121\"/>\n     </g>\n    </g>\n   </g>\n   <g id=\"patch_8\">\n    <path d=\"M 43.78125 239.758125 \nL 43.78125 22.318125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_9\">\n    <path d=\"M 378.58125 239.758125 \nL 378.58125 22.318125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_10\">\n    <path d=\"M 43.78125 239.758125 \nL 378.58125 239.758125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_11\">\n    <path d=\"M 43.78125 22.318125 \nL 378.58125 22.318125 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"text_14\">\n    <!-- 97% -->\n    <g transform=\"translate(73.249787 23.595078)scale(0.1 -0.1)\">\n     <defs>\n      <path d=\"M 10.984375 1.515625 \nL 10.984375 10.5 \nQ 14.703125 8.734375 18.5 7.8125 \nQ 22.3125 6.890625 25.984375 6.890625 \nQ 35.75 6.890625 40.890625 13.453125 \nQ 46.046875 20.015625 46.78125 33.40625 \nQ 43.953125 29.203125 39.59375 26.953125 \nQ 35.25 24.703125 29.984375 24.703125 \nQ 19.046875 24.703125 12.671875 31.3125 \nQ 6.296875 37.9375 6.296875 49.421875 \nQ 6.296875 60.640625 12.9375 67.421875 \nQ 19.578125 74.21875 30.609375 74.21875 \nQ 43.265625 74.21875 49.921875 64.515625 \nQ 56.59375 54.828125 56.59375 36.375 \nQ 56.59375 19.140625 48.40625 8.859375 \nQ 40.234375 -1.421875 26.421875 -1.421875 \nQ 22.703125 -1.421875 18.890625 -0.6875 \nQ 15.09375 0.046875 10.984375 1.515625 \nz\nM 30.609375 32.421875 \nQ 37.25 32.421875 41.125 36.953125 \nQ 45.015625 41.5 45.015625 49.421875 \nQ 45.015625 57.28125 41.125 61.84375 \nQ 37.25 66.40625 30.609375 66.40625 \nQ 23.96875 66.40625 20.09375 61.84375 \nQ 16.21875 57.28125 16.21875 49.421875 \nQ 16.21875 41.5 20.09375 36.953125 \nQ 23.96875 32.421875 30.609375 32.421875 \nz\n\" id=\"DejaVuSans-57\"/>\n      <path d=\"M 8.203125 72.90625 \nL 55.078125 72.90625 \nL 55.078125 68.703125 \nL 28.609375 0 \nL 18.3125 0 \nL 43.21875 64.59375 \nL 8.203125 64.59375 \nz\n\" id=\"DejaVuSans-55\"/>\n      <path d=\"M 72.703125 32.078125 \nQ 68.453125 32.078125 66.03125 28.46875 \nQ 63.625 24.859375 63.625 18.40625 \nQ 63.625 12.0625 66.03125 8.421875 \nQ 68.453125 4.78125 72.703125 4.78125 \nQ 76.859375 4.78125 79.265625 8.421875 \nQ 81.6875 12.0625 81.6875 18.40625 \nQ 81.6875 24.8125 79.265625 28.4375 \nQ 76.859375 32.078125 72.703125 32.078125 \nz\nM 72.703125 38.28125 \nQ 80.421875 38.28125 84.953125 32.90625 \nQ 89.5 27.546875 89.5 18.40625 \nQ 89.5 9.28125 84.9375 3.921875 \nQ 80.375 -1.421875 72.703125 -1.421875 \nQ 64.890625 -1.421875 60.34375 3.921875 \nQ 55.8125 9.28125 55.8125 18.40625 \nQ 55.8125 27.59375 60.375 32.9375 \nQ 64.9375 38.28125 72.703125 38.28125 \nz\nM 22.3125 68.015625 \nQ 18.109375 68.015625 15.6875 64.375 \nQ 13.28125 60.75 13.28125 54.390625 \nQ 13.28125 47.953125 15.671875 44.328125 \nQ 18.0625 40.71875 22.3125 40.71875 \nQ 26.5625 40.71875 28.96875 44.328125 \nQ 31.390625 47.953125 31.390625 54.390625 \nQ 31.390625 60.6875 28.953125 64.34375 \nQ 26.515625 68.015625 22.3125 68.015625 \nz\nM 66.40625 74.21875 \nL 74.21875 74.21875 \nL 28.609375 -1.421875 \nL 20.796875 -1.421875 \nz\nM 22.3125 74.21875 \nQ 30.03125 74.21875 34.609375 68.875 \nQ 39.203125 63.53125 39.203125 54.390625 \nQ 39.203125 45.171875 34.640625 39.84375 \nQ 30.078125 34.515625 22.3125 34.515625 \nQ 14.546875 34.515625 10.03125 39.859375 \nQ 5.515625 45.21875 5.515625 54.390625 \nQ 5.515625 63.484375 10.046875 68.84375 \nQ 14.59375 74.21875 22.3125 74.21875 \nz\n\" id=\"DejaVuSans-37\"/>\n     </defs>\n     <use xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-55\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_15\">\n    <!-- 71% -->\n    <g transform=\"translate(136.658878 80.65851)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-55\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-49\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_16\">\n    <!-- 88% -->\n    <g transform=\"translate(200.067969 43.32254)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-56\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-56\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_17\">\n    <!-- 99% -->\n    <g transform=\"translate(263.47706 19.50351)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_18\">\n    <!-- 98% -->\n    <g transform=\"translate(326.886151 20.855117)scale(0.1 -0.1)\">\n     <use xlink:href=\"#DejaVuSans-57\"/>\n     <use x=\"63.623047\" xlink:href=\"#DejaVuSans-56\"/>\n     <use x=\"127.246094\" xlink:href=\"#DejaVuSans-37\"/>\n    </g>\n   </g>\n   <g id=\"text_19\">\n    <!-- Task Accuracy -->\n    <g transform=\"translate(168.929063 16.318125)scale(0.12 -0.12)\">\n     <defs>\n      <path id=\"DejaVuSans-32\"/>\n     </defs>\n     <use xlink:href=\"#DejaVuSans-84\"/>\n     <use x=\"44.583984\" xlink:href=\"#DejaVuSans-97\"/>\n     <use x=\"105.863281\" xlink:href=\"#DejaVuSans-115\"/>\n     <use x=\"157.962891\" xlink:href=\"#DejaVuSans-107\"/>\n     <use x=\"215.873047\" xlink:href=\"#DejaVuSans-32\"/>\n     <use x=\"247.660156\" xlink:href=\"#DejaVuSans-65\"/>\n     <use x=\"314.318359\" xlink:href=\"#DejaVuSans-99\"/>\n     <use x=\"369.298828\" xlink:href=\"#DejaVuSans-99\"/>\n     <use x=\"424.279297\" xlink:href=\"#DejaVuSans-117\"/>\n     <use x=\"487.658203\" xlink:href=\"#DejaVuSans-114\"/>\n     <use x=\"528.771484\" xlink:href=\"#DejaVuSans-97\"/>\n     <use x=\"590.050781\" xlink:href=\"#DejaVuSans-99\"/>\n     <use x=\"645.03125\" xlink:href=\"#DejaVuSans-121\"/>\n    </g>\n   </g>\n  </g>\n </g>\n <defs>\n  <clipPath id=\"p5b6ae91fee\">\n   <rect height=\"217.44\" width=\"334.8\" x=\"43.78125\" y=\"22.318125\"/>\n  </clipPath>\n </defs>\n</svg>\n",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAcv0lEQVR4nO3de7xUdb3/8dd76ybBS0RCyUUxDyoXE3GHpNmxLNJtiYimmFodf2IXTEXzaL/0qGEXO4QHo6NmHryDphUZikSURxJ1k4ggoWgkFwskhGRUbp/zx1rosNmX2ciaYe/1fj4e83DWmu+s9VkI857v97vWGkUEZmaWX1WVLsDMzCrLQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnILDckTRB0uhK12G2s3AQ2E5P0utFj82S3iha/kKZapggaaOkfcqxP7NychDYTi8i9tjyAF4GPle07q6s9y9pd2AYsAY4M+v91dv3ruXcn+WTg8BaLUkDJT0u6TVJr0j6saR26WuSNFbSCklrJT0rqV8D29hT0gxJ4ySpkV0NA14DrgG+WO/9nST9j6TlklZL+mXRa0MkzUn3/6Kk49L1iyV9qqjdVZLuTJ/3lBSSzpH0MvC7dP19kv4maY2kRyX1LXp/e0ljJP01ff2xdN1vJJ1fr965koa24I/ZcsBBYK3ZJuAiYG/go8CxwNfS1wYDHwcOBN4LfB5YVfxmSe8HpgMzI+Ib0fj9Vr4I3ANMBA6WdHjRa3cAHYC+QBdgbLrtgcDtwDeBjmkti1twbP8K9AY+ky4/BPRK9/EnoLgn9J/A4cCRQCfgUmAzcBtFPRhJhwLdgN+0oA7LAQeBtVoRMTsiZkXExohYDNxE8gEKsAHYEzgYUEQsiIhXit7eFfgDcF9EfLuxfUjaF/gEcHdE/J0kOM5OX9sHOB74SkSsjogNEfGH9K3nALdGxLSI2BwRyyLizy04vKsiYl1EvJEe660R8c+IeAu4CjhU0nslVQH/BlyQ7mNTRPwxbTcZOFBSr3SbZwGTImJ9C+pA0gWS5kmaL+nCdN2haW/sWUm/lrRXuv6otNdRt2W/kjpKeiSt1XZC/h9jrZakAyU9mA6ZrAW+S9I7ICJ+B/wYGA+skHTzlg+r1AlAe+DGZnZzFrAgIuaky3cBZ0iqBnoA/4iI1Q28rwfw4nYeGsCSLU8k7SLp++nw0lre6VnsnT52a2hfEfEmMAk4M/0QHk7SgylZOpx2LjAQOBT4rKR/AW4BLouIQ4BfkPR8AC4GaoELga+k674NfDciNrdk31Y+DgJrzf4b+DPQKyL2Ar4FvD3OHxHjIuJwoA/JENE3i977U+BhYEo6GdyYs4EPpWHzN+BHJB++tSQf1p0kdWzgfUuAAxrZ5jqS4aQtPthAm+JhqjOAIcCnSIa5eqbrBbwKvNnEvm4DvkAybFaIiMcbadeY3sATEVGIiI0kvaiTSf48H03bTCOZR4GkJ9YhfWyQdADQIyJ+38L97jQa6RH1lzQrnQOqS4cCkTQsbfe/6dAjkg6QNKmCh9AsB4G1ZnsCa4HXJR0MfHXLC5I+IumI9Jv7OpIPy/rfSEcCC4FfS2pff+OSPkryATsQ6J8++gF3A2enQ00PAT+R9D5J1ZI+nr79Z8CXJR0rqUpSt7RGgDnA6Wn7GuCUEo7zLZI5jg4kPR8A0m/ZtwI/ktQ17T18VNJ70tcfT497DC3sDaTmAUdLer+kDiQB2AOYTxJOAKem6wC+RzI3cjlJj+xakh5Bq9REj+g64OqI6A9cmS4DnA98hGSY8ox03Wh28j8DB4G1ZpeQ/GP7J8k3/OJvXXul61YDfyX5EP1h8ZvTyeERwFLgV5J2q7f9LwK/iohnI+JvWx7Af5F8IHQiGTraQNIzWUEyJEJEPAl8mWTyeA3JN+n90u1eQRIwq4GrSYKlKbenx7AMeA6Y1cCfw7PAU8A/gB+w9b/t24FDgDub2c82ImJBur1HSHpQc0gm6f8N+Jqk2SRBtT5tPyciBkXEJ4APAa+QnMQ1SdKdkj7Q0hoqrLEeUZD8HYOkl7Y8fb4ZeA/v9IiOBv4WES+Ut+wWigg/SnwAF5B8Q5oPXJium0Tyj2MOydjtnHT9UcBcoI5k6AKSs0ceAaoqfSx+5OdBMrz12A7a1neBr9VbdyDwZL11Sv+udyKZV9mPZCL/2kr/ebTweHsDzwPvJ/lwfxy4IV3/MskQ4DJgv7T9p4HZwK9JAuIRoFOlj6O5hy9WKVG9LuJ64GFJD0bEaUVtxpB8+4N3Js16kkyaXYwnzazM0uGcrwE/eRfb6BIRK9IzqE4GBhWtqyL5e11/0v1sYEpE/COtYXP66EArEhELJG3pEa3jnR7RV4GLIuJ+SZ8nGQr8VERMI5kzQdLZwBSSM7cuIekBXhARhfIfSdMyGxqSdKuSi3nmNfK6lFzEsyg93WxAVrXsII11EYHkeEjOVb8nXdXmJs2sdZH0GWAl8HeaH35qyv2SniP5lvv1iHgNGC7peZIhseXA/xTttwPwJZIztiCZYJ8CXE/zZ2ntdCLiZxFxeER8nOTD/HmSYcMH0ib3kXxBfFu9P4Or0/aPkUzc73wy7FJ9HBgAzGvk9VqSiTYBg0g+ZCveRWppF7He8dYVLfcnGcudAXQnuRipV6WPww8//GjZA+iS/ndfkuDrCCwAjknXHwvMrvee/wBOSp8/mn5mnEXSI6j4MdV/ZDY0FBGPSurZRJMhwO2R/EnNSi862Se2vuhnpxGNdxG3GM47vQEiOe98EEB6Jsnbk2YkvYWLI7lAycx2bvenp4JuIO0RSToX+C8l94J6k+SkAwAkdQUGRsTV6aobSCbyXwNOKmfhpVKaWNlsPAmCByOioXu8PAh8PyIeS5enA/8eEXUNtB1B+ge9++67H37wwQfXb1J2y5Yto7q6mi5duhARzJ07l969e9OuXbut2kUEL7zwAh/60IdYsmQJXbt2Zf369axdu5Zu3bpVqHozy5vZs2e/GhGdG3qtVUwWR8TNwM0ANTU1UVe3TVaUxYoVK+jSpQsvv/wygwcPZtasWXTs2JGHH36Y733ve/zhD3/Y5j233XYbq1ev5sILL2To0KGMGzeOxYsX88ADDzB27NgKHIWZ5ZGkvzb2WiWDYBnvXIQCyTj6sgrVUpJhw4axatUqqqurGT9+PB07dgRg4sSJDB8+fJv2hUKBCRMm8MgjjwAwatQoamtradeuHXff/W7m7szMdpxKDg2dQHJlZy1wBDAuIgbWb1dfJXsEZq1Vz8vaxg1HF3//hEqX0GpJmh0RNQ29llmPQNI9wDHA3pKWksyiVwNExI0kp5PVAouAAslVmGZmO1RbCUHILgizPGto27GSrV8P4OtZ7d/MzErjew2ZmeVcqzhraEdxF9HMbFvuEZiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYFaCsWPH0rdvX/r168fw4cN58803mT59OgMGDKB///587GMfY9GiRQDccMMN9OvXj9raWtavXw/AY489xkUXXVTJQzBrlIPArBnLli1j3Lhx1NXVMW/ePDZt2sTEiRP56le/yl133cWcOXM444wzGD16NAB33XUXc+fO5cgjj2Tq1KlEBN/5zne44oorKnwkZg1zEJiVYOPGjbzxxhts3LiRQqFA165dkcTatWsBWLNmDV27dgWS36DYsGEDhUKB6upq7rzzTo4//ng6depUyUMwa1Suriw22x7dunXjkksuYd9996V9+/YMHjyYwYMHc8stt1BbW0v79u3Za6+9mDVrFgAjR45k0KBB9O3bl6OOOoohQ4YwderUCh+FWePcIzBrxurVq/nVr37FX/7yF5YvX866deu48847GTt2LFOmTGHp0qV8+ctfZtSoUQCcddZZPP3002+3+cY3vsFDDz3EKaecwkUXXcTmzZsrfERmW3MQmDXjt7/9Lfvvvz+dO3emurqak08+mZkzZ/LMM89wxBFHAHDaaafxxz/+cav3LV++nCeffJKTTjqJMWPGMGnSJDp27Mj06dMrcRhmjXIQmDVj3333ZdasWRQKBSKC6dOn06dPH9asWcPzzz8PwLRp0+jdu/dW77viiiu45pprAHjjjTeQRFVVFYVCoezHYNYUzxGYNeOII47glFNOYcCAAey6664cdthhjBgxgu7duzNs2DCqqqp43/vex6233vr2e55++mkABgwYAMAZZ5zBIYccQo8ePbj00ksrchxmjcn0pyqz8G5+qtK3oba8ait/97fn731bOXZ4d//um/qpSg8NmZnlnIPAzCznHARmZjnnyWLLBY8TmzXOPQIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOZRoEko6TtFDSIkmXNfD6vpJmSHpa0lxJtVnWY2Zm28osCCTtAowHjgf6AMMl9anX7NvAvRFxGHA68JOs6jEzs4Zl2SMYCCyKiJciYj0wERhSr00Ae6XP3wssz7AeMzNrQJZB0A1YUrS8NF1X7CrgTElLgSnA+Q1tSNIISXWS6lauXJlFrWZmuVXpyeLhwISI6A7UAndI2qamiLg5ImoioqZz585lL9LMrC3LMgiWAT2Klrun64qdA9wLEBGPA7sBe2dYk5mZ1ZNlEDwF9JK0v6R2JJPBk+u1eRk4FkBSb5Ig8NiPmVkZZRYEEbERGAlMBRaQnB00X9I1kk5Mm10MnCvpGeAe4EsREVnVZGZm29o1y41HxBSSSeDidVcWPX8OOCrLGszMrGmVniw2M7MKcxCYmeWcg8BKsnDhQvr37//2Y6+99uL666/nvvvuo2/fvlRVVVFXV/d2+5kzZ/LhD3+YmpoaXnjhBQBee+01Bg8ezObNmyt1GGbWgEznCKztOOigg5gzZw4AmzZtolu3bgwdOpRCocADDzzAeeedt1X7MWPGMGXKFBYvXsyNN97ImDFjGD16NN/61reoqvL3D7OdiYPAWmz69OkccMAB7Lfffo22qa6uplAoUCgUqK6u5sUXX2TJkiUcc8wx5SvUzEriILAWmzhxIsOHD2+yzeWXX87ZZ59N+/btueOOO7jkkksYPXp0mSo0s5ZwH91aZP369UyePJlTTz21yXb9+/dn1qxZzJgxg5deeol99tmHiOC0007jzDPP5O9//3uZKjaz5rhHYC3y0EMPMWDAAD7wgQ+U1D4iGD16NBMnTuT888/nuuuuY/HixYwbN45rr70242rNrBTuEViL3HPPPc0OCxW7/fbbqa2tpVOnThQKBaqqqqiqqqJQKGRYpZm1hHsEVrJ169Yxbdo0brrpprfX/eIXv+D8889n5cqVnHDCCfTv35+pU6cCUCgUmDBhAo888ggAo0aNora2lnbt2nH33XdX5BjMbFsOAivZ7rvvzqpVq7ZaN3ToUIYOHdpg+w4dOjBjxoy3l48++mieffbZTGs0s5bz0JCZWc45CMzMcs5BYGaWc54jyJGel/2m0iXsEIu/f0KlSzBrU9wjMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMci7TIJB0nKSFkhZJuqyRNp+X9Jyk+ZLuzrIeMzPbVmY/Xi9pF2A88GlgKfCUpMkR8VxRm17A5cBREbFaUpes6jEzs4Zl2SMYCCyKiJciYj0wERhSr825wPiIWA0QESsyrMfMzBqQZRB0A5YULS9N1xU7EDhQ0kxJsyQd19CGJI2QVCepbuXKlRmVa2aWT5WeLN4V6AUcAwwHfiqpY/1GEXFzRNRERE3nzp3LW6GZWRvXbBBI+pyk7QmMZUCPouXu6bpiS4HJEbEhIv4CPE8SDGZmVialfMCfBrwg6TpJB7dg208BvSTtL6kdcDowuV6bX5L0BpC0N8lQ0Ust2IeZmb1LzQZBRJwJHAa8CEyQ9Hg6Zr9nM+/bCIwEpgILgHsjYr6kaySdmDabCqyS9BwwA/hmRKx6F8djZmYtVNLpoxGxVtLPgfbAhcBQ4JuSxkXEDU28bwowpd66K4ueBzAqfZiZWQWUMkdwoqRfAL8HqoGBEXE8cChwcbblmZlZ1krpEQwDxkbEo8UrI6Ig6ZxsyjIzs3IpJQiuAl7ZsiCpPfCBiFgcEdOzKszMzMqjlLOG7gM2Fy1vSteZmVkbUEoQ7JreIgKA9Hm77EoyM7NyKiUIVhad7omkIcCr2ZVkZmblVMocwVeAuyT9GBDJ/YPOzrQqMzMrm2aDICJeBAZJ2iNdfj3zqszMrGxKuqBM0glAX2A3SQBExDUZ1mVmZmVSygVlN5Lcb+h8kqGhU4H9Mq7LzMzKpJTJ4iMj4mxgdURcDXyU5OZwZmbWBpQSBG+m/y1I6gpsAPbJriQzMyunUuYIfp3+WMwPgT8BAfw0y6LMzKx8mgyC9AdppkfEa8D9kh4EdouINeUozszMstfk0FBEbAbGFy2/5RAwM2tbSpkjmC5pmLacN2pmZm1KKUFwHslN5t6StFbSPyWtzbguMzMrk1KuLG7yJynNzKx1azYIJH28ofX1f6jGzMxap1JOH/1m0fPdgIHAbOCTmVRkZmZlVcrQ0OeKlyX1AK7PqiAzMyuvUiaL61sK9N7RhZiZWWWUMkdwA8nVxJAER3+SK4zNzKwNKGWOoK7o+UbgnoiYmVE9ZmZWZqUEwc+BNyNiE4CkXSR1iIhCtqWZmVk5lHRlMdC+aLk98NtsyjEzs3IrJQh2K/55yvR5h+xKMjOzciolCNZJGrBlQdLhwBvZlWRmZuVUyhzBhcB9kpaT/FTlB0l+utLMzNqAUi4oe0rSwcBB6aqFEbEh27LMzKxcSvnx+q8Du0fEvIiYB+wh6WvZl2ZmZuVQyhzBuekvlAEQEauBczOryMzMyqqUINil+EdpJO0CtMuuJDMzK6dSJosfBiZJuildPg94KLuSzMysnEoJgn8HRgBfSZfnkpw5ZGZmbUCzQ0PpD9g/ASwm+S2CTwILStm4pOMkLZS0SNJlTbQbJikk1ZRWtpmZ7SiN9ggkHQgMTx+vApMAIuITpWw4nUsYD3ya5NbVT0maHBHP1Wu3J3ABSdiYmVmZNdUj+DPJt//PRsTHIuIGYFMLtj0QWBQRL0XEemAiMKSBdt8BfgC82YJtm5nZDtJUEJwMvALMkPRTSceSXFlcqm7AkqLlpem6t6W3rugREb9pakOSRkiqk1S3cuXKFpRgZmbNaTQIIuKXEXE6cDAwg+RWE10k/bekwe92x5KqgB8BFzfXNiJujoiaiKjp3Lnzu921mZkVKWWyeF1E3J3+dnF34GmSM4maswzoUbTcPV23xZ5AP+D3khYDg4DJnjA2MyuvFv1mcUSsTr+dH1tC86eAXpL2l9QOOB2YXLStNRGxd0T0jIiewCzgxIioa3hzZmaWhe358fqSRMRGYCQwleR003sjYr6kaySdmNV+zcysZUq5oGy7RcQUYEq9dVc20vaYLGsxM7OGZdYjMDOz1sFBYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnOZBoGk4yQtlLRI0mUNvD5K0nOS5kqaLmm/LOsxM7NtZRYEknYBxgPHA32A4ZL61Gv2NFATER8Gfg5cl1U9ZmbWsCx7BAOBRRHxUkSsByYCQ4obRMSMiCiki7OA7hnWY2ZmDcgyCLoBS4qWl6brGnMO8FBDL0gaIalOUt3KlSt3YIlmZrZTTBZLOhOoAX7Y0OsRcXNE1ERETefOnctbnJlZG7drhtteBvQoWu6ertuKpE8B/x/414h4K8N6zMysAVn2CJ4CeknaX1I74HRgcnEDSYcBNwEnRsSKDGsxM7NGZBYEEbERGAlMBRYA90bEfEnXSDoxbfZDYA/gPklzJE1uZHNmZpaRLIeGiIgpwJR6664sev6pLPdvZmbN2ykmi83MrHIcBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzy7lMg0DScZIWSlok6bIGXn+PpEnp609I6pllPWZmtq3MgkDSLsB44HigDzBcUp96zc4BVkfEvwBjgR9kVY+ZmTUsyx7BQGBRRLwUEeuBicCQem2GALelz38OHCtJGdZkZmb1KCKy2bB0CnBcRPy/dPks4IiIGFnUZl7aZmm6/GLa5tV62xoBjEgXDwIWZlL0jrM38GqzrdomH3t+5fn4W8Ox7xcRnRt6YddyV7I9IuJm4OZK11EqSXURUVPpOirBx57PY4d8H39rP/Ysh4aWAT2Klrun6xpsI2lX4L3AqgxrMjOzerIMgqeAXpL2l9QOOB2YXK/NZOCL6fNTgN9FVmNVZmbWoMyGhiJio6SRwFRgF+DWiJgv6RqgLiImAz8D7pC0CPgHSVi0Ba1mGCsDPvb8yvPxt+pjz2yy2MzMWgdfWWxmlnMOAjOznHMQ7EDN3VKjLZN0q6QV6bUhuSKph6QZkp6TNF/SBZWuqVwk7SbpSUnPpMd+daVrqgRJu0h6WtKDla5lezgIdpASb6nRlk0Ajqt0ERWyEbg4IvoAg4Cv5+j//VvAJyPiUKA/cJykQZUtqSIuABZUuojt5SDYcUq5pUabFRGPkpz5lTsR8UpE/Cl9/k+SD4Rula2qPCLxerpYnT5ydQaKpO7ACcAtla5lezkIdpxuwJKi5aXk5MPA3pHeQfcw4IkKl1I26bDIHGAFMC0icnPsqeuBS4HNFa5juzkIzHYQSXsA9wMXRsTaStdTLhGxKSL6k9w9YKCkfhUuqWwkfRZYERGzK13Lu+Eg2HFKuaWGtVGSqklC4K6IeKDS9VRCRLwGzCBfc0VHASdKWkwyHPxJSXdWtqSWcxDsOKXcUsPaoPTW6T8DFkTEjypdTzlJ6iypY/q8PfBp4M8VLaqMIuLyiOgeET1J/s3/LiLOrHBZLeYg2EEiYiOw5ZYaC4B7I2J+ZasqH0n3AI8DB0laKumcStdURkcBZ5F8G5yTPmorXVSZ7APMkDSX5MvQtIholadQ5plvMWFmlnPuEZiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc61ih+vN6skSe8HpqeLHwQ2ASvT5YHpvaWaev+XgJqIGJlZkWbvgoPArBkRsYrkzppIugp4PSL+s5I1me1IHhoy2w6SzpX0VHof/vsldUjXnyppXrr+0Qbed4KkxyXtXf6qzRrmIDDbPg9ExEfS+/AvALZcSX0l8Jl0/YnFb5A0FLgMqI2IV8tarVkTPDRktn36SRoNdAT2ILm1CMBMYIKke4Him899EqgBBufpzqTWOrhHYLZ9JgAjI+IQ4GpgN4CI+ArwbZI70c5OJ5oBXgT2BA4sf6lmTXMQmG2fPYFX0ttPf2HLSkkHRMQTEXElyZlFW25N/ldgGHC7pL5lr9asCQ4Cs+1zBcmvkM1k69su/1DSs5LmAX8EntnyQkT8mSQ07pN0QDmLNWuK7z5qZpZz7hGYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnP/B0iPrwaXcQuCAAAAAElFTkSuQmCC\n"
     },
     "metadata": {
      "needs_background": "light"
     }
    }
   ],
   "source": [
    "results.make_plots()\n",
    "improved_results.make_plots()"
   ]
  }
 ]
}

================================================
FILE: examples/basic/quick_demo.py
================================================
""" Demo: Creates a simple new method and applies it to a single CL setting.
"""
import sys
from argparse import Namespace
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Type

import gym
import pandas as pd
import torch
import tqdm
from gym import spaces
from numpy import inf
from simple_parsing import ArgumentParser
from torch import Tensor, nn

from sequoia import Method, Setting
from sequoia.common import Config
from sequoia.settings import Environment
from sequoia.settings.sl import DomainIncrementalSLSetting
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.settings.sl.incremental.objects import Actions, Observations, Rewards
from sequoia.settings.sl.incremental.results import IncrementalSLResults as Results


class MyModel(nn.Module):
    """Simple classification model without any CL-related mechanism.

    To keep things simple, this demo model is designed for supervised
    (classification) settings where observations have shape [3, 28, 28] (ie the
    MNIST variants: Mnist, FashionMnist, RotatedMnist, EMnist, etc.)

    NOTE: You are free to use whatever kind of Model you want, or even not to use one
    at all! This is just an example to help you get started quickly.
    """

    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space,
    ):
        super().__init__()

        image_shape = observation_space["x"].shape
        assert image_shape == (3, 28, 28), "this example only works on mnist-like data"
        assert isinstance(action_space, spaces.Discrete)
        assert action_space == reward_space
        n_classes = action_space.n
        image_channels = image_shape[0]

        self.encoder = nn.Sequential(
            nn.Conv2d(image_channels, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, n_classes),
        )
        self.loss = nn.CrossEntropyLoss()

    def forward(self, observations: Observations) -> Tensor:
        # NOTE: here we don't make use of the task labels.
        x = observations.x
        task_labels = observations.task_labels
        features = self.encoder(x)
        logits = self.classifier(features)
        return logits

    def shared_step(
        self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment
    ) -> Tuple[Tensor, Dict]:
        """Shared step used for both training and validation.

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            Batch containing Observations, and optional Rewards. When the Rewards are
            None, it means that we'll need to provide the Environment with actions
            before we can get the Rewards (e.g. image labels) back.

            This happens for example when being applied in a Setting which cares about
            sample efficiency or training performance, for example.

        environment : Environment
            The environment we're currently interacting with. Used to provide the
            rewards when they aren't already part of the batch (as mentioned above).

        Returns
        -------
        Tuple[Tensor, Dict]
            The Loss tensor, and a dict of metrics to be logged.
        """
        # Since we're training on a Passive environment, we will get both observations
        # and rewards, unless we're being evaluated based on our training performance,
        # in which case we will need to send actions to the environments before we can
        # get the corresponding rewards (image labels).
        observations: Observations = batch[0]
        rewards: Optional[Rewards] = batch[1]
        # Get the predictions:
        logits = self(observations)
        y_pred = logits.argmax(-1)

        if rewards is None:
            # If the rewards in the batch is None, it means we're expected to give
            # actions before we can get rewards back from the environment.
            rewards = environment.send(Actions(y_pred))

        assert rewards is not None
        image_labels = rewards.y

        loss = self.loss(logits, image_labels)

        accuracy = (y_pred == image_labels).sum().float() / len(image_labels)
        metrics_dict = {"accuracy": accuracy.item()}
        return loss, metrics_dict


class DemoMethod(Method, target_setting=DomainIncrementalSLSetting):
    """Minimal example of a Method targetting the Class-Incremental CL setting.

    For a quick intro to dataclasses, see examples/dataclasses_example.py
    """

    @dataclass
    class HParams:
        """Hyper-parameters of the demo model."""

        # Learning rate of the optimizer.
        learning_rate: float = 0.001

    def __init__(self, hparams: HParams = None):
        self.hparams: DemoMethod.HParams = hparams or self.HParams()
        self.max_epochs: int = 1
        self.early_stop_patience: int = 2

        # We will create those when `configure` will be called, before training.
        self.model: MyModel
        self.optimizer: torch.optim.Optimizer

    def configure(self, setting: DomainIncrementalSLSetting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        self.model = MyModel(
            observation_space=setting.observation_space,
            action_space=setting.action_space,
            reward_space=setting.reward_space,
        )
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=self.hparams.learning_rate,
        )

    def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
        """Example train loop.
        You can do whatever you want with train_env and valid_env here.

        NOTE: In the Settings where task boundaries are known (in this case all
        the supervised CL settings), this will be called once per task.
        """
        # configure() will have been called by the setting before we get here.
        best_val_loss = inf
        best_epoch = 0
        for epoch in range(self.max_epochs):
            self.model.train()
            print(f"Starting epoch {epoch}")
            postfix = {}
            # Training loop:
            with tqdm.tqdm(train_env) as train_pbar:
                train_pbar.set_description(f"Training Epoch {epoch}")
                for i, batch in enumerate(train_pbar):
                    loss, metrics_dict = self.model.shared_step(batch, environment=train_env)
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    postfix.update(metrics_dict)
                    train_pbar.set_postfix(postfix)

            # Validation loop:
            self.model.eval()
            torch.set_grad_enabled(False)
            with tqdm.tqdm(valid_env) as val_pbar:
                val_pbar.set_description(f"Validation Epoch {epoch}")
                epoch_val_loss = 0.0

                for i, batch in enumerate(val_pbar):
                    batch_val_loss, metrics_dict = self.model.shared_step(
                        batch, environment=valid_env
                    )
                    epoch_val_loss += batch_val_loss
                    postfix.update(metrics_dict, val_loss=epoch_val_loss)
                    val_pbar.set_postfix(postfix)
            torch.set_grad_enabled(True)

            if epoch_val_loss < best_val_loss:
                best_val_loss = epoch_val_loss
                best_epoch = epoch
            if epoch - best_epoch > self.early_stop_patience:
                print(f"Early stopping at epoch {i}.")
                break

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        """Get a batch of predictions (aka actions) for these observations."""
        with torch.no_grad():
            logits = self.model(observations)
        # Get the predicted classes
        y_pred = logits.argmax(dim=-1)
        return self.target_setting.Actions(y_pred)

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser):
        """Adds command-line arguments for this Method to an argument parser."""
        parser.add_arguments(cls.HParams, "hparams")

    @classmethod
    def from_argparse_args(cls, args: Namespace):
        """Creates an instance of this Method from the parsed arguments."""
        hparams: cls.HParams = args.hparams
        return cls(hparams=hparams)


def demo_simple():
    """Simple demo: Creating and applying a Method onto a Setting."""
    from sequoia.settings.sl import DomainIncrementalSLSetting

    ## 1. Creating the setting:
    setting = DomainIncrementalSLSetting(dataset="fashionmnist", batch_size=32)
    ## 2. Creating the Method
    method = DemoMethod()
    # (Optional): You can also create a Config, which holds other fields like
    # `log_dir`, `debug`, `device`, etc. which aren't specific to either the
    # Setting or the Method.
    config = Config(debug=True, render=False, device="cpu")
    ## 3. Applying the method to the setting: (optionally passing a Config to
    # use for that run)
    results = setting.apply(method, config=config)
    print(results.summary())
    print(f"objective: {results.objective}")


def demo_command_line():
    """Run this quick demo from the command-line."""
    parser = ArgumentParser(description=__doc__)
    # Add command-line arguments for the Method and the Setting.
    DemoMethod.add_argparse_args(parser)
    # Add command-line arguments for the Setting and the Config (an object with
    # options like log_dir, debug, etc, which are not part of the Setting or the
    # Method) using simple-parsing.
    parser.add_arguments(DomainIncrementalSLSetting, "setting")
    parser.add_arguments(Config, "config")
    args = parser.parse_args()

    # Create the Method from the parsed arguments
    method: DemoMethod = DemoMethod.from_argparse_args(args)
    # Extract the Setting and Config from the args.
    setting: DomainIncrementalSLSetting = args.setting
    config: Config = args.config

    # Run the demo, applying that DemoMethod on the given setting.
    results: Results = setting.apply(method, config=config)
    print(results.summary())
    print(f"objective: {results.objective}")


if __name__ == "__main__":
    # Example: Evaluate a Method on a single CL setting:

    ###
    ### First option: Run the demo, creating the Setting and Method directly.
    ###
    # demo_simple()

    ##
    ## Second part of the demo: Same as before, but customize the options for
    ## the Setting and the Method from the command-line.
    ##

    demo_command_line()

    ##
    ## As a little bonus: Evaluate on *ALL* the applicable settings, and
    ## aggregate the results in a nice little LaTeX-formatted table.
    ##

    # from examples.demo_utils import demo_all_settings
    # all_results = demo_all_settings(DemoMethod)


================================================
FILE: examples/basic/quick_demo_ewc.py
================================================
""" Example script: Defines a new Method based on the DemoMethod from the
quick_demo.py script, adding an EWC-like loss to prevent the weights from
changing too much between tasks.
"""
import sys
from copy import deepcopy
from dataclasses import dataclass
from typing import ClassVar, Dict, Optional, Tuple

import gym
import torch
from torch import Tensor

from examples.basic.quick_demo import DemoMethod, MyModel
from sequoia.settings import DomainIncrementalSLSetting
from sequoia.settings.sl.incremental.objects import Observations, Rewards
from sequoia.utils.utils import dict_intersection
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


class MyImprovedModel(MyModel):
    """Adds an ewc-like penalty to the demo model."""

    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space,
        ewc_coefficient: float = 1.0,
        ewc_p_norm: int = 2,
    ):
        super().__init__(
            observation_space,
            action_space,
            reward_space,
        )
        self.ewc_coefficient = ewc_coefficient
        self.ewc_p_norm = ewc_p_norm

        self.previous_model_weights: Dict[str, Tensor] = {}

        self._previous_task: Optional[int] = None
        self._n_switches: int = 0

    def shared_step(self, batch: Tuple[Observations, Rewards], *args, **kwargs):
        base_loss, metrics = super().shared_step(batch, *args, **kwargs)
        ewc_loss = self.ewc_coefficient * self.ewc_loss()
        metrics["ewc_loss"] = ewc_loss
        return base_loss + ewc_loss, metrics

    def on_task_switch(self, task_id: int) -> None:
        """Executed when the task switches (to either a known or unknown task)."""
        if self._previous_task is None and self._n_switches == 0:
            logger.debug("Starting the first task, no EWC update.")
        elif task_id is None or task_id != self._previous_task:
            # NOTE: We also switch between unknown tasks.
            logger.debug(
                f"Switching tasks: {self._previous_task} -> {task_id}: "
                f"Updating the EWC 'anchor' weights."
            )
            self._previous_task = task_id
            self.previous_model_weights.clear()
            self.previous_model_weights.update(
                deepcopy({k: v.detach() for k, v in self.named_parameters()})
            )
        self._n_switches += 1

    def ewc_loss(self) -> Tensor:
        """Gets an 'ewc-like' regularization loss.

        NOTE: This is a simplified version of EWC where the loss is the P-norm
        between the current weights and the weights as they were on the begining
        of the task.
        """
        if self._previous_task is None:
            # We're in the first task: do nothing.
            return 0.0

        old_weights: Dict[str, Tensor] = self.previous_model_weights
        new_weights: Dict[str, Tensor] = dict(self.named_parameters())

        loss = 0.0
        for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights):
            loss += torch.dist(new_w, old_w.type_as(new_w), p=self.ewc_p_norm)
        return loss


class ImprovedDemoMethod(DemoMethod):
    """Improved version of the demo method, that adds an ewc-like regularizer."""

    # Name of this method:
    name: ClassVar[str] = "demo_ewc"

    @dataclass
    class HParams(DemoMethod.HParams):
        """Hyperparameters of this new improved method. (Adds ewc params)."""

        # Coefficient of the ewc-like loss.
        ewc_coefficient: float = 1.0
        # Distance norm used in the ewc loss.
        ewc_p_norm: int = 2

    def __init__(self, hparams: HParams = None):
        super().__init__(hparams=hparams or self.HParams.from_args())

    def configure(self, setting: DomainIncrementalSLSetting):
        # Use the improved model, with the added EWC-like term.
        self.model = MyImprovedModel(
            observation_space=setting.observation_space,
            action_space=setting.action_space,
            reward_space=setting.reward_space,
            ewc_coefficient=self.hparams.ewc_coefficient,
            ewc_p_norm=self.hparams.ewc_p_norm,
        )
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=self.hparams.learning_rate,
        )

    def on_task_switch(self, task_id: Optional[int]):
        self.model.on_task_switch(task_id)


def demo_ewc():
    """Demo: Comparing two methods on the same setting:"""

    ## 1. Create the Setting (same as in quick_demo.py)
    setting = DomainIncrementalSLSetting(dataset="fashionmnist", nb_tasks=5, batch_size=64)
    # setting = DomainIncrementalSLSetting.from_args()

    # 2.1: Get the results for the base method
    base_method = DemoMethod()
    base_results = setting.apply(base_method)

    # 2.2: Get the results for the 'improved' method:
    new_method = ImprovedDemoMethod()
    new_results = setting.apply(new_method)

    # Compare the two results:
    print(
        f"\n\nComparison: DemoMethod vs ImprovedDemoMethod - (DomainIncrementalSLSetting, dataset=fashionmnist):"
    )
    print(base_results.summary())
    print(new_results.summary())

    exit()


if __name__ == "__main__":
    # Example: Comparing two methods on the same setting:
    from sequoia.settings import DomainIncrementalSLSetting

    ## 1. Create the Setting (same as in quick_demo.py)
    setting = DomainIncrementalSLSetting(
        dataset="fashionmnist", nb_tasks=5, monitor_training_performance=True
    )
    # setting = DomainIncrementalSLSetting.from_args()

    # Get the results for the base method:
    base_method = DemoMethod()
    base_results = setting.apply(base_method)

    # Get the results for the 'improved' method:
    new_method = ImprovedDemoMethod()
    new_results = setting.apply(new_method)

    print(
        f"\n\nComparison: DemoMethod vs ImprovedDemoMethod - (DomainIncrementalSLSetting, dataset=fashionmnist):"
    )
    print(base_results.summary())
    print(new_results.summary())

    exit()

    ##
    ## As a little bonus: Evaluate *both* methods on *ALL* their applicable
    ## settings, and aggregate the results in a nice LaTeX-formatted table.
    ##
    from examples.demo_utils import compare_results, demo_all_settings

    base_results = demo_all_settings(DemoMethod, datasets=["mnist", "fashionmnist"])
    improved_results = demo_all_settings(
        ImprovedDemoMethod,
        datasets=["mnist", "fashionmnist"],
        monitor_training_performance=True,
    )

    compare_results(
        {
            DemoMethod: base_results,
            ImprovedDemoMethod: improved_results,
        }
    )


================================================
FILE: examples/basic/quick_demo_packnet.py
================================================
from sequoia.methods.packnet_method import PackNetMethod
from sequoia.settings.sl import TaskIncrementalSLSetting

if __name__ == "__main__":
    setting = TaskIncrementalSLSetting(dataset="mnist", nb_tasks=2)

    my_method = PackNetMethod()
    results = setting.apply(my_method)


================================================
FILE: examples/basic/quick_demo_test.py
================================================
""" TODO: Write tests that check that the examples are working correctly.
"""
import contextlib
import sys

import pytest

from examples.basic.quick_demo import demo_command_line, demo_simple
from sequoia.settings import ClassIncrementalSetting, Results


@pytest.mark.timeout(120)
def test_quick_demo(monkeypatch):
    """Test that runs the quick demo and checks that the results correspond to
    what you'd expect.
    """
    results: ClassIncrementalSetting.Results = None
    summary_method = ClassIncrementalSetting.Results.summary

    def summary(self: ClassIncrementalSetting.Results):
        nonlocal results
        results = self
        return summary_method(self)

    monkeypatch.setattr(ClassIncrementalSetting.Results, "summary", summary)

    demo_simple()

    from sequoia.common.metrics import ClassificationMetrics

    # NOTE: Results aren't going to give *exactly* the same results, so we can't
    # test like this directly:
    # assert results.average_metrics_per_task == [
    #     ClassificationMetrics(n_samples=1984, accuracy=0.500504),
    #     ClassificationMetrics(n_samples=2016, accuracy=0.499504),
    #     ClassificationMetrics(n_samples=1984, accuracy=0.817036),
    #     ClassificationMetrics(n_samples=2016, accuracy=0.835317),
    #     ClassificationMetrics(n_samples=1984, accuracy=0.99748),
    # ]

    assert results.final_performance_metrics[0].n_samples == 1984
    assert results.final_performance_metrics[1].n_samples == 2016
    assert results.final_performance_metrics[2].n_samples == 1984
    assert results.final_performance_metrics[3].n_samples == 2016
    assert results.final_performance_metrics[4].n_samples == 1984

    assert 0.48 <= results.final_performance_metrics[0].accuracy <= 0.55
    assert 0.48 <= results.final_performance_metrics[1].accuracy <= 0.70
    assert 0.60 <= results.final_performance_metrics[2].accuracy <= 1.00
    assert 0.70 <= results.final_performance_metrics[3].accuracy <= 1.00
    assert 0.99 <= results.final_performance_metrics[4].accuracy <= 1.00


================================================
FILE: examples/clcomp21/README.md
================================================
## Example Submissions for CLVision Workshop

Examples in this folder are aimed at solving the supervised learning track of the competition.

Each example builds on top of the previous, in a manner that improves the overall performance you can expect on any given CL setting.

As such, it is recommended that you take a look at the examples in the following order:

0. [DummyMethod](dummy_method.py)
    Non-parametric method that simply returns a random prediction for each observation.

1. [Simple Classifier](classifier.py):
    Standard neural net classifier without any CL-related mechanism. Works in the SL track, but has very poor performance.

2. [Multi-Head / Task Inference Classifier](multihead_classifier.py):
    Performs multi-head prediction, and a simple form of task inference. Gets better results that the example.

3. [CL Regularized Classifier](regularization_example.py):
    Adds a simple CL regularization loss to the multihead classifier above.

## RL Examples:

For RL, you can take a look at these examples:

- [A2C Example](a2c_example.py):
    Example where A2C is implemented from scratch as a Method for the RL track. The code for A2C was adapted from [this blogpost.](https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f)

- [SB3 Example](sb3_example.py):
    Example of how we can extend an existing Method from Stable-Baselines3.


================================================
FILE: examples/clcomp21/__init__.py
================================================


================================================
FILE: examples/clcomp21/a2c_example.py
================================================
from argparse import Namespace
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import gym
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from gym import spaces
from gym.spaces.utils import flatdim

# TODO: Migrate stuff to directly import simple-parsing's hparams module.
# from simple_parsing.helpers.hparams import HyperParameters
from simple_parsing import ArgumentParser
from torch import Tensor
from torch.distributions import Categorical

from sequoia.common.hparams import HyperParameters, log_uniform
from sequoia.common.spaces import Image
from sequoia.methods import Method
from sequoia.settings.rl import ActiveEnvironment, RLSetting


class ActorCritic(nn.Module):
    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        hidden_size: int,
    ):
        super().__init__()
        self.observation_space = observation_space
        # NOTE: See note below for why we don't use the task label portion of the space
        # here.
        self.num_inputs = flatdim(self.observation_space.x)
        self.hidden_size = hidden_size

        if not isinstance(action_space, spaces.Discrete):
            raise NotImplementedError("This example only works with discrete action spaces.")
        self.action_space = action_space
        self.num_actions = self.action_space.n

        if self.num_inputs < 100:
            # If we have a reasonably-small input space, use an MLP architecture.
            self.critic = nn.Sequential(
                nn.Flatten(),
                nn.Linear(self.num_inputs, self.hidden_size),
                nn.ReLU(inplace=True),
                nn.Linear(self.hidden_size, 1),
            )
            self.actor = nn.Sequential(
                nn.Flatten(),
                nn.Linear(self.num_inputs, self.hidden_size),
                nn.ReLU(inplace=True),
                nn.Linear(self.hidden_size, self.num_actions),
            )
        else:
            assert isinstance(self.observation_space.x, Image)
            channels = self.observation_space.x.channels
            self.encoder = nn.Sequential(
                nn.Conv2d(channels, 6, kernel_size=5, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(6),
                nn.ReLU(inplace=True),
                nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(16),
                nn.ReLU(inplace=True),
                nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(16),
                nn.AdaptiveAvgPool2d(output_size=(8, 8)),  # [16, 8, 8]
                nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(32),  # [32, 6, 6]
                nn.ReLU(inplace=True),
                nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(32),  # [32, 4, 4]
                nn.Flatten(),
            )
            # NOTE: Here we share the encoder for both the actor and critic.
            self.critic = nn.Sequential(
                self.encoder,
                nn.Linear(512, self.hidden_size),
                nn.ReLU(inplace=True),
                nn.Linear(self.hidden_size, 1),
            )
            self.actor = nn.Sequential(
                self.encoder,
                nn.Linear(512, self.hidden_size),
                nn.ReLU(inplace=True),
                nn.Linear(self.hidden_size, self.num_actions),
            )

    def forward(self, observation: RLSetting.Observations) -> Tuple[Tensor, Categorical]:
        x = observation.x
        state = torch.as_tensor(x, dtype=torch.float)

        # NOTE: Here you could for instance concatenate the task labels onto the state
        # to make the model multi-task! However if you target the IncrementalRLSetting
        # or above, you might not have these task labels at test-time, so that would
        # have to be taken into consideration (e.g. can't concat None to a Tensor)
        # task_labels = observation.task_labels
        x_space = self.observation_space.x
        batched_inputs = state.ndim > len(x_space.shape)
        if not batched_inputs:
            # Add a batch dimension if necessary.
            state = state.unsqueeze(0)

        value = self.critic(state)
        policy_logits = self.actor(state)

        if not batched_inputs:
            # Remove the batch dimension from the predictions if necessary.
            value = value.squeeze(0)
            policy_logits = policy_logits.squeeze(0)

        policy_dist = Categorical(logits=policy_logits)
        # policy_dist = F.relu(self.actor_linear1(state))
        # policy_dist = F.softmax(self.actor_linear2(policy_dist), dim=1)

        return value, policy_dist


class ExampleA2CMethod(Method, target_setting=RLSetting):
    """Example A2C method.

    Most of the code here was taken from:
    https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f
    """

    @dataclass
    class HParams(HyperParameters):
        """Hyper-Parameters of the model, as a dataclass.

        Fields get command-line arguments with simple-parsing.
        """

        # Hidden size (representation size).
        hidden_size: int = 256
        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=3e-4)
        # Discount factor
        gamma: float = 0.99
        # Coefficient for the entropy term in the loss formula.
        entropy_term_coefficient: float = 0.001
        # Maximum length of an episode, when desired. (Generally not needed).
        max_episode_steps: Optional[int] = None

    def __init__(self, hparams: HParams = None, render: bool = False):
        self.hparams = hparams or self.HParams()
        self.task: int = 0
        self.plots_dir: Path = Path("plots")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.render = render

    def configure(self, setting: RLSetting):
        self.actor_critic = ActorCritic(
            observation_space=setting.observation_space,
            action_space=setting.action_space,
            hidden_size=self.hparams.hidden_size,
        ).to(self.device)
        self.ac_optimizer = optim.Adam(
            self.actor_critic.parameters(), lr=self.hparams.learning_rate
        )
        # If there is a limit on the number of steps per task, then observe that limit.
        self.max_training_steps = setting.steps_per_phase

    def fit(self, train_env: ActiveEnvironment, valid_env: ActiveEnvironment):
        assert isinstance(train_env, gym.Env)  # Just to illustrate that it's a gym Env.

        # NOTE: This example only works if the environment isn't vectorized.

        all_lengths: List[int] = []
        average_lengths: List[float] = []
        all_rewards: List[float] = []
        episode = 0
        total_steps = 0

        while not train_env.is_closed() and total_steps < self.max_training_steps:
            episode += 1

            log_probs: List[Tensor] = []
            values: List[Tensor] = []
            rewards: List[Tensor] = []
            entropy_term = 0

            observation: RLSetting.Observations = train_env.reset()
            # Convert numpy arrays in the observation into Tensors on the right device.
            observation = observation.torch(device=self.device)

            done = False
            episode_steps = 0
            while not done and total_steps < self.max_training_steps:
                episode_steps += 1
                value, policy_dist = self.actor_critic.forward(observation)
                value = value.cpu().detach().numpy()
                action = policy_dist.sample()

                log_prob = policy_dist.log_prob(action)
                entropy = policy_dist.entropy()
                # NOTE: 'correct' thing to do would be to pass Actions objects of the
                # right type. This is for future-proofing this Method so it can
                # still function in the future if new settings are added.
                action = RLSetting.Actions(y_pred=action.cpu().detach().numpy())

                if self.render:
                    train_env.render()

                new_observation: RLSetting.Observations
                reward: RLSetting.Rewards
                new_observation, reward, done, _ = train_env.step(action)
                new_observation = new_observation.torch(device=self.device)
                total_steps += 1

                # Likewise, in order to support different future settings, we receive a
                # Rewards object, which contains the reward value (the float when the
                # env isn't batched.).
                reward_value: float = reward.y

                rewards.append(reward_value)
                values.append(value)
                log_probs.append(log_prob)
                entropy_term += entropy

                observation = new_observation

            Qval, _ = self.actor_critic.forward(new_observation)
            Qval = Qval.detach().cpu().numpy()
            all_rewards.append(np.sum(rewards))
            all_lengths.append(episode_steps)
            average_lengths.append(np.mean(all_lengths[-10:]))

            if episode % 10 == 0:
                print(
                    f"step {total_steps}/{self.max_training_steps}, "
                    f"episode: {episode}, "
                    f"reward: {np.sum(rewards)}, "
                    f"total length: {episode_steps}, "
                    f"average length: {average_lengths[-1]} \n"
                )

            if total_steps >= self.max_training_steps:
                print(f"Reached the limit of {self.max_training_steps} steps.")
                break

            # compute Q values
            Q_values = np.zeros_like(values)
            # Use the last value from the critic as the final value estimate.
            q_value = Qval
            for t, reward in reversed(list(enumerate(rewards))):
                q_value = reward + self.hparams.gamma * q_value
                Q_values[t] = q_value

            # update actor critic
            values = torch.as_tensor(values, dtype=torch.float, device=self.device)
            Q_values = torch.as_tensor(Q_values, dtype=torch.float, device=self.device)
            log_probs = torch.stack(log_probs)

            advantage = Q_values - values
            actor_loss = (-log_probs * advantage).mean()
            critic_loss = 0.5 * advantage.pow(2).mean()
            ac_loss = (
                actor_loss + critic_loss + self.hparams.entropy_term_coefficient * entropy_term
            )

            self.ac_optimizer.zero_grad()
            ac_loss.backward()
            self.ac_optimizer.step()

        # Plot results
        smoothed_rewards = pd.Series.rolling(pd.Series(all_rewards), 10).mean()
        smoothed_rewards = [elem for elem in smoothed_rewards]
        plt.plot(all_rewards)
        plt.plot(smoothed_rewards)
        plt.plot()
        plt.xlabel("Episode")
        plt.ylabel("Reward")
        self.plots_dir.mkdir(parents=True, exist_ok=True)
        plt.savefig(self.plots_dir / f"task_{self.task}_0.png")
        # plt.show()

        plt.plot(all_lengths)
        plt.plot(average_lengths)
        plt.xlabel("Episode")
        plt.ylabel("Episode length")
        plt.savefig(self.plots_dir / f"task_{self.task}_1.png")
        # plt.show()

    def get_actions(
        self, observations: RLSetting.Observations, action_space: gym.Space
    ) -> RLSetting.Actions:
        # Move the observations to the right device, converting numpy arrays to tensors.
        observations = observations.torch(device=self.device)
        value, action_dist = self.actor_critic(observations)
        return RLSetting.Actions(y_pred=action_dist.sample())

    # The methods below aren't required, but are good to add.

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called by the Setting when switching between tasks.

        Parameters
        ----------
        task_id : Optional[int]
            the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.
        """
        if isinstance(task_id, int):
            self.task = task_id

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser):
        parser.add_arguments(cls.HParams, dest="hparams")

    @classmethod
    def from_argparse_args(cls, args: Namespace):
        hparams: ExampleA2CMethod.HParams = args.hparams
        return cls(hparams=hparams)

    def get_search_space(self, setting: RLSetting) -> Dict:
        return self.hparams.get_orion_space()

    def adapt_to_new_hparams(self, new_hparams: Dict) -> None:
        self.hparams = self.HParams.from_dict(new_hparams)


if __name__ == "__main__":

    # Create the Setting.

    # CartPole for debugging:
    from sequoia.settings.rl import TraditionalRLSetting

    setting = TraditionalRLSetting(dataset="CartPole-v0", nb_tasks=1, train_max_steps=10_000)

    # OR: Incremental CartPole:
    from sequoia.settings.rl import IncrementalRLSetting

    setting = IncrementalRLSetting(dataset="CartPole-v0", nb_tasks=5, train_steps_per_task=10_000)

    # OR: Setting of the RL Track of the competition:
    # setting = IncrementalRLSetting.load_benchmark("rl_track")

    # Create the Method:
    method = ExampleA2CMethod(render=True)

    # Apply the Method onto the Setting to get Results.
    results = setting.apply(method)
    print(results.summary())

    # BONUS: Running a hyper-parameter sweep:
    # method.hparam_sweep(setting)


================================================
FILE: examples/clcomp21/a2c_example_test.py
================================================
import pytest

from sequoia.client.setting_proxy import SettingProxy
from sequoia.conftest import slow
from sequoia.settings.rl import IncrementalRLSetting, RLSetting
from sequoia.settings.sl import ClassIncrementalSetting

from .a2c_example import ExampleA2CMethod
from .dummy_method import DummyMethod


@slow
@pytest.mark.timeout(120)
def test_cartpole_state(cartpole_state_setting: SettingProxy[RLSetting]):
    """Applies this Method to a simple cartpole-state setting."""
    method = ExampleA2CMethod()
    results = cartpole_state_setting.apply(method)
    assert results.to_log_dict()

    results: RLSetting.Results
    # TODO: The example isn't actually performing that well! We should try to get
    # something that can easily and reproducibly solve cartpole to 200, if possible.
    # assert 150 < results.average_final_performance.mean_episode_length
    # TODO: Increase this bound when performance is improved.
    assert 5 < results.average_final_performance.mean_episode_length


@slow
@pytest.mark.timeout(120)
def test_incremental_cartpole_state(
    incremental_cartpole_state_setting: SettingProxy[IncrementalRLSetting],
):
    """Applies this Method to the class-incremental mnist Setting."""
    method = ExampleA2CMethod()
    results = incremental_cartpole_state_setting.apply(method)
    assert results.to_log_dict()

    results: ClassIncrementalSetting.Results
    # TODO: Increase this bound
    assert 5 <= results.average_online_performance.objective
    assert 5 <= results.average_final_performance.objective


@slow
@pytest.mark.timeout(300)
def test_RL_track(rl_track_setting: SettingProxy[IncrementalRLSetting]):
    """Applies this Method to the Setting of the sl track of the competition."""
    method = DummyMethod()
    results = rl_track_setting.apply(method)
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    results: ClassIncrementalSetting.Results
    online_perf = results.average_online_performance
    # TODO: get an estimate of the upper bound of the random method on the RL track.
    TODO = 1_000  # this is way too large.
    assert 0 < online_perf.objective < TODO
    final_perf = results.average_final_performance
    assert 0 < final_perf.objective < TODO


================================================
FILE: examples/clcomp21/classifier.py
================================================
""" Example Method for the SL track: Uses a simple classifier, without any CL mechanism.

As you'd expect, this Method exhibits complete forgetting of all previous tasks.
You can use this model and method as a jumping off point for your own submission.
"""
from argparse import Namespace
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional, Tuple, Type

import gym
import torch
import tqdm
from gym import spaces
from numpy import inf
from simple_parsing import ArgumentParser
from torch import Tensor, nn
from torch.optim.optimizer import Optimizer
from torchvision.models import ResNet, resnet18

from sequoia.common.hparams import HyperParameters, log_uniform
from sequoia.common.spaces import Image
from sequoia.methods import Method
from sequoia.settings import ClassIncrementalSetting
from sequoia.settings.sl import PassiveEnvironment
from sequoia.settings.sl.incremental import Actions, Environment, Observations, Rewards


@dataclass
class HParams(HyperParameters):
    """Hyper-parameters of the demo model."""

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001)
    # L2 regularization coefficient.
    weight_decay: float = log_uniform(1e-9, 1e-3, default=1e-6)

    # Maximum number of training epochs per task.
    max_epochs_per_task: int = 10
    # Number of epochs with increasing validation loss after which we stop training.
    early_stop_patience: int = 2


class Classifier(nn.Module):
    """Simple classification model without any CL-related mechanism.

    This example model uses a resnet18 as the encoder, and a single output layer.
    """

    HParams: ClassVar[Type[HParams]] = HParams

    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space,
        hparams: HParams = None,
    ):
        super().__init__()
        self.hparams = hparams or self.HParams()

        image_space: Image = observation_space.x
        # image_shape = image_space.shape

        # This example is intended for classification / discrete action spaces.
        assert isinstance(action_space, spaces.Discrete)
        assert action_space == reward_space
        self.n_classes = action_space.n
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.encoder, self.representations_size = self.create_encoder(image_space)
        self.output = self.create_output_head()
        self.loss = nn.CrossEntropyLoss()

    def create_output_head(self) -> nn.Module:
        return nn.Linear(self.representations_size, self.n_classes).to(self.device)

    def configure_optimizers(self) -> Optimizer:
        return torch.optim.Adam(
            self.parameters(),
            lr=self.hparams.learning_rate,
            weight_decay=self.hparams.weight_decay,
        )

    def create_encoder(self, image_space: Image) -> Tuple[nn.Module, int]:
        """Create an encoder for the given image space.

        Returns the encoder, as well as the size of the representations it will produce.

        Parameters
        ----------
        image_space : Image
            A subclass of `gym.spaces.Box` for images. Represents the space the images
            will come from during training and testing. Its attributes of interest
            include `c`, `w`, `h`, `shape` and `dype`.

        Returns
        -------
        Tuple[nn.Module, int]
            The encoder to be used, (a nn.Module), as well as the size of the
            representations it will produce.

        Raises
        ------
        NotImplementedError
            If no encoder is available for the given image dimensions.
        """
        if image_space.width == image_space.height == 28:
            # Setup for mnist variants.
            # (not part of the competition, but used for debugging below).
            encoder = nn.Sequential(
                nn.Conv2d(image_space.channels, 6, 5),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Conv2d(6, 16, 5),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Flatten(),
            )
            features = 256
        elif image_space.width == image_space.height == 32:
            # Synbols dataset: use a resnet18 by default.
            resnet: ResNet = resnet18(pretrained=False)
            features = resnet.fc.in_features
            # Disable/Remove the last layer.
            resnet.fc = nn.Sequential()
            encoder = resnet
        else:
            raise NotImplementedError(
                f"TODO: Add an encoder for the given image space {image_space}"
            )
        return encoder.to(self.device), features

    def forward(self, observations: Observations) -> Tensor:
        # NOTE: here we don't make use of the task labels.
        observations = observations.to(self.device)
        x = observations.x
        task_labels = observations.task_labels
        features = self.encoder(x)
        logits = self.output(features)
        return logits

    def shared_step(
        self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment
    ) -> Tuple[Tensor, Dict]:
        """Shared step used for both training and validation.

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            Batch containing Observations, and optional Rewards. When the Rewards are
            None, it means that we'll need to provide the Environment with actions
            before we can get the Rewards (e.g. image labels) back.

            This happens for example when being applied in a Setting which cares about
            sample efficiency or training performance, for example.

        environment : Environment
            The environment we're currently interacting with. Used to provide the
            rewards when they aren't already part of the batch (as mentioned above).

        Returns
        -------
        Tuple[Tensor, Dict]
            The Loss tensor, and a dict of metrics to be logged.
        """
        # Since we're training on a Passive environment, we will get both observations
        # and rewards, unless we're being evaluated based on our training performance,
        # in which case we will need to send actions to the environments before we can
        # get the corresponding rewards (image labels).
        observations: Observations = batch[0]
        rewards: Optional[Rewards] = batch[1]
        # Get the predictions:
        logits = self(observations)
        y_pred = logits.argmax(-1)

        if rewards is None:
            # If the rewards in the batch is None, it means we're expected to give
            # actions before we can get rewards back from the environment.
            rewards = environment.send(Actions(y_pred))

        assert rewards is not None
        image_labels = rewards.y.to(self.device)

        loss = self.loss(logits, image_labels)

        accuracy = (y_pred == image_labels).sum().float() / len(image_labels)
        metrics_dict = {"accuracy": f"{accuracy.cpu().item():3.2%}"}
        return loss, metrics_dict


class ExampleMethod(Method, target_setting=ClassIncrementalSetting):
    """Minimal example of a Method usable only in the SL track of the competition.

    This method uses the ExampleModel, which is quite simple.
    """

    ModelType: ClassVar[Type[Classifier]] = Classifier

    def __init__(self, hparams: HParams = None):
        self.hparams: HParams = hparams or HParams()

        # We will create those when `configure` will be called, before training.
        self.model: Classifier
        self.optimizer: torch.optim.Optimizer

    def configure(self, setting: ClassIncrementalSetting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        self.model = self.ModelType(
            observation_space=setting.observation_space,
            action_space=setting.action_space,
            reward_space=setting.reward_space,
        )
        self.optimizer = self.model.configure_optimizers()

    def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
        """Example train loop.
        You can do whatever you want with train_env and valid_env here.

        NOTE: In the Settings where task boundaries are known (in this case all
        the supervised CL settings), this will be called once per task.
        """
        # configure() will have been called by the setting before we get here.
        best_val_loss = inf
        best_epoch = 0
        for epoch in range(self.hparams.max_epochs_per_task):
            self.model.train()
            print(f"Starting epoch {epoch}")
            # Training loop:
            with tqdm.tqdm(train_env) as train_pbar:
                postfix = {}
                train_pbar.set_description(f"Training Epoch {epoch}")
                for i, batch in enumerate(train_pbar):
                    loss, metrics_dict = self.model.shared_step(batch, environment=train_env)
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    postfix.update(metrics_dict)
                    train_pbar.set_postfix(postfix)

            # Validation loop:
            self.model.eval()
            torch.set_grad_enabled(False)
            with tqdm.tqdm(valid_env) as val_pbar:
                postfix = {}
                val_pbar.set_description(f"Validation Epoch {epoch}")
                epoch_val_loss = 0.0

                for i, batch in enumerate(val_pbar):
                    batch_val_loss, metrics_dict = self.model.shared_step(
                        batch, environment=valid_env
                    )
                    epoch_val_loss += batch_val_loss
                    postfix.update(metrics_dict, val_loss=epoch_val_loss)
                    val_pbar.set_postfix(postfix)
            torch.set_grad_enabled(True)

            if epoch_val_loss < best_val_loss:
                best_val_loss = epoch_val_loss
                best_epoch = epoch
            if epoch - best_epoch > self.hparams.early_stop_patience:
                print(f"Early stopping at epoch {i}.")
                # NOTE: You should probably reload the model weights as they were at the
                # best epoch.
                break

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        """Get a batch of predictions (aka actions) for these observations."""
        with torch.no_grad():
            logits = self.model(observations)
        # Get the predicted classes
        y_pred = logits.argmax(dim=-1)
        return self.target_setting.Actions(y_pred)

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser):
        """Adds command-line arguments for this Method to an argument parser."""
        parser.add_arguments(cls.ModelType.HParams, "hparams")

    @classmethod
    def from_argparse_args(cls, args: Namespace):
        """Creates an instance of this Method from the parsed arguments."""
        hparams: Classifier.HParams = args.hparams
        return cls(hparams=hparams)


if __name__ == "__main__":
    # Create the Method:
    # - Manually:
    # method = ExampleMethod()
    # - From the command-line:
    from simple_parsing import ArgumentParser

    from sequoia.common import Config
    from sequoia.settings import ClassIncrementalSetting

    parser = ArgumentParser()
    ExampleMethod.add_argparse_args(parser)
    args = parser.parse_args()
    method = ExampleMethod.from_argparse_args(args)

    # Create the Setting:

    # - "Easy": Domain-Incremental MNIST Setting, useful for quick debugging, but
    #           beware that the action space is different than in class-incremental!
    #           (which is the type of Setting used in the SL track!)
    # from sequoia.settings.sl.class_incremental.domain_incremental import DomainIncrementalSetting
    # setting = DomainIncrementalSetting(
    #     dataset="mnist", nb_tasks=5, monitor_training_performance=True
    # )

    # - "Medium": Class-Incremental MNIST Setting, useful for quick debugging:
    # setting = ClassIncrementalSetting(
    #     dataset="mnist",
    #     nb_tasks=5,
    #     monitor_training_performance=True,
    #     known_task_boundaries_at_test_time=False,
    #     batch_size=32,
    #     num_workers=4,
    # )

    # - "HARD": Class-Incremental Synbols, more challenging.
    # NOTE: This Setting is very similar to the one used for the SL track of the
    # competition.
    setting = ClassIncrementalSetting(
        dataset="synbols",
        nb_tasks=12,
        known_task_boundaries_at_test_time=False,
        monitor_training_performance=True,
        batch_size=32,
        num_workers=4,
    )
    # NOTE: can also use pass a `Config` object to `setting.apply`. This object has some
    # configuration options like device, data_dir, etc.
    results = setting.apply(method, config=Config(data_dir="data"))
    print(results.summary())


================================================
FILE: examples/clcomp21/classifier_test.py
================================================
import pytest

from sequoia.client.setting_proxy import SettingProxy
from sequoia.conftest import slow
from sequoia.settings.sl import ClassIncrementalSetting

from .classifier import Classifier, ExampleMethod


@pytest.mark.timeout(120)
def test_mnist(mnist_setting: SettingProxy[ClassIncrementalSetting]):
    """Applies this Method to the class-incremental mnist Setting."""
    method = ExampleMethod(hparams=Classifier.HParams(max_epochs_per_task=1))
    results = mnist_setting.apply(method)
    assert results.to_log_dict()

    results: ClassIncrementalSetting.Results
    assert 0.60 <= results.average_online_performance.objective <= 1.00
    assert 0.10 <= results.average_final_performance.objective <= 0.30


@slow
@pytest.mark.timeout(300)
def test_SL_track(sl_track_setting: SettingProxy[ClassIncrementalSetting]):
    """Applies this Method to the Setting of the sl track of the competition."""
    method = ExampleMethod(hparams=Classifier.HParams(max_epochs_per_task=1))
    results = sl_track_setting.apply(method)
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    results: ClassIncrementalSetting.Results
    online_perf = results.average_online_performance
    assert 0.15 <= online_perf.objective <= 0.30
    final_perf = results.average_final_performance
    assert 0.01 <= final_perf.objective <= 0.05


================================================
FILE: examples/clcomp21/conftest.py
================================================
import pytest

from sequoia.client.setting_proxy import SettingProxy
from sequoia.settings.rl import IncrementalRLSetting, TraditionalRLSetting
from sequoia.settings.sl import ClassIncrementalSetting, TaskIncrementalSLSetting


@pytest.fixture()
def mnist_setting():
    return SettingProxy(
        ClassIncrementalSetting,
        dataset="mnist",
        monitor_training_performance=True,
    )


@pytest.fixture()
def task_incremental_mnist_setting():
    return SettingProxy(
        TaskIncrementalSLSetting,
        dataset="mnist",
        monitor_training_performance=True,
    )


@pytest.fixture()
def fashion_mnist_setting():
    return SettingProxy(
        ClassIncrementalSetting,
        dataset="fashionmnist",
        monitor_training_performance=True,
    )


@pytest.fixture()
def sl_track_setting():
    setting = SettingProxy(
        ClassIncrementalSetting,
        "sl_track",
        # dataset="synbols",
        # nb_tasks=12,
        # class_order=class_order,
        # monitor_training_performance=True,
    )
    return setting


@pytest.fixture()
def cartpole_state_setting():
    setting = SettingProxy(
        TraditionalRLSetting,
        dataset="cartpole",
        train_max_steps=5_000,
        test_max_steps=2_000,
        nb_tasks=1,
    )
    return setting


@pytest.fixture()
def incremental_cartpole_state_setting():
    setting = SettingProxy(
        IncrementalRLSetting,
        dataset="cartpole",
        train_max_steps=10_000,
        nb_tasks=2,
        test_max_steps=2_000,
    )
    return setting


@pytest.fixture()
def rl_track_setting(tmp_path):
    # NOTE: Here instead of loading the `rl_track.yaml`, we create instantiate it
    # directly, because we want to reduce the length of the task for testing, and it
    # isn't currently possible to both pass a preset yaml file and also pass kwargs to
    # the SettingProxy.
    setting = SettingProxy(
        IncrementalRLSetting,
        dataset="monsterkong",
        train_task_schedule={
            0: {"level": 0},
            1: {"level": 1},
            2: {"level": 10},
            3: {"level": 11},
            4: {"level": 20},
            5: {"level": 21},
            6: {"level": 30},
            7: {"level": 31},
        },
        train_steps_per_task=2_000,  # Reduced length for testing
        test_steps_per_task=2_000,
        task_labels_at_train_time=True,
    )
    assert setting.steps_per_phase == 2000
    assert sorted(setting.train_task_schedule.keys()) == list(range(0, 16_000, 2000))
    return setting


================================================
FILE: examples/clcomp21/dummy_method.py
================================================
from typing import Optional

import gym
import numpy as np
import tqdm
from torch import Tensor

from sequoia.methods import Method
from sequoia.settings import Actions, Environment, Observations, Setting
from sequoia.settings.sl import SLSetting


class DummyMethod(Method, target_setting=Setting):
    """Dummy method that returns random actions for each observation."""

    def __init__(self):
        self.max_train_episodes: Optional[int] = None

    def configure(self, setting: Setting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        if isinstance(setting, SLSetting):
            # Being applied in SL, we will only do one 'epoch" (a.k.a. "episode").
            self.max_train_episodes = 1
        pass

    def fit(self, train_env: Environment, valid_env: Environment):
        """Example train loop.
        You can do whatever you want with train_env and valid_env here.

        NOTE: In the Settings where task boundaries are known (in this case all
        the supervised CL settings), this will be called once per task.
        """
        # configure() will have been called by the setting before we get here.
        episodes = 0
        with tqdm.tqdm(desc="training") as train_pbar:

            while not train_env.is_closed():
                for i, batch in enumerate(train_env):
                    if isinstance(batch, Observations):
                        observations, rewards = batch, None
                    else:
                        observations, rewards = batch

                    batch_size = observations.x.shape[0]

                    y_pred = train_env.action_space.sample()

                    # If we're at the last batch, it might have a different size, so w
                    # give only the required number of values.
                    if isinstance(y_pred, (np.ndarray, Tensor)):
                        if y_pred.shape[0] != batch_size:
                            y_pred = y_pred[:batch_size]

                    if rewards is None:
                        rewards = train_env.send(y_pred)

                    train_pbar.set_postfix(
                        {
                            "Episode": episodes,
                            "Step": i,
                        }
                    )
                    # train as you usually would.

                episodes += 1
                if self.max_train_episodes and episodes >= self.max_train_episodes:
                    train_env.close()
                    break

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        """Get a batch of predictions (aka actions) for these observations."""
        y_pred = action_space.sample()
        return self.target_setting.Actions(y_pred)


if __name__ == "__main__":
    from sequoia.common import Config
    from sequoia.settings import ClassIncrementalSetting

    # Create the Method:
    # - Manually:
    method = DummyMethod()

    # NOTE: This Setting is very similar to the one used for the SL track of the
    # competition.
    from sequoia.client import SettingProxy

    setting = SettingProxy(ClassIncrementalSetting, "sl_track")
    # setting = SettingProxy(ClassIncrementalSetting,
    #     dataset="synbols",
    #     nb_tasks=12,
    #     known_task_boundaries_at_test_time=False,
    #     monitor_training_performance=True,
    #     batch_size=32,
    #     num_workers=4,
    # )
    # NOTE: can also use pass a `Config` object to `setting.apply`. This object has some
    # configuration options like device, data_dir, etc.
    results = setting.apply(method, config=Config(data_dir="data"))
    print(results.summary())


================================================
FILE: examples/clcomp21/dummy_method_test.py
================================================
import pytest

from sequoia.client.setting_proxy import SettingProxy
from sequoia.conftest import slow
from sequoia.settings.rl import IncrementalRLSetting
from sequoia.settings.sl import ClassIncrementalSetting

from .dummy_method import DummyMethod


@pytest.mark.timeout(120)
def test_mnist(mnist_setting: SettingProxy[ClassIncrementalSetting]):
    """Applies this Method to the class-incremental mnist Setting."""
    method = DummyMethod()
    results = mnist_setting.apply(method)
    assert results.to_log_dict()

    results: ClassIncrementalSetting.Results
    assert 0.10 * 0.5 <= results.average_online_performance.objective <= 0.10 * 1.5
    assert 0.10 * 0.5 <= results.average_final_performance.objective <= 0.10 * 1.5


@slow
@pytest.mark.timeout(300)
def test_SL_track(sl_track_setting: SettingProxy[ClassIncrementalSetting]):
    """Applies this Method to the Setting of the sl track of the competition."""
    method = DummyMethod()
    results = sl_track_setting.apply(method)
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    results: ClassIncrementalSetting.Results
    online_perf = results.average_online_performance
    assert 0.02 <= online_perf.objective <= 0.05
    final_perf = results.average_final_performance
    assert 0.02 <= final_perf.objective <= 0.05


@slow
@pytest.mark.timeout(300)
def test_RL_track(rl_track_setting: SettingProxy[IncrementalRLSetting]):
    """Applies this Method to the Setting of the sl track of the competition."""
    method = DummyMethod()
    results = rl_track_setting.apply(method)
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    results: ClassIncrementalSetting.Results
    online_perf = results.average_online_performance
    # TODO: get an estimate of the upper bound of the random method on the RL track.
    TODO = 1_000  # this is way too large.
    assert 0 < online_perf.objective < TODO
    final_perf = results.average_final_performance
    assert 0 < final_perf.objective < TODO


================================================
FILE: examples/clcomp21/multihead_classifier.py
================================================
""" Example Method for the SL track: Multi-Head Classifier with simple task inference.

You can use this model and method as a jumping off point for your own submission.
"""
from dataclasses import dataclass, replace
from logging import getLogger
from typing import ClassVar, Optional, Type

import torch
from gym import Space, spaces
from torch import Tensor, nn
from torch.nn import functional as F
from torch.optim.optimizer import Optimizer

from sequoia.settings.sl.incremental import ClassIncrementalSetting
from sequoia.settings.sl.incremental.objects import Observations

from .classifier import Classifier, ExampleMethod

logger = getLogger(__file__)


class MultiHeadClassifier(Classifier):
    @dataclass
    class HParams(Classifier.HParams):
        pass

    def __init__(
        self,
        observation_space: Space,
        action_space: spaces.Discrete,
        reward_space: spaces.Discrete,
        hparams: "MultiHeadClassifier.HParams" = None,
    ):
        super().__init__(observation_space, action_space, reward_space, hparams=hparams)
        # Use one output layer per task, rather than a single layer.
        self.output_heads = nn.ModuleList()
        # Use the output layer created in the Classifier constructor for task 0.
        self.output_heads.append(self.output)

        # NOTE: The optimizer will be set here, so that we can add the parameters of any
        # new output heads to it later.
        self.optimizer: Optional[torch.optim.Optimizer] = None
        self.current_task_id: int = 0

    def configure_optimizers(self) -> Optimizer:
        self.optimizer = super().configure_optimizers()
        return self.optimizer

    def create_output_head(self) -> nn.Module:
        return nn.Linear(self.representations_size, self.n_classes).to(self.device)

    def get_or_create_output_head(self, task_id: int) -> nn.Module:
        """Retrieves or creates a new output head for the given task index.

        Also stores it in the `output_heads`, and adds its parameters to the
        optimizer.
        """
        task_output_head: nn.Module
        if len(self.output_heads) > task_id:
            task_output_head = self.output_heads[task_id]
        else:
            logger.info(f"Creating a new output head for task {task_id}.")
            task_output_head = self.create_output_head()
            self.output_heads.append(task_output_head)
            assert self.optimizer, "need to set `optimizer` on the model."
            self.optimizer.add_param_group({"params": task_output_head.parameters()})
        return task_output_head

    def forward(self, observations: Observations) -> Tensor:
        """Smart forward pass with multi-head predictions and task inference.

        This forward pass can handle three different scenarios, depending on the
        contents of `observations.task_labels`:
        1.  Base case: task labels are present, and all examples are from the same task.
            - Perform the 'usual' forward pass (e.g. `super().forward(observations)`).
        2.  Task labels are present, and the batch contains a mix of samples from
            different tasks:
            - Create slices of the batch for each task, where all items in each
              'sub-batch' come from the same task.
            - Perform a forward pass for each task, by calling `forward` recursively
              with the sub-batch for each task as an argument (Case 1).
        3.  Task labels are *not* present. Perform some type of task inference, using
            the `task_inference_forward_pass` method. Check its docstring for more info.

        Parameters
        ----------
        observations : Observations
            Observations from an environment. As of right now, all Settings produce
            observations with (at least) the two following attributes:
            - x: Tensor (the images/inputs)
            - task_labels: Optional[Tensor] (The task labels, when available, else None)

        Returns
        -------
        Tensor
            The outputs, which in this case are the classification logits.
            All three cases above produce the same kind of outputs.
        """
        observations = observations.to(self.device)
        task_ids: Optional[Tensor] = observations.task_labels

        if task_ids is None:
            # Run the forward pass with task inference turned on.
            return self.task_inference_forward_pass(observations)

        task_ids_present_in_batch = torch.unique(task_ids)
        if len(task_ids_present_in_batch) > 1:
            # Case 2: The batch contains data from more than one task.
            return self.split_forward_pass(observations)

        # Base case: "Normal" forward pass, where all items come from the same task.
        # - Setup the model for this task, however you want, and then do a forward pass,
        # as you normally would.
        # NOTE: If you want to reuse this cool multi-headed forward pass in your
        # own model, these lines here are what you'd want to change.
        task_id: int = task_ids_present_in_batch.item()

        # <--------------- Change below ---------------->
        if task_id == self.current_task_id:
            output_head = self.output
        else:
            output_head = self.get_or_create_output_head(task_id)
        features = self.encoder(observations.x)
        logits = output_head(features)
        return logits

    def split_forward_pass(self, observations: Observations) -> Tensor:
        """Perform a forward pass for a batch of observations from different tasks.

        This is called in `forward` when there is more than one unique task label in the
        batch.
        This will call `forward` for each task id present in the batch, passing it a
        slice of the batch, in which all items are from that task.

        NOTE: This cannot cause recursion problems, because `forward`(d=2) will be
        called with a bach of items, all of which come from the same task. This makes it
        so `split_forward_pass` cannot then be called again.

        Parameters
        ----------
        observations : Observations
            Observations, in which the task labels might not all be the same.

        Returns
        -------
        Tensor
            The outputs/logits from each task, re-assembled into a single batch, with
            the task ordering from `observations` preserved.
        """
        assert observations.task_labels is not None
        # We have task labels.
        task_labels: Tensor = observations.task_labels
        unique_task_ids, inv_indices = torch.unique(task_labels, return_inverse=True)
        # There might be more than one task in the batch.
        batch_size = observations.batch_size
        assert batch_size is not None
        all_indices = torch.arange(batch_size, dtype=torch.int64, device=self.device)

        # Placeholder for the predicitons for each item in the batch.
        task_outputs = [None for _ in range(batch_size)]

        for i, task_id in enumerate(unique_task_ids):
            # Get the forward pass slice for this task.
            # Boolean 'mask' tensor, that selects entries from task `task_id`.
            is_from_this_task = inv_indices == i
            # Indices of the batch elements that are from task `task_id`.
            task_indices = all_indices[is_from_this_task]

            # Take a slice of the observations, in which all items come from this task.
            task_observations = observations[is_from_this_task]
            # Perform a "normal" forward pass (Base case).
            task_output = self.forward(task_observations)

            # Store the outputs for the items from this task.
            for i, index in enumerate(task_indices):
                task_outputs[index] = task_output[i]

        # Merge the results.
        assert all(item is not None for item in task_outputs)
        logits = torch.stack(task_outputs)
        return logits

    def task_inference_forward_pass(self, observations: Observations) -> Tensor:
        """Forward pass with a simple form of task inference."""
        # We don't have access to task labels (`task_labels` is None).
        # --> Perform a simple kind of task inference:
        # 1. Perform a forward pass with each task's output head;
        # 2. Merge these predictions into a single prediction somehow.
        assert observations.task_labels is None

        # NOTE: This assumes that the observations are batched.
        # These are used below to indicate the shape of the different tensors.
        B = observations.x.shape[0]
        T = n_known_tasks = len(self.output_heads)
        N = self.n_classes
        # Tasks encountered previously and for which we have an output head.
        known_task_ids: list[int] = list(range(n_known_tasks))
        assert known_task_ids
        # Placeholder for the predictions from each output head for each item in the
        # batch
        task_outputs = [None for _ in known_task_ids]  # [T, B, N]

        # Get the forward pass for each task.
        for task_id in known_task_ids:
            # Create 'fake' Observations for this forward pass, with 'fake' task labels.
            # NOTE: We do this so we can call `self.forward` and not get an infinite
            # recursion.
            task_labels = torch.full([B], task_id, device=self.device, dtype=int)
            task_observations = replace(observations, task_labels=task_labels)

            # Setup the model for task `task_id`, and then do a forward pass.
            task_logits = self.forward(task_observations)

            task_outputs[task_id] = task_logits

        # 'Merge' the predictions from each output head using some kind of task
        # inference.
        assert all(item is not None for item in task_outputs)
        # Stack the predictions (logits) from each output head.
        logits_from_each_head: Tensor = torch.stack(task_outputs, dim=1)
        assert logits_from_each_head.shape == (B, T, N)

        # Normalize the logits from each output head with softmax.
        # Example with batch size of 1, output heads = 2, and classes = 4:
        # logits from each head:  [[[123, 456, 123, 123], [1, 1, 2, 1]]]
        # 'probs' from each head: [[[0.1, 0.6, 0.1, 0.1], [0.2, 0.2, 0.4, 0.2]]]
        probs_from_each_head = torch.softmax(logits_from_each_head, dim=-1)
        assert probs_from_each_head.shape == (B, T, N)

        # Simple kind of task inference:
        # For each item in the batch, use the class that has the highest probability
        # accross all output heads.
        max_probs_across_heads, chosen_head_per_class = probs_from_each_head.max(dim=1)
        assert max_probs_across_heads.shape == (B, N)
        assert chosen_head_per_class.shape == (B, N)
        # Example (continued):
        # max probs across heads:        [[0.2, 0.6, 0.4, 0.2]]
        # chosen output heads per class: [[1, 0, 1, 1]]

        # Determine which output head has highest "confidence":
        max_prob_value, most_probable_class = max_probs_across_heads.max(dim=1)
        assert max_prob_value.shape == (B,)
        assert most_probable_class.shape == (B,)
        # Example (continued):
        # max_prob_value: [0.6]
        # max_prob_class: [1]

        # A bit of boolean trickery to get what we need, which is, for each item, the
        # index of the output head that gave the most confident prediction.
        mask = F.one_hot(most_probable_class, N).to(dtype=bool, device=self.device)
        chosen_output_head_per_item = chosen_head_per_class[mask]
        assert mask.shape == (B, N)
        assert chosen_output_head_per_item.shape == (B,)
        # Example (continued):
        # mask: [[False, True, False, True]]
        # chosen_output_head_per_item: [0]

        # Create a bool tensor to select items associated with the chosen output head.
        selected_mask = F.one_hot(chosen_output_head_per_item, T).to(dtype=bool, device=self.device)
        assert selected_mask.shape == (B, T)
        # Select the logits using the mask:
        logits = logits_from_each_head[selected_mask]
        assert logits.shape == (B, N)
        return logits

    def on_task_switch(self, task_id: Optional[int]):
        """Executed when the task switches (to either a known or unknown task)."""
        if task_id is not None:
            # Switch the output head.
            self.current_task_id = task_id
            self.output = self.get_or_create_output_head(task_id)


class ExampleTaskInferenceMethod(ExampleMethod):

    ModelType: ClassVar[Type[Classifier]] = MultiHeadClassifier

    def __init__(self, hparams: MultiHeadClassifier.HParams = None):
        super().__init__(hparams=hparams or MultiHeadClassifier.HParams())
        self.hparams: MultiHeadClassifier.HParams

    def configure(self, setting: ClassIncrementalSetting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        self.model = MultiHeadClassifier(
            observation_space=setting.observation_space,
            action_space=setting.action_space,
            reward_space=setting.reward_space,
            hparams=self.hparams,
        )
        self.optimizer = self.model.configure_optimizers()
        # Share a reference to the Optimizer with the model, so it can add new weights
        # when needed.
        self.model.optimizer = self.optimizer

    def on_task_switch(self, task_id: Optional[int]):
        self.model.on_task_switch(task_id)

    def get_actions(self, observations, action_space):
        return super().get_actions(observations, action_space)


if __name__ == "__main__":
    # Create the Method, either manually:
    # method = ExampleTaskInferenceMethod()
    # Or, from the command-line:
    from simple_parsing import ArgumentParser

    from sequoia.settings.sl.class_incremental import (
        ClassIncrementalSetting,
        TaskIncrementalSLSetting,
    )

    parser = ArgumentParser(description=__doc__)
    ExampleTaskInferenceMethod.add_argparse_args(parser)
    args = parser.parse_args()
    method = ExampleTaskInferenceMethod.from_argparse_args(args)

    # Create the Setting:

    # Simpler Settings (useful for debugging):
    # setting = TaskIncrementalSLSetting(
    # setting = ClassIncrementalSetting(
    #     dataset="mnist",
    #     nb_tasks=5,
    #     monitor_training_performance=True,
    #     batch_size=32,
    #     num_workers=4,
    # )

    # Very similar setup to the SL Track of the competition:
    setting = ClassIncrementalSetting(
        dataset="synbols",
        nb_tasks=12,
        monitor_training_performance=True,
        known_task_boundaries_at_test_time=False,
        batch_size=32,
        num_workers=4,
    )
    results = setting.apply(method)


================================================
FILE: examples/clcomp21/multihead_classifier_test.py
================================================
import pytest

from sequoia.client.setting_proxy import SettingProxy
from sequoia.conftest import slow
from sequoia.settings import ClassIncrementalSetting, TaskIncrementalSLSetting

from .multihead_classifier import ExampleTaskInferenceMethod, MultiHeadClassifier


@pytest.mark.timeout(120)
def test_task_incremental_mnist(
    task_incremental_mnist_setting: SettingProxy[TaskIncrementalSLSetting],
):
    """Applies this Method to the class-incremental mnist Setting."""
    mnist_setting = task_incremental_mnist_setting
    method = ExampleTaskInferenceMethod(hparams=MultiHeadClassifier.HParams(max_epochs_per_task=1))
    results = mnist_setting.apply(method)
    assert results.to_log_dict()

    results: ClassIncrementalSetting.Results
    # There should be an improvement over the Method in `classifier.py`:
    assert 0.80 <= results.average_online_performance.objective <= 1.00
    assert 0.50 <= results.average_final_performance.objective <= 1.00


@pytest.mark.timeout(120)
def test_mnist(mnist_setting: SettingProxy[ClassIncrementalSetting]):
    """Applies this Method to the class-incremental mnist Setting."""
    method = ExampleTaskInferenceMethod(hparams=MultiHeadClassifier.HParams(max_epochs_per_task=1))
    results = mnist_setting.apply(method)
    assert results.to_log_dict()

    results: ClassIncrementalSetting.Results
    # There should be an improvement over the Method in `classifier.py`:
    assert 0.80 <= results.average_online_performance.objective <= 1.00
    assert 0.50 <= results.average_final_performance.objective <= 1.00


@slow
@pytest.mark.timeout(600)
def test_SL_track(sl_track_setting: SettingProxy[ClassIncrementalSetting]):
    """Applies this Method to the Setting of the sl track of the competition."""
    method = ExampleTaskInferenceMethod(hparams=MultiHeadClassifier.HParams(max_epochs_per_task=1))
    results = sl_track_setting.apply(method)
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    results: ClassIncrementalSetting.Results
    assert 0.30 <= results.average_online_performance.objective <= 0.50
    assert 0.02 <= results.average_final_performance.objective <= 0.05


================================================
FILE: examples/clcomp21/regularization_example.py
================================================
""" Example: Defines a new Method based on the ExampleMethod, adding an EWC-like loss to
help prevent the weights from changing too much between tasks.
"""
from copy import deepcopy
from dataclasses import dataclass
from typing import ClassVar, Dict, Optional, Tuple, Type

import gym
import torch
from torch import Tensor

from sequoia.common.hparams import uniform
from sequoia.settings import DomainIncrementalSLSetting
from sequoia.settings.sl.incremental.objects import Observations, Rewards
from sequoia.utils.utils import dict_intersection
from sequoia.utils.logging_utils import get_logger

from .multihead_classifier import ExampleTaskInferenceMethod, MultiHeadClassifier

logger = get_logger(__name__)


class RegularizedClassifier(MultiHeadClassifier):
    """Adds an ewc-like penalty to the base classifier, to prevent its weights from
    shifting too much during training.
    """

    @dataclass
    class HParams(MultiHeadClassifier.HParams):
        """Hyperparameters of this improved method.

        Adds the hyper-parameters related the 'ewc-like' regularization to those of the
        ExampleMethod.

        NOTE: These `uniform()` and `log_uniform` and `HyperParameters` are just there
        to make it easier to run HPO sweeps for your Method, which isn't required for
        the competition.
        """

        # Coefficient of the ewc-like loss.
        reg_coefficient: float = uniform(0.0, 10.0, default=1.0)
        # Distance norm used in the regularization loss.
        reg_p_norm: int = 2

    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space,
        hparams: "RegularizedClassifier.HParams" = None,
    ):
        super().__init__(
            observation_space,
            action_space,
            reward_space,
            hparams=hparams,
        )
        self.reg_coefficient = self.hparams.reg_coefficient
        self.reg_p_norm = self.hparams.reg_p_norm

        self.previous_model_weights: Dict[str, Tensor] = {}

        self._previous_task: Optional[int] = None
        self._n_switches: int = 0

    def shared_step(self, batch: Tuple[Observations, Rewards], *args, **kwargs):
        base_loss, metrics = super().shared_step(batch, *args, **kwargs)
        ewc_loss = self.reg_coefficient * self.ewc_loss()
        metrics["ewc_loss"] = ewc_loss
        return base_loss + ewc_loss, metrics

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Executed when the task switches (to either a known or unknown task)."""
        super().on_task_switch(task_id)
        if self._previous_task is None and self._n_switches == 0:
            logger.debug("Starting the first task, no EWC update.")
        elif task_id is None or task_id != self._previous_task:
            # NOTE: We also switch between unknown tasks.
            logger.info(
                f"Switching tasks: {self._previous_task} -> {task_id}: "
                f"Updating the EWC 'anchor' weights."
            )
            self._previous_task = task_id
            self.previous_model_weights.clear()
            self.previous_model_weights.update(
                deepcopy({k: v.detach() for k, v in self.named_parameters()})
            )
        self._n_switches += 1

    def ewc_loss(self) -> Tensor:
        """Gets an 'ewc-like' regularization loss.

        NOTE: This is a simplified version of EWC where the loss is the P-norm
        between the current weights and the weights as they were on the begining
        of the task.
        """
        if self._previous_task is None:
            # We're in the first task: do nothing.
            return 0.0

        old_weights: Dict[str, Tensor] = self.previous_model_weights
        new_weights: Dict[str, Tensor] = dict(self.named_parameters())

        loss = 0.0
        for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights):
            loss += torch.dist(new_w, old_w.type_as(new_w), p=self.reg_p_norm)
        return loss


class ExampleRegMethod(ExampleTaskInferenceMethod):
    """Improved version of the ExampleMethod that uses a `RegularizedClassifier`."""

    HParams: ClassVar[Type[HParams]] = RegularizedClassifier.HParams

    def __init__(self, hparams: HParams = None):
        super().__init__(hparams=hparams or self.HParams.from_args())

    def configure(self, setting: DomainIncrementalSLSetting):
        # Use the improved model, with the added EWC-like term.
        self.model = RegularizedClassifier(
            observation_space=setting.observation_space,
            action_space=setting.action_space,
            reward_space=setting.reward_space,
            hparams=self.hparams,
        )
        self.optimizer = self.model.configure_optimizers()

    def on_task_switch(self, task_id: Optional[int]):
        self.model.on_task_switch(task_id)


if __name__ == "__main__":
    # Create the Method:
    # - Manually:
    # method = ExampleRegMethod()
    # - From the command-line:
    from simple_parsing import ArgumentParser

    from sequoia.common import Config
    from sequoia.settings import ClassIncrementalSetting

    parser = ArgumentParser()
    ExampleRegMethod.add_argparse_args(parser)
    args = parser.parse_args()
    method = ExampleRegMethod.from_argparse_args(args)

    # Create the Setting:

    # - "Easy": Domain-Incremental MNIST Setting, useful for quick debugging, but
    #           beware that the action space is different than in class-incremental!
    #           (which is the type of Setting used in the SL track!)
    # from sequoia.settings.sl.class_incremental.domain_incremental import DomainIncrementalSLSetting
    # setting = DomainIncrementalSLSetting(
    #     dataset="mnist", nb_tasks=5, monitor_training_performance=True
    # )

    # - "Medium": Class-Incremental MNIST Setting, useful for quick debugging:
    # setting = ClassIncrementalSetting(
    #     dataset="mnist",
    #     nb_tasks=5,
    #     monitor_training_performance=True,
    #     known_task_boundaries_at_test_time=False,
    #     batch_size=32,
    #     num_workes=4,
    # )

    # - "HARD": Class-Incremental Synbols, more challenging.
    # NOTE: This Setting is very similar to the one used for the SL track of the
    # competition.
    setting = ClassIncrementalSetting(
        dataset="synbols",
        nb_tasks=12,
        known_task_boundaries_at_test_time=False,
        monitor_training_performance=True,
        batch_size=32,
        num_workers=4,
    )

    # Run the experiment:
    results = setting.apply(method, config=Config(debug=True, data_dir="./data"))
    print(results.summary())


================================================
FILE: examples/clcomp21/regularization_example_test.py
================================================
import pytest

from sequoia.client.setting_proxy import SettingProxy
from sequoia.conftest import slow
from sequoia.settings import ClassIncrementalSetting

from .regularization_example import ExampleRegMethod, RegularizedClassifier


@pytest.mark.timeout(120)
def test_mnist(mnist_setting: SettingProxy[ClassIncrementalSetting]):
    """Applies this Method to the class-incremental mnist Setting."""
    method = ExampleRegMethod(hparams=RegularizedClassifier.HParams(max_epochs_per_task=1))
    results = mnist_setting.apply(method)
    assert results.to_log_dict()

    results: ClassIncrementalSetting.Results
    # There should be an improvement over the Method in `multihead_classifier.py`:
    assert 0.80 <= results.average_online_performance.objective <= 1.00
    assert 0.30 <= results.average_final_performance.objective <= 0.50


@slow
@pytest.mark.timeout(600)
def test_SL_track(sl_track_setting: SettingProxy[ClassIncrementalSetting]):
    """Applies this Method to the Setting of the sl track of the competition."""
    method = ExampleRegMethod(hparams=RegularizedClassifier.HParams(max_epochs_per_task=1))
    results = sl_track_setting.apply(method)
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    results: ClassIncrementalSetting.Results
    assert 0.30 <= results.average_online_performance.objective <= 0.50
    assert 0.02 <= results.average_final_performance.objective <= 0.05


================================================
FILE: examples/clcomp21/sb3_example.py
================================================
""" Example where we start from a Method from stable-baselines3 to solve the rl track.
"""
from dataclasses import dataclass
from typing import ClassVar, Dict, Mapping, Optional, Type, Union

import gym
from gym import spaces
from simple_parsing import mutable_field

from sequoia.methods.stable_baselines3_methods.ppo import PPOMethod, PPOModel
from sequoia.settings.rl import ContinualRLSetting

# from stable_baselines3.ppo.policies import ActorCriticCnnPolicy, ActorCriticPolicy


class CustomPPOModel(PPOModel):
    @dataclass
    class HParams(PPOModel.HParams):
        """Hyper-parameters of the PPO Model."""


@dataclass
class CustomPPOMethod(PPOMethod):
    Model: ClassVar[Type[PPOModel]] = PPOModel
    # Hyper-parameters of the PPO Model.
    hparams: PPOModel.HParams = mutable_field(PPOModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting=setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> PPOModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """

    def get_search_space(self, setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]:
        return super().get_search_space(setting)


if __name__ == "__main__":

    # Create the Setting.

    # CartPole-state for debugging:
    from sequoia.settings.rl import RLSetting

    setting = RLSetting(dataset="CartPole-v0")

    # OR: Incremental CartPole-state:
    from sequoia.settings.rl import IncrementalRLSetting

    setting = IncrementalRLSetting(
        dataset="CartPole-v0",
        monitor_training_performance=True,
        nb_tasks=1,
        train_steps_per_task=1_000,
        test_max_steps=2000,
    )

    # OR: Setting of the RL Track of the competition:
    # setting = IncrementalRLSetting.load_benchmark("rl_track")

    # Create the Method:
    method = CustomPPOMethod()

    # Apply the Method onto the Setting to get Results.
    results = setting.apply(method)
    print(results.summary())

    # BONUS: Running a hyper-parameter sweep:
    # method.hparam_sweep(setting)


================================================
FILE: examples/clcomp21/sb3_example_test.py
================================================
import pytest

from sequoia.client.setting_proxy import SettingProxy
from sequoia.conftest import slow
from sequoia.settings.rl import IncrementalRLSetting, RLSetting
from sequoia.settings.sl import ClassIncrementalSetting

from .sb3_example import CustomPPOMethod, CustomPPOModel


@pytest.mark.timeout(120)
def test_cartpole_state(cartpole_state_setting: SettingProxy[RLSetting]):
    """Applies this Method to a simple cartpole-state setting."""
    method = CustomPPOMethod(hparams=CustomPPOModel.HParams(n_steps=64))
    results = cartpole_state_setting.apply(method)
    assert results.to_log_dict()

    results: RLSetting.Results
    # TODO: BUG: The SB3 method uses more than the number of steps allowed, probably
    # while filling up its buffer.
    assert 150 < results.average_final_performance.mean_episode_length


@pytest.mark.timeout(120)
def test_incremental_cartpole_state(
    incremental_cartpole_state_setting: SettingProxy[IncrementalRLSetting],
):
    """Applies this Method to the class-incremental mnist Setting."""
    method = CustomPPOMethod()
    results = incremental_cartpole_state_setting.apply(method)
    assert results.to_log_dict()

    results: ClassIncrementalSetting.Results
    # TODO: Increase this bound
    assert 5 <= results.average_online_performance.objective
    assert 5 <= results.average_final_performance.objective


@pytest.mark.timeout(300)
def test_RL_track(rl_track_setting: SettingProxy[IncrementalRLSetting]):
    """Applies this Method to the Setting of the sl track of the competition."""
    method = CustomPPOMethod()
    results = rl_track_setting.apply(method)
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    results: ClassIncrementalSetting.Results
    online_perf = results.average_online_performance
    # TODO: get an estimate of the upper bound of the random method on the RL track.
    assert 0 < online_perf.objective
    final_perf = results.average_final_performance
    assert 0 < final_perf.objective


================================================
FILE: examples/demo_utils.py
================================================
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Type

import pandas as pd
from simple_parsing import ArgumentParser

from sequoia.common.config import Config
from sequoia.settings import Method, Results, RLSetting, Setting, SLSetting


def demo_all_settings(
    MethodType: Type[Method],
    datasets: List[str] = ["mnist", "fashionmnist"],
    **setting_kwargs,
):
    """Evaluates the given Method on all its applicable settings.

    NOTE: Only evaluates on the mnist/fashion-mnist datasets for this demo.
    """
    # Iterate over all the applicable evaluation settings, using the default
    # options for each setting, and store the results inside this dictionary.
    all_results: Dict[Type[Setting], Dict[str, Results]] = defaultdict(dict)

    # Loop over all the types of settings this method is applicable on, i.e.
    # all the nodes in the tree below its target Setting).
    for setting_type in MethodType.get_applicable_settings():
        # Loop over all the available dataset for each setting:
        for dataset in setting_type.get_available_datasets():
            if datasets and dataset not in datasets:
                print(f"Skipping {setting_type} / {dataset} for now.")
                continue

            if issubclass(setting_type, RLSetting):
                print(f"Skipping {setting_type} (not considering RL settings for this demo).")
                continue

            # 1. Create a Method of the provided type, so we start fresh every time.
            method = MethodType()

            # 2. Create the setting
            setting = setting_type(dataset=dataset, **setting_kwargs)

            # 3. Apply the method on the setting.
            results: Results = setting.apply(method)

            print(f"Results on setting {setting_type}, dataset {dataset}:")
            print(results.summary())

            # Save the results in the dict defined above.
            all_results[setting_type][dataset] = results

    # Create a pandas dataframe with all the results:

    result_df: pd.DataFrame = make_result_dataframe(all_results)

    csv_path = Path(f"examples/results/results_{method.get_name()}.csv")
    csv_path.parent.mkdir(exist_ok=True, parents=True)
    result_df.to_csv(csv_path)
    print(f"Saved dataframe with results to path {csv_path}")

    # BONUS: Display the results in a LaTeX-formatted table!

    latex_table_path = Path(f"examples/results/table_{method.get_name()}.tex")
    caption = f"Results for method {type(method).__name__} settings."
    result_df.to_latex(
        buf=latex_table_path,
        caption=caption,
        na_rep="N/A",
        multicolumn=True,
    )
    print(f"Saved LaTeX table with results to path {latex_table_path}")

    return all_results


def make_result_dataframe(all_results):
    # Create a LaTeX table with all the results for all the settings.
    import pandas as pd

    all_settings: List[Type[Setting]] = list(all_results.keys())
    all_setting_names: List[str] = [s.get_name() for s in all_settings]

    all_datasets: List[str] = []
    for setting, dataset_to_results in all_results.items():
        all_datasets.extend(dataset_to_results.keys())
    all_datasets = list(set(all_datasets))

    ## Create a multi-index for the dataframe.
    # tuples = []
    # for setting, dataset_to_results in all_results.items():
    #     setting_name = setting.get_name()
    #     tuples.extend((setting_name, dataset) for dataset in dataset_to_results.keys())
    # tuples = sorted(list(set(tuples)))
    # multi_index = pd.MultiIndex.from_tuples(tuples, names=["setting", "dataset"])
    # single_index = pd.Index(["Objective"])
    # df = pd.DataFrame(index=multi_index, columns=single_index)

    df = pd.DataFrame(index=all_setting_names, columns=all_datasets)

    for setting_type, dataset_to_results in all_results.items():
        setting_name = setting_type.get_name()
        for dataset, result in dataset_to_results.items():
            # df["Objective"][setting_name, dataset] = result.objective
            df[dataset][setting_name] = result.objective
    return df


def compare_results(
    all_results: Dict[Type[Method], Dict[Type[Setting], Dict[str, Results]]]
) -> None:
    """Helper function, compares the results of the different methods by
    arranging them in a table (pandas dataframe).
    """
    # Make one huge dictionary that maps from:
    # <method, <setting, <dataset, result>>>
    from .demo_utils import make_comparison_dataframe

    comparison_df = make_comparison_dataframe(all_results)

    print("----- All Results -------")
    print(comparison_df)

    csv_path = Path("examples/results/comparison.csv")
    latex_path = Path("examples/results/table_comparison.tex")

    comparison_df.to_csv(csv_path)
    print(f"Saved dataframe with results to path {csv_path}")

    caption = f"Comparison of different methods on their applicable settings."
    comparison_df.to_latex(latex_path, caption=caption, multicolumn=False, multirow=False)
    print(f"Saved LaTeX table with results to path {latex_path}")


def make_comparison_dataframe(
    all_results: Dict[Type[Method], Dict[Type[Setting], Dict[str, Results]]]
) -> pd.DataFrame:
    """Helper function: takes in the dictionary with all the results and
    re-arranges it into a pandas dataframe.
    """
    # Get all the method names.
    all_methods: List[Type[Method]] = list(all_results.keys())
    all_method_names: List[str] = [m.get_name() for m in all_methods]

    # Get all the setting names.
    all_settings: List[Type[Setting]] = []
    for method_class, setting_to_dataset_to_results in all_results.items():
        all_settings.extend(setting_to_dataset_to_results.keys())
    all_settings = list(set(all_settings))
    all_setting_names: List[str] = [s.get_name() for s in all_settings]

    # Get all the dataset names.
    all_datasets: List[str] = []
    for method_class, setting_to_dataset_to_results in all_results.items():
        for setting, dataset_to_results in setting_to_dataset_to_results.items():
            all_datasets.extend(dataset_to_results.keys())
    all_datasets = list(set(all_datasets))

    # Create the a multi-index, so we can later index df[setting, datset][method]
    # Option 1: All [settings x all datasets]
    # iterables = [all_setting_names, all_datasets]
    # columns = pd.MultiIndex.from_product(iterables, names=["setting", "dataset"])

    # Option 2: Index will be [Setting, <datasets in that setting>]
    # Create the column index using the tuples that apply.
    tuples = []
    for method_class, setting_to_dataset_to_results in all_results.items():
        for setting, dataset_to_results in setting_to_dataset_to_results.items():
            setting_name = setting.get_name()
            tuples.extend((setting_name, dataset) for dataset in dataset_to_results.keys())
    tuples = sorted(list(set(tuples)))
    multi_index = pd.MultiIndex.from_tuples(tuples, names=["setting", "dataset"])
    single_index = pd.Index(all_method_names, name="Method")

    df = pd.DataFrame(index=multi_index, columns=single_index)

    for method_class, setting_to_dataset_to_results in all_results.items():
        method_name = method_class.get_name()
        for setting, dataset_to_results in setting_to_dataset_to_results.items():
            setting_name = setting.get_name()
            for dataset, result in dataset_to_results.items():
                df[method_name][setting_name, dataset] = result.objective
    return df


================================================
FILE: examples/prerequisites/dataclasses_example.py
================================================
""" Example describing dataclasses and how simple-parsing can be used to create
command-line arguments from them.
"""

from dataclasses import dataclass


@dataclass
class Point:
    x: float = 1.2
    y: float = 4.5

    # This generates the following method (among others):
    # def __init__(self, x: float = 1.2, y: float = 4.5):
    #     self.x = x
    #     self.y = y


if __name__ == "__main__":
    p1 = Point(0, 0)
    print(p1)
    expected = "Point(x=0, y=0)"

#
# Second example: HyperParameters with simple-parsing:
#

from simple_parsing import ArgumentParser
from simple_parsing.helpers import choice


@dataclass
class HParams:
    """Hyper-Parameters of my model."""

    # Learning rate.
    learning_rate: float = 3e-4
    # L2 regularization coefficient.
    weight_decay: float = 1e-6
    # Choice of optimizer
    optimizer: str = choice("adam", "sgd", "rmsprop", default="sgd")


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_arguments(HParams, "hparams")
    parser.print_help()
    import textwrap

    expected += textwrap.dedent(
        """\
        usage: dataclasses_example.py [-h] [--learning_rate float]
                                      [--weight_decay float]
                                      [--optimizer {adam,sgd,rmsprop}]

        optional arguments:
          -h, --help            show this help message and exit

        HParams ['hparams']:
          Hyper-Parameters of my model.

          --learning_rate float, --hparams.learning_rate float
                                Learning rate. (default: 0.0003)
          --weight_decay float, --hparams.weight_decay float
                                L2 regularization coefficient. (default: 1e-06)
          --optimizer {adam,sgd,rmsprop}, --hparams.optimizer {adam,sgd,rmsprop}
                                Choice of optimizer (default: sgd)
        """
    )

    args = parser.parse_args("")
    hparams: HParams = args.hparams
    print(hparams)
    expected += """\
    HParams(learning_rate=0.0003, weight_decay=1e-06, optimizer='sgd')
    """


================================================
FILE: mypy.ini
================================================
# Global options:

[mypy]
python_version = 3.7
warn_return_any = True
warn_unused_configs = True
follow_imports = normal

================================================
FILE: pytest.ini
================================================
[pytest]
timeout = 30
testpaths =
    sequoia
    examples
addopts =
    --doctest-modules
norecursedirs =
    methods/d3rlpy_methods
    settings/offline_rl
    examples/advances/procgen_example


================================================
FILE: requirements.txt
================================================
# Fork of gym with more flexible utility functions.
gym @ git+https://www.github.com/openai/gym@8819d561132082f6130d4a2388c68a963f41ec4f#egg=gym
# nngeometry module used in the EWC method
nngeometry @ git+https://github.com/oleksost/nngeometry.git#egg=nngeometry
# Temporary fix for issue#128
pyyaml!=5.4.*,>=5.1
simple_parsing==0.1.2.post1
# matplotlib==3.2.2
matplotlib
# NOTE: @lebrice: PyTorch suddenly got really picky about type annotations in 1.9.0 for
# some reason, and they really don't do a great job at evaluating them, so removing it
# for now.
torch==1.8.1
torchvision==0.9.1
scikit-learn
tqdm
continuum==1.0.19
# Only required for the current demo:
wandb
plotly
pandas
# Only for python < 3.8
singledispatchmethod;python_version<'3.8'
# NOTE: PyTorch-Lightning version 1.4.0 is "working" but raises lots of warnings.
pytorch-lightning==1.5.9
lightning-bolts==0.5.0
# Requirements for running tests:
pytest-timeout
pytest-xdist
pytest-xvfb # Prevents the gym popups from displaying during tests.
# Required for the RL methods
pyvirtualdisplay
# Required for the synbols dataset to work. 
h5py


================================================
FILE: scripts/eai/cancel_all_queuing.sh
================================================
all_ids=$(eai job ls --state queuing -c "$1" --fields id --no-header)
for id in $all_ids
do
  eai job kill $id
done

================================================
FILE: scripts/eai/cancel_all_running.sh
================================================
all_ids=$(eai job ls --state running  -c "$1" --fields id --no-header)
for id in $all_ids
do
  eai job kill $id
done

================================================
FILE: scripts/eai/job.sh
================================================
#!/bin/bash
set -o errexit    # Used to exit upon error, avoiding cascading errors
set -o errtrace    # Show error trace
set -o pipefail   # Unveils hidden failures
# set -o nounset    # Exposes unset variables

# Get organization name
ORG_NAME=$(eai organization get --field name)
# Get account name
ACCOUNT_NAME=$(eai account get --field name)
ACCOUNT_ID=$ORG_NAME.$ACCOUNT_NAME

EAI_Registry=${EAI_Registry:-"registry.console.elementai.com/$ACCOUNT_ID"}
echo "Using registry $EAI_Registry"

CURRENT_BRANCH="`git branch --show-current`"
BRANCH=${BRANCH:-$CURRENT_BRANCH}
export WANDB_API_KEY=${WANDB_API_KEY?"Need to pass the wandb api key or have it set in the environment variables."}

echo "Building eai-specific container for branch $BRANCH"

if [ "$NO_BUILD" ]; then
    echo "skipping build."
else
    echo "building"
    # TODO: There is something wrong here: How can they possibly build their job, if
    # they don't have the eai dockerfile?
    source dockers/eai/build.sh
fi

# The image we're using is going to be called sequoai_eai:$BRANCH, and will have been
# pushed to the user's eai registry.

eai job submit \
    --restartable \
    --data $ACCOUNT_ID.home:/mnt/home \
    --data $ACCOUNT_ID.data:/mnt/data \
    --data $ACCOUNT_ID.results:/mnt/results \
    --env WANDB_API_KEY="$WANDB_API_KEY" \
    --env HOME=/home/toolkit \
    --image $EAI_Registry/sequoia_eai:$BRANCH \
    --gpu 1 --cpu 8 --mem 12 \
    -- "$@"


# eai job submit \
#     --restartable \
#     --data $ACCOUNT_ID.home:/mnt/home \
#     --data $ACCOUNT_ID.data:/mnt/data \
#     --data $ACCOUNT_ID.results:/mnt/results \
#     --env WANDB_API_KEY="$WANDB_API_KEY" \
#     --env HOME=/home/toolkit \
#     --image $EAI_Registry/sequoia_eai:$BRANCH \
#     --gpu 1 --cpu 8 --mem 12 --gpu-model-filter 12gb \
#     -- "$@"


================================================
FILE: scripts/eai/rl_sweep.sh
================================================
#!/bin/bash
set -o errexit  # Used to exit upon error, avoiding cascading errors
set -o errtrace # Show error trace
set -o pipefail # Unveils hidden failures
set -o nounset  # Exposes unset variables
export WANDB_API_KEY=${WANDB_API_KEY?"Need to pass the wandb api key or have it set in the environment variables."}

source dockers/eai/build.sh

export NO_BUILD=1

# Number of runs per combination.
MAX_RUNS=20
PROJECT="crl_study"

SETTINGS=(
    "continual_rl"
    "discrete_task_agnostic_rl"
    "incremental_rl"
    "task_incremental_rl"
    "multi_task_rl"
    "traditional_rl"
)
METHODS=(
    "ppo"
    "a2c"
    "dqn"
    "ddpg"
    "sac"
    "td3"
    "baseline"
    "methods.ewc"
)
BENCHMARKS=(
    "cartpole"
    "monsterkong_mix"
    "mountaincar_continuous"
)
# "half_cheetah"

for METHOD in "${METHODS[@]}"; do
    for SETTING in "${SETTINGS[@]}"; do
        for BENCHMARK in "${BENCHMARKS[@]}"; do
            # Share the trials from different datasets, hopefully reusing something?
            DATABASE_PATH="/mnt/home/${SETTING}_${METHOD}.pkl"
            scripts/eai/job.sh sequoia_sweep \
                --max_runs $MAX_RUNS --database_path $DATABASE_PATH \
                --setting $SETTING --benchmark $BENCHMARK --project $PROJECT \
                --method $METHOD \
                "$@"
        done
    done
done

# source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset cifar10  --project csl_study --method baseline
# source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset cifar100 --project csl_study --nb_tasks 20 --method baseline
# source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset synbols  --project csl_study --nb_tasks 12 --method baseline


================================================
FILE: scripts/eai/shell_job.sh
================================================
#!/bin/bash
set -o errexit    # Used to exit upon error, avoiding cascading errors
set -o errtrace    # Show error trace
# set -o pipefail   # Unveils hidden failures
# set -o nounset    # Exposes unset variables

# Get organization name
ORG_NAME=$(eai organization get --field name)
# Get account name
ACCOUNT_NAME=$(eai account get --field name)
ACCOUNT_ID=$ORG_NAME.$ACCOUNT_NAME

EAI_Registry=registry.console.elementai.com/$ACCOUNT_ID

CURRENT_BRANCH="`git branch --show-current`"
BRANCH=${BRANCH:-$CURRENT_BRANCH}

existing_interactive_job_id=`eai job ls  --state alive --fields id,interactive | grep true | awk '{print $1}'`
if [ $existing_interactive_job_id ]; then
    echo "Found existing interactive job, with id $existing_interactive_job_id"
    eai job kill $existing_interactive_job_id
    echo "Sleeping for 5 seconds, just to give the job a chance to change its status."
    sleep 5
fi;


if [ "$NO_BUILD" ]; then
    echo "skipping build."
else
    echo "building"
    # TODO: There is something wrong here: How can they possibly build their job, if
    # they don't have the eai dockerfile?
    source dockers/eai/build.sh
fi

# The image we're using is going to be called sequoai_eai:$BRANCH, and will have been
# pushed to the user's eai registry.

eai job submit \
    --interactive \
    --data $ACCOUNT_ID.home:/mnt/home \
    --data $ACCOUNT_ID.data:/mnt/data \
    --data $ACCOUNT_ID.results:/mnt/results \
    --env WANDB_API_KEY="$WANDB_API_KEY" \
    --env HOME=/home/toolkit \
    --image $EAI_Registry/sequoia_eai:$BRANCH \
    --gpu 1 --cpu 8 --mem 12 --gpu-model-filter 12gb


================================================
FILE: scripts/eai/sl_sweep.sh
================================================
#!/bin/bash
set -o errexit  # Used to exit upon error, avoiding cascading errors
set -o errtrace # Show error trace
set -o pipefail # Unveils hidden failures
set -o nounset  # Exposes unset variables
export WANDB_API_KEY=${WANDB_API_KEY?"Need to pass the wandb api key or have it set in the environment variables."}

source dockers/eai/build.sh

export NO_BUILD=1

# Number of runs per combination.
MAX_RUNS=20
PROJECT="csl_study"

SETTINGS=(
    "continual_sl"
    "discrete_task_agnostic_sl"
    "incremental_sl"
    "task_incremental_sl"
    "multi_task_sl"
    "traditional_sl"
)
METHODS=(
    # "random_baseline"
    "gdumb"
    "agem"
    "ar1"
    "cwr_star"
    "gem"
    "lwf"
    "replay"
    "synaptic_intelligence"
    "avalanche.ewc"
    "baseline"
    "methods.ewc"
    "experience_replay"
    "hat"
    "pnn"
)
DATASETS=(
    "synbols --nb_tasks 12"
    "cifar10"
    "cifar100 --nb_tasks 10"
    "mnist"
)

for METHOD in "${METHODS[@]}"; do
    for SETTING in "${SETTINGS[@]}"; do
        for DATASET in "${DATASETS[@]}"; do
            # Share the trials from different datasets, hopefully reusing something?
            DABASE_PATH="/mnt/home/${SETTING}_${METHOD}.pkl"
            scripts/eai/job.sh sequoia_sweep \
                --max_runs $MAX_RUNS --database_path $DABASE_PATH \
                --setting $SETTING --dataset $DATASET --project $PROJECT \
                --method $METHOD --monitor_training_performance True \
                "$@"
        done
    done
done

# source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset cifar10  --project csl_study --method baseline
# source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset cifar100 --project csl_study --nb_tasks 20 --method baseline
# source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset synbols  --project csl_study --nb_tasks 12 --method baseline


================================================
FILE: scripts/slurm/launch_many_sweeps.sh
================================================
#!/bin/bash
set -o errexit  # Used to exit upon error, avoiding cascading errors
set -o errtrace # Show error trace
set -o pipefail # Unveils hidden failures
set -o nounset  # Exposes unset variables
export WANDB_API_KEY=${WANDB_API_KEY?"Need to pass the wandb api key or have it set in the environment variables."}

module load anaconda/3
conda activate sequoia

cd ~/Sequoia
pip install -e .[hpo,monsterkong]

# Number of runs per combination.
MAX_RUNS=20
PROJECT="csl_study"

SETTINGS=("class_incremental" "task_incremental" "multi_task" "iid")
METHODS=(
    "gdumb" "random_baseline" "pnn" "agem"
    "ar1" "cwr_star" "gem" "gdumb" "lwf" "replay" "synaptic_intelligence"
    "avalanche.ewc" "methods.ewc" "experience_replay" "hat" "baseline"
)
DATASETS=(
    "synbols --nb_tasks 12"
    "cifar10"
    "cifar100 --nb_tasks 10"
    "mnist"
)

for METHOD in "${METHODS[@]}"; do
    for SETTING in "${SETTINGS[@]}"; do
        for DATASET in "${DATASETS[@]}"; do
            # Share the trials from different datasets, hopefully reusing something?
            DABASE_PATH="/mnt/home/${SETTING}_${METHOD}.pkl"
            scripts/slurm/sweep.sh \
                --max_runs $MAX_RUNS --database_path $DABASE_PATH \
                --setting $SETTING --dataset $DATASET --project $PROJECT \
                --WANDB_API_KEY $WANDB_API_KEY \
                --method $METHOD \
                "$@"
        done
    done
done


================================================
FILE: scripts/slurm/run.sh
================================================
#!/bin/bash
#SBATCH --array=0-3%2
#SBATCH --cpus-per-task=2
#SBATCH --gres=gpu:1
#SBATCH --mem=10GB
#SBATCH --time=11:59:00

module load anaconda/3
conda activate sequoia

cd ~/Sequoia
pip install -e .[hpo,monsterkong,avalanche]

sequoia --data_dir $SLURM_TMPDIR "$@"


================================================
FILE: scripts/slurm/sweep.sh
================================================
#!/bin/bash
#SBATCH --array=0-10%2
#SBATCH --cpus-per-task=2
#SBATCH --gres=gpu:1
#SBATCH --mem=10GB
#SBATCH --time=11:59:00
set -o errexit    # Used to exit upon error, avoiding cascading errors
set -o errtrace    # Show error trace
set -o pipefail   # Unveils hidden failures

module load anaconda/3
conda activate sequoia
cd ~/Sequoia

# TODO: Set data_dir in Config to `DATA_DIR` as a priority, and then as SLURM_TMPDIR/DATA (not just SLURM_TMPDIR!)
cp -r data $SLURM_TMPDIR/

export DATA_DIR=$SLURM_TMPDIR/data

#pip install -e .[hpo,monsterkong,avalanche]


# TODO: Change the setting, the number of tasks, the method, etc.
/home/mila/n/normandf/.conda/envs/sequoia/bin/sequoia_sweep --data_dir $SLURM_TMPDIR/data "$@"


================================================
FILE: sequoia/README.md
================================================
# sequoia

## Packages:
- [settings](settings): definitions for the settings (machine learning problems).
- [methods](methods): Contains the methods (which can be applied to settings).
- [common](common): utilities such as metrics, transforms, layers, gym wrappers configuration classes, etc. that are used by Settings and Methods.
- [utils](utils): miscelaneous utility functions (logging, command-line parsing, etc)
- [experiments](experiments): Command-line interface entry-points, via the `Experiment` class.
- [client (wip)](client): defines a proxy to a Setting and its environments, in order to further isolate the Method and Setting from each other (used for the CLVision competition). 


================================================
FILE: sequoia/__init__.py
================================================
""" Sequoia - The Research Tree """
from ._version import get_versions
from .settings import Environment, Method, Setting

# from .experiments import Experiment

__version__ = get_versions()["version"]
del get_versions


================================================
FILE: sequoia/_version.py
================================================
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.

# This file is released into the public domain. Generated by
# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer)

"""Git implementation of _version.py."""

import errno
import os
import re
import subprocess
import sys


def get_keywords():
    """Get the keywords needed to look up the version information."""
    # these strings will be replaced by git during git-archive.
    # setup.py/versioneer.py will grep for the variable names, so they must
    # each be defined on a line of their own. _version.py will just call
    # get_keywords().
    git_refnames = "$Format:%d$"
    git_full = "$Format:%H$"
    git_date = "$Format:%ci$"
    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
    return keywords


class VersioneerConfig:
    """Container for Versioneer configuration parameters."""


def get_config():
    """Create, populate and return the VersioneerConfig() object."""
    # these strings are filled in when 'setup.py versioneer' creates
    # _version.py
    cfg = VersioneerConfig()
    cfg.VCS = "git"
    cfg.style = "pep440-post"
    cfg.tag_prefix = "v"
    cfg.parentdir_prefix = "sequoia-"
    cfg.versionfile_source = "sequoia/_version.py"
    cfg.verbose = False
    return cfg


class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario."""


LONG_VERSION_PY = {}
HANDLERS = {}


def register_vcs_handler(vcs, method):  # decorator
    """Create decorator to mark a method as the handler of a VCS."""

    def decorate(f):
        """Store f in HANDLERS[vcs][method]."""
        if vcs not in HANDLERS:
            HANDLERS[vcs] = {}
        HANDLERS[vcs][method] = f
        return f

    return decorate


def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None):
    """Call the given command(s)."""
    assert isinstance(commands, list)
    p = None
    for c in commands:
        try:
            dispcmd = str([c] + args)
            # remember shell=False, so use git.cmd on windows, not just git
            p = subprocess.Popen(
                [c] + args,
                cwd=cwd,
                env=env,
                stdout=subprocess.PIPE,
                stderr=(subprocess.PIPE if hide_stderr else None),
            )
            break
        except EnvironmentError:
            e = sys.exc_info()[1]
            if e.errno == errno.ENOENT:
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(e)
            return None, None
    else:
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None
    stdout = p.communicate()[0].strip().decode()
    if p.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % stdout)
        return None, p.returncode
    return stdout, p.returncode


def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Try to determine the version from the parent directory name.

    Source tarballs conventionally unpack into a directory that includes both
    the project name and a version string. We will also support searching up
    two directory levels for an appropriately named parent directory
    """
    rootdirs = []

    for i in range(3):
        dirname = os.path.basename(root)
        if dirname.startswith(parentdir_prefix):
            return {
                "version": dirname[len(parentdir_prefix) :],
                "full-revisionid": None,
                "dirty": False,
                "error": None,
                "date": None,
            }
        else:
            rootdirs.append(root)
            root = os.path.dirname(root)  # up a level

    if verbose:
        print(
            "Tried directories %s but none started with prefix %s"
            % (str(rootdirs), parentdir_prefix)
        )
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")


@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file."""
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    keywords = {}
    try:
        f = open(versionfile_abs, "r")
        for line in f.readlines():
            if line.strip().startswith("git_refnames ="):
                mo = re.search(r'=\s*"(.*)"', line)
                if mo:
                    keywords["refnames"] = mo.group(1)
            if line.strip().startswith("git_full ="):
                mo = re.search(r'=\s*"(.*)"', line)
                if mo:
                    keywords["full"] = mo.group(1)
            if line.strip().startswith("git_date ="):
                mo = re.search(r'=\s*"(.*)"', line)
                if mo:
                    keywords["date"] = mo.group(1)
        f.close()
    except EnvironmentError:
        pass
    return keywords


@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords."""
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    date = keywords.get("date")
    if date is not None:
        # Use only the last line.  Previous lines may contain GPG signature
        # information.
        date = date.splitlines()[-1]

        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = set([r.strip() for r in refnames.strip("()").split(",")])
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)])
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = set([r for r in refs if re.search(r"\d", r)])
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix) :]
            if verbose:
                print("picking %s" % r)
            return {
                "version": r,
                "full-revisionid": keywords["full"].strip(),
                "dirty": False,
                "error": None,
                "date": date,
            }
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {
        "version": "0+unknown",
        "full-revisionid": keywords["full"].strip(),
        "dirty": False,
        "error": "no suitable tags",
        "date": None,
    }


@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(
        GITS,
        ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix],
        cwd=root,
    )
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[: git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix) :]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root)
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
    # Use only the last line.  Previous lines may contain GPG signature
    # information.
    date = date.splitlines()[-1]
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces


def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a ."""
    if "+" in pieces.get("closest-tag", ""):
        return "."
    return "+"


def render_pep440(pieces):
    """Build up version string, with post-release "local version identifier".

    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += plus_or_dot(pieces)
            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # exception #1
        rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered


def render_pep440_pre(pieces):
    """TAG[.post0.devDISTANCE] -- No -dirty.

    Exceptions:
    1: no tags. 0.post0.devDISTANCE
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += ".post0.dev%d" % pieces["distance"]
    else:
        # exception #1
        rendered = "0.post0.dev%d" % pieces["distance"]
    return rendered


def render_pep440_post(pieces):
    """TAG[.postDISTANCE[.dev0]+gHEX] .

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += "g%s" % pieces["short"]
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += "+g%s" % pieces["short"]
    return rendered


def render_pep440_old(pieces):
    """TAG[.postDISTANCE[.dev0]] .

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
    return rendered


def render_git_describe(pieces):
    """TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render_git_describe_long(pieces):
    """TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always -long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render(pieces, style):
    """Render the given version pieces into the requested style."""
    if pieces["error"]:
        return {
            "version": "unknown",
            "full-revisionid": pieces.get("long"),
            "dirty": None,
            "error": pieces["error"],
            "date": None,
        }

    if not style or style == "default":
        style = "pep440"  # the default

    if style == "pep440":
        rendered = render_pep440(pieces)
    elif style == "pep440-pre":
        rendered = render_pep440_pre(pieces)
    elif style == "pep440-post":
        rendered = render_pep440_post(pieces)
    elif style == "pep440-old":
        rendered = render_pep440_old(pieces)
    elif style == "git-describe":
        rendered = render_git_describe(pieces)
    elif style == "git-describe-long":
        rendered = render_git_describe_long(pieces)
    else:
        raise ValueError("unknown style '%s'" % style)

    return {
        "version": rendered,
        "full-revisionid": pieces["long"],
        "dirty": pieces["dirty"],
        "error": None,
        "date": pieces.get("date"),
    }


def get_versions():
    """Get version information or return default if unable to do so."""
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.

    cfg = get_config()
    verbose = cfg.verbose

    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for i in cfg.versionfile_source.split("/"):
            root = os.path.dirname(root)
    except NameError:
        return {
            "version": "0+unknown",
            "full-revisionid": None,
            "dirty": None,
            "error": "unable to find root of source tree",
            "date": None,
        }

    try:
        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
        return render(pieces, cfg.style)
    except NotThisMethod:
        pass

    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return {
        "version": "0+unknown",
        "full-revisionid": None,
        "dirty": None,
        "error": "unable to compute version",
        "date": None,
    }


================================================
FILE: sequoia/client/README.md
================================================
# (WIP) Sequoia Client

This is only currently used for the competition. The idea is that the setting (and its environments) are isolated from the user (the 'client'), in order to prevent any modifications / hacking of the environment.


================================================
FILE: sequoia/client/__init__.py
================================================
from .env_proxy import EnvironmentProxy
from .setting_proxy import SettingProxy


================================================
FILE: sequoia/client/__main__.py
================================================
""" TODO: launch the 'sequoia gRPC server' at a given address / port. """
import argparse

from .server import server

if __name__ == "__main__":
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--ip", type=str, help="gRPC host ip", default="")
    parser.add_argument("-p", "--port", type=int, help="gRPC port", default=13337)
    args = parser.parse_args()

    server(
        grpc_host=args.ip,
        grpc_port=args.port,
    )


================================================
FILE: sequoia/client/env.proto
================================================
syntax = "proto3";
// Adapted from https://github.com/AppliedDeepLearning/gymx/blob/master/gymx/env.proto

enum SettingType {
  CLASS_INCREMENTAL = 0;
  TASK_INCREMENTAL = 1;
  CONTINUAL_RL = 2;
  INCREMENTAL_RL = 3;
}

service Environment {
  rpc Make (Name) returns (Info) {};
  rpc Reset (Empty) returns (Observation) {};
  rpc Step (Action) returns (Transition) {};
}

message Name {
  string value = 1;
}

message Info {
  repeated int32 observation_shape = 1;
  int32 num_actions = 2;
  int32 max_episode_steps = 3;
}

message Action {
  int32 value = 1;
}

message Observation {
  repeated float data = 1;
  repeated int32 shape = 2;
}

message Transition {
  Observation observation = 1;
  float reward = 2;
  Observation next_episode = 3;
}

message Empty {}

================================================
FILE: sequoia/client/env_proxy.py
================================================
"""TODO: Create an 'environment proxy' that relays observations / actions etc from a remote environment via gRPC.

For now this simply holds the 'remote' environment in memory.
"""
from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union

import numpy as np
from torch import Tensor

from sequoia.common.metrics import Metrics
from sequoia.settings import (
    Actions,
    ActionType,
    Environment,
    Observations,
    ObservationType,
    Results,
    Rewards,
    RewardType,
    Setting,
)

MISSING = object()


class EnvironmentProxy(Environment[ObservationType, ActionType, RewardType]):
    def __init__(self, env_fn, setting_type: Type[Setting]):
        # TODO: Actually interact with a given environment of the remote Setting
        # TODO: env_fn is just a callable that returns the actual env now, but the idea
        # is that it would perhaps be a handle/address/whatever which we could contact?
        self.__environment = env_fn()
        # TODO: Remove this if possible
        self._environment_type = type(self.__environment)
        self._setting_type = setting_type

        self.observation_space = self.get_attribute("observation_space")
        self.action_space = self.get_attribute("action_space")

        # NOTE: We don't define the `reward_space` attribute if the underlying env
        # doesnt have it.
        missing = object()
        reward_space = self.get_attribute("reward_space", default=missing)
        if reward_space is not missing:
            self.reward_space = reward_space

        # TODO: Double check this also works for RL
        batch_size = self.get_attribute("batch_size", default=missing)
        if batch_size is not missing:
            self.batch_size: Optional[int] = batch_size

    def get_attribute(self, name: str, default: Any = MISSING) -> Any:
        if default is MISSING:
            # TODO: actually get the value from the 'remote' env.
            return getattr(self.__environment, name)
        else:
            return getattr(self.__environment, name, default)

    def reset(self) -> ObservationType:
        obs = self.__environment.reset()
        return obs

    def __len__(self) -> int:
        return self.__environment.__len__()

    def step(
        self, actions: ActionType
    ) -> Tuple[
        ObservationType,
        RewardType,
        Union[bool, Sequence[bool]],
        Union[Dict, Sequence[Dict]],
    ]:
        # Simulate converting things to a pickleable object?
        if isinstance(actions, Actions):
            actions = actions.numpy()
        actions_pkl = actions
        # TODO: Use some kind of gRPC endpoint.
        observations_pkl, rewards_pkl, done_pkl, info_pkl = self.__environment.step(actions_pkl)
        if isinstance(observations_pkl, (Observations, dict)):
            observations = self._setting_type.Observations(**observations_pkl)
        else:
            observations = observations_pkl
        if isinstance(rewards_pkl, (Rewards, dict)):
            rewards = self._setting_type.Rewards(**rewards_pkl)
        else:
            rewards = rewards_pkl
        done = np.array(done_pkl)
        info = np.array(info_pkl)
        return observations, rewards, done, info

    def __iter__(self):
        return self.__environment.__iter__()

    def __next__(self) -> ObservationType:
        return self.__environment.__next__()

    def send(self, actions: ActionType):
        if isinstance(actions, Actions):
            actions = actions.y_pred
        if isinstance(actions, Tensor):
            actions = actions.cpu().numpy()
        actions_pkl = actions
        rewards_pkl = self.__environment.send(actions_pkl)
        if isinstance(rewards_pkl, (Rewards, dict)):
            rewards = self._setting_type.Rewards(**rewards_pkl)
        else:
            rewards = rewards_pkl
        return rewards

    def close(self):
        self.__environment.close()

    @property
    def is_closed(self) -> bool:
        return self.get_attribute("is_closed")

    def render(self, *args, **kwargs):
        return self.__environment.render(*args, **kwargs)

    def get_results(self) -> Results:
        return self.__environment.get_results()

    def get_online_performance(self) -> List[Metrics]:
        return self.__environment.get_online_performance()

    def get_average_online_performance(self) -> Metrics:
        return self.__environment.get_average_online_performance()

    def __getattr__(self, name: str):
        if name.startswith("_"):
            raise AttributeError(f"attempted to get missing private attribute '{name}'")
        return self.get_attribute(name)


================================================
FILE: sequoia/client/env_proxy_test.py
================================================
import platform
from functools import partial
from typing import ClassVar, Iterable, Tuple, Type, TypeVar

import gym
import numpy as np
import psutil
import pytest
from torch import Tensor
from torchvision.datasets import MNIST

from sequoia.common.gym_wrappers.env_dataset import EnvDataset
from sequoia.common.gym_wrappers.env_dataset_test import TestEnvDataset as _TestEnvDataset
from sequoia.common.gym_wrappers.utils import is_proxy_to
from sequoia.common.spaces import Image
from sequoia.common.transforms import Compose, Transforms
from sequoia.settings.assumptions import IncrementalAssumption
from sequoia.settings.rl.continual.environment import GymDataLoader
from sequoia.settings.rl.continual.environment_test import TestGymDataLoader as _TestGymDataLoader
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.settings.sl.environment_test import TestPassiveEnvironment as _TestPassiveEnvironment

from .env_proxy import EnvironmentProxy

# Note: import with underscores so we don't re-run those tests again.

EnvType = TypeVar("EnvType", bound=gym.Env, covariant=True)


def wrap_type_with_proxy(env_type: Type[EnvType]) -> EnvType:
    class _EnvProxy(EnvironmentProxy):
        def __init__(self, *args, **kwargs):
            env_fn = partial(env_type, *args, **kwargs)
            super().__init__(env_fn, setting_type=IncrementalAssumption)

    return _EnvProxy


ProxyEnvDataset = wrap_type_with_proxy(EnvDataset)
ProxyPassiveEnvironment = wrap_type_with_proxy(PassiveEnvironment)
ProxyGymDataLoader = wrap_type_with_proxy(GymDataLoader)


class TestEnvironmentProxy(_TestEnvDataset, _TestPassiveEnvironment, _TestGymDataLoader):
    # IDEA: Reuse the tests for the EnvDataset, but using a proxy to the environment
    # instead.
    EnvDataset: ClassVar[Type[EnvDataset]] = ProxyEnvDataset

    # IDEA: Reuse the tests for the PassiveEnvironment, but using a proxy to the env.
    PassiveEnvironment: ClassVar[Type[PassiveEnvironment]] = ProxyPassiveEnvironment

    # Reuse the tests for the Gym DataLoader, using a proxy to the loader instead.
    GymDataLoader: ClassVar[Type[GymDataLoader]] = ProxyGymDataLoader


def test_sanity_check():
    env = ProxyEnvDataset(gym.make("CartPole-v0"))
    assert isinstance(env, EnvironmentProxy)
    assert issubclass(type(env), EnvironmentProxy)


@pytest.mark.parametrize("use_wrapper", [False, True])
def test_is_proxy_to(use_wrapper: bool):
    import numpy as np

    from sequoia.common.transforms import Compose, Transforms

    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    from torchvision.datasets import MNIST

    from sequoia.common.spaces import Image

    batch_size = 32
    dataset = MNIST("data", transform=transforms)
    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)

    env_type = ProxyPassiveEnvironment if use_wrapper else PassiveEnvironment
    env: Iterable[Tuple[Tensor, Tensor]] = env_type(
        dataset,
        batch_size=batch_size,
        n_classes=10,
        observation_space=obs_space,
    )
    if use_wrapper:
        assert isinstance(env, EnvironmentProxy)
        assert issubclass(type(env), EnvironmentProxy)
        assert is_proxy_to(env, PassiveEnvironment)
    else:
        assert not is_proxy_to(env, PassiveEnvironment)


# TODO: Write a test that first reproduces issue #204 and then check that removing
# `self.__environment.reset()` from __iter__ fixed it.


@pytest.mark.skipif(
    platform.system() != "Linux",
    reason="Not sure this would work the same on non-Linux systems.",
)
def test_issue_204():
    """Test that reproduces the issue #204, which was that some zombie processes
    appeared to be created when iterating using an EnvironmentProxy.

    The issue appears to have been caused by calling `self.__environment.reset()` in
    `__iter__`, which I think caused another dataloader iterator to be created?
    """
    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])

    batch_size = 2048
    num_workers = 12

    dataset = MNIST("data", transform=transforms)
    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)

    current_process = psutil.Process()
    print(
        f"Current process is using {current_process.num_threads()} threads, with "
        f" {len(current_process.children(recursive=True))} child processes."
    )
    starting_threads = current_process.num_threads()
    starting_processes = len(current_process.children(recursive=True))

    for use_wrapper in [False, True]:

        threads = current_process.num_threads()
        processes = len(current_process.children(recursive=True))
        assert threads == starting_threads
        assert processes == starting_processes

        env_type = ProxyPassiveEnvironment if use_wrapper else PassiveEnvironment
        env: Iterable[Tuple[Tensor, Tensor]] = env_type(
            dataset,
            batch_size=batch_size,
            n_classes=10,
            observation_space=obs_space,
            num_workers=num_workers,
            persistent_workers=True,
        )
        for i, _ in enumerate(env):
            threads = current_process.num_threads()
            processes = len(current_process.children(recursive=True))
            assert threads == starting_threads + num_workers
            assert processes == starting_processes + num_workers
            print(
                f"Current process is using {threads} threads, with "
                f" {processes} child processes."
            )

        for i, _ in enumerate(env):
            threads = current_process.num_threads()
            processes = len(current_process.children(recursive=True))
            assert threads == starting_threads + num_workers
            assert processes == starting_processes + num_workers
            print(
                f"Current process is using {threads} threads, with "
                f" {processes} child processes."
            )

        obs = env.reset()
        done = False
        while not done:
            obs, reward, done, info = env.step(env.action_space.sample())

            # env.render(mode="human")

            threads = current_process.num_threads()
            processes = len(current_process.children(recursive=True))
            if not done:
                assert threads == starting_threads + num_workers
                assert processes == starting_processes + num_workers
                print(
                    f"Current process is using {threads} threads, with "
                    f" {processes} child processes."
                )

        env.close()

        import time

        # Need to give it a second (or so) to cleanup.
        time.sleep(1)

        threads = current_process.num_threads()
        processes = len(current_process.children(recursive=True))
        assert threads == starting_threads
        assert processes == starting_processes


def test_interaction_with_test_environment():
    # IDEA: Maybe write tests for the 'test' environments, and see that they work even
    # through the proxy?
    pass


================================================
FILE: sequoia/client/server.py
================================================
def server(grpc_host: str, grpc_port: int):
    raise NotImplementedError(f"TODO")


================================================
FILE: sequoia/client/setting_proxy.py
================================================
import time
import warnings
from functools import partial
from logging import getLogger
from pathlib import Path
from typing import Any, Callable, Dict, Generic, List, Optional, Type, TypeVar

import gym
import numpy as np

from sequoia.common.config import Config
from sequoia.methods import Method
from sequoia.settings import ClassIncrementalSetting, IncrementalRLSetting, Results, Setting
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.settings.base import SettingABC

from .env_proxy import EnvironmentProxy

logger = getLogger(__file__)

# IDEA: Dict that indicates for each setting, which attributes are *NOT* writeable.
_readonly_attributes: Dict[Type[Setting], List[str]] = {
    ClassIncrementalSetting: ["test_transforms"],
    IncrementalRLSetting: ["test_transforms"],
}
# IDEA: Dict that indicates for each setting, which attributes are *NOT* readable.
_hidden_attributes: Dict[Type[Setting], List[str]] = {
    ClassIncrementalSetting: ["test_class_order"],
    IncrementalRLSetting: ["test_task_schedule", "test_wrappers"],
}

SettingType = TypeVar("SettingType", bound=Setting)


class SettingProxy(SettingABC, Generic[SettingType]):
    """Proxy for a Setting.

    TODO: Creating the Setting locally for now, but we'd spin-up or contact a gRPC
    service" that would have at least the following endpoints:

    - get_attribute(name: str) -> Any:
        returns the attribute from the setting, if that attribute can be read.

    - set_attribute(name: str, value: Any) -> bool:
        Sets the given attribute to the given value, if that is allowed.

    - train_dataloader()
    - val_dataloader()
    - test_dataloader()
    """

    # NOTE: Using __slots__ so we can detect errors if Method tries to set non-existent
    # attribute on the SettingProxy.
    # TODO: I don't think this has any effect, because we subclass SettingABC which
    # doesn't use __slots__.
    __slots__ = ["__setting", "_setting_type", "_train_env", "_val_env", "_test_env"]

    def __init__(
        self,
        setting_type: Type[SettingType],
        setting_config_path: Path = None,
        **setting_kwargs,
    ):
        self._setting_type = setting_type
        self.__setting: SettingType
        if setting_config_path:
            self.__setting = setting_type.load_benchmark(setting_config_path)
            if setting_kwargs:
                raise RuntimeError(
                    "Can't use keyword arguments when passing a path to a yaml file!"
                )
        else:
            self.__setting = setting_type(**setting_kwargs)
        self.__setting.monitor_training_performance = True
        super().__init__()

        self._train_env = None
        self._val_env = None
        self._test_env = None

    @property
    def observation_space(self) -> gym.Space:
        self.set_attribute("train_transforms", self.train_transforms)
        return self.get_attribute("observation_space")

    @property
    def action_space(self) -> gym.Space:
        return self.get_attribute("action_space")

    @property
    def reward_space(self) -> gym.Space:
        return self.get_attribute("reward_space")

    @property
    def train_env(self) -> EnvironmentProxy:
        return self._train_env

    @property
    def val_env(self) -> EnvironmentProxy:
        return self._val_env

    @property
    def test_env(self) -> EnvironmentProxy:
        if not self._is_readable("test_env"):
            raise RuntimeError("You don't have access to the test_env attribute!")
        return self._setting_type.test_env(self)

    @test_env.setter
    def test_env(self, value) -> None:
        if not self._is_writeable("test_env"):
            raise RuntimeError("You don't have access to the test_env attribute!")
        self.__setting.test_env = value

    def _temp_make_readable(self, attribute: str) -> None:
        """Temporarily makes an attribute readable."""
        # if attribute in _hidden_attributes:

    @property
    def config(self) -> Config:
        return self.get_attribute("config")

    @config.setter
    def config(self, value: Config) -> None:
        self.set_attribute("config", value)

    def prepare_data(self, *args, **kwargs):
        self.__setting.prepare_data(*args, **kwargs)

    def setup(self, stage: str = None):
        self.__setting.setup(stage=stage)

    def get_name(self):
        return self.__setting.get_name()

    def _is_readable(self, attribute: str) -> bool:
        if self._setting_type in _hidden_attributes:
            key = self._setting_type
        else:
            for parent_setting_type in self._setting_type.get_parents():
                if parent_setting_type in _hidden_attributes:
                    key = parent_setting_type
                    break
            else:
                return True
        return attribute not in _hidden_attributes[key]

    def _is_writeable(self, attribute: str) -> bool:
        if self._setting_type in _readonly_attributes:
            key = self._setting_type
        else:
            for parent_setting_type in self._setting_type.get_parents():
                if parent_setting_type in _readonly_attributes:
                    key = parent_setting_type
                    break
            else:
                return True
        return attribute not in _readonly_attributes[key]

    @property
    def batch_size(self) -> Optional[int]:
        return self.get_attribute("batch_size")

    @batch_size.setter
    def batch_size(self, value: Optional[int]) -> None:
        self.set_attribute("batch_size", value)

    @property
    def train_transforms(self) -> List[Callable]:
        return self.__setting.train_tansforms

    @train_transforms.setter
    def train_transforms(self, value: List[Callable]):
        self.__setting.train_transforms = value

    @property
    def val_transforms(self) -> List[Callable]:
        return self.__setting.val_tansforms

    @val_transforms.setter
    def val_transforms(self, value: List[Callable]):
        self.__setting.val_transforms = value

    @property
    def test_transforms(self) -> List[Callable]:
        return self.__setting.test_tansforms

    @test_transforms.setter
    def test_transforms(self, value: List[Callable]):
        self.__setting.test_transforms = value

    def apply(self, method: Method, config: Config = None) -> Results:
        # TODO: Figure out where the 'config' should be defined?
        method.configure(setting=self)
        self.config = self._setup_config(method)
        # TODO: Not sure if the method is changing the train_transforms.
        # Run the Main loop.
        self.Observations = self._setting_type.Observations
        self.Actions = self._setting_type.Actions
        self.Rewards = self._setting_type.Rewards

        if hasattr(self._setting_type, "TestEnvironment"):
            self.TestEnvironment = self._setting_type.TestEnvironment
        # results = self._setting_type.apply(self, method, config=config)

        results: Results = self.main_loop(method)
        logger.info(f"Results objective: {results.objective}")
        logger.info(results.summary())
        method.receive_results(self, results=results)
        return results

    def get_attribute(self, name: str) -> Any:
        value = getattr(self.__setting, name)
        if value is None:
            return value
        if not isinstance(value, (int, str, bool, np.ndarray, gym.Space, list)):
            warnings.warn(
                RuntimeWarning(
                    f"TODO: Attribute {name} has a value of type {type(value)}, which "
                    f"wouldn't necessarily be easy to transfer with gRPC. This could "
                    f"mean that we need to implement this on the proxy itself. "
                )
            )
        return value

    def set_attribute(self, name: str, value: Any) -> None:
        return setattr(self.__setting, name, value)

    def train_dataloader(self, batch_size: int = None, num_workers: int = None) -> EnvironmentProxy:
        # TODO: Faking this 'remote-ness' for now:
        return EnvironmentProxy(
            env_fn=partial(
                self.__setting.train_dataloader,
                batch_size=batch_size,
                num_workers=num_workers,
            ),
            setting_type=self._setting_type,
        )

        batch_size = batch_size if batch_size is not None else self.get_attribute("batch_size")
        num_workers = num_workers if num_workers is not None else self.get_attribute("num_workers")
        if self._train_env:
            self._train_env.close()
            del self._train_env

        self._train_env = EnvironmentProxy(
            env_fn=partial(
                self.__setting.train_dataloader,
                batch_size=batch_size,
                num_workers=num_workers,
            ),
            setting_type=self._setting_type,
        )
        return self._train_env

    def val_dataloader(self, batch_size: int = None, num_workers: int = None) -> EnvironmentProxy:
        return EnvironmentProxy(
            env_fn=partial(
                self.__setting.val_dataloader,
                batch_size=batch_size,
                num_workers=num_workers,
            ),
            setting_type=self._setting_type,
        )

        if self._val_env:
            self._val_env.close()
            del self._val_env

        self._val_env = EnvironmentProxy(
            env_fn=partial(
                self._setting_type.val_dataloader,
                self,
                batch_size=batch_size,
                num_workers=num_workers,
            ),
            setting_type=self._setting_type,
        )
        return self._val_env

    def test_dataloader(self, batch_size: int = None, num_workers: int = None):
        # TODO: Get the caller, and if it's 'internal' to sequoia then let it through.
        # raise RuntimeError("You don't have access to the test_dataloader method!")
        return EnvironmentProxy(
            env_fn=partial(
                self.__setting.test_dataloader,
                batch_size=batch_size,
                num_workers=num_workers,
            ),
            setting_type=self._setting_type,
        )
        # return EnvironmentProxy(
        #     partial(self._setting_type.test_dataloader, self, batch_size=batch_size, num_workers=num_workers),
        #     setting_type=self._setting_type,
        # )

    def __test_dataloader(
        self, batch_size: int = None, num_workers: int = None
    ) -> EnvironmentProxy:

        batch_size = batch_size if batch_size is not None else self.get_attribute("batch_size")
        num_workers = num_workers if num_workers is not None else self.get_attribute("num_workers")
        if self._test_env:
            self._test_env.close()
            del self._test_env
        self._test_env = EnvironmentProxy(
            env_fn=partial(
                self.__setting.test_dataloader,
                batch_size=batch_size,
                num_workers=num_workers,
            ),
            setting_type=self._setting_type,
        )
        return self._test_env

    def main_loop(self, method: Method) -> Results:
        # TODO: Implement the 'remote' equivalent of the main loop of the IncrementalAssumption.

        # test_results = self._setting_type.Results()
        method.set_training()

        dataset: str = self.get_attribute("dataset")
        nb_tasks = self.get_attribute("nb_tasks")
        known_task_boundaries_at_train_time: bool = self.get_attribute(
            "known_task_boundaries_at_train_time"
        )
        task_labels_at_train_time: bool = self.get_attribute("task_labels_at_train_time")

        # Send the train / val transforms to the 'remote' env.
        self.set_attribute("train_transforms", self.train_transforms)
        self.set_attribute("val_transforms", self.val_transforms)
        self.Results = self._setting_type.Results

        # TODO: Can we avoid duplicating the main loop here?
        # test_results = self.__setting.main_loop(method)
        # test_results._objective_scaling_factor = (
        #     0.01 if dataset.startswith("MetaMonsterKong") else 1.0
        # )
        test_results = self._setting_type.main_loop(self, method=method)
        start_time = time.process_time()

        # for task_id in range(nb_tasks):
        #     logger.info(
        #         f"Starting training" + (f" on task {task_id}." if nb_tasks > 1 else ".")
        #     )
        #     self.set_attribute("_current_task_id", task_id)

        #     if known_task_boundaries_at_train_time:
        #         # Inform the model of a task boundary. If the task labels are
        #         # available, then also give the id of the new task to the
        #         # method.
        #         # TODO: Should we also inform the method of wether or not the
        #         # task switch is occuring during training or testing?
        #         if not hasattr(method, "on_task_switch"):
        #             logger.warning(
        #                 UserWarning(
        #                     f"On a task boundary, but since your method doesn't "
        #                     f"have an `on_task_switch` method, it won't know about "
        #                     f"it! "
        #                 )
        #             )
        #         elif not task_labels_at_train_time:
        #             method.on_task_switch(None)
        #         else:
        #             # NOTE: on_task_switch won't be called if there is only one "task",
        #             # (as-in one task in a 'sequence' of tasks).
        #             # TODO: in multi-task RL, i.e. RLSetting(dataset=..., nb_tasks=10),
        #             # for instance, then there are indeed 10 tasks, but `self.tasks`
        #             # is used here to describe the number of 'phases' in training and
        #             # testing.
        #             if nb_tasks > 1:
        #                 method.on_task_switch(task_id)

        #     task_train_loader = self.train_dataloader()
        #     task_valid_loader = self.val_dataloader()
        #     success = method.fit(
        #         train_env=task_train_loader, valid_env=task_valid_loader,
        #     )
        #     task_train_loader.close()
        #     task_valid_loader.close()

        #     test_results._online_training_performance.append(
        #         task_train_loader.get_online_performance()
        #     )

        #     test_loop_results = self.test_loop(method)
        #     test_results.append(test_loop_results)

        #     logger.info(f"Finished Training on task {task_id}.")

        runtime = time.process_time() - start_time
        test_results._runtime = runtime
        return test_results

    def test_loop(self, method: Method) -> "IncrementalAssumption.Results":
        """(WIP): Runs an incremental test loop and returns the Results.

        The idea is that this loop should be exactly the same, regardless of if
        you're on the RL or the CL side of the tree.

        NOTE: If `self.known_task_boundaries_at_test_time` is `True` and the
        method has the `on_task_switch` callback defined, then a callback
        wrapper is added that will invoke the method's `on_task_switch` and pass
        it the task id (or `None` if `not self.task_labels_available_at_test_time`)
        when a task boundary is encountered.

        This `on_task_switch` 'callback' wrapper gets added the same way for
        Supervised or Reinforcement learning settings.
        """
        nb_tasks = self.get_attribute("nb_tasks")
        known_task_boundaries_at_test_time = self.get_attribute(
            "known_task_boundaries_at_test_time"
        )
        # TODO: Always setting this to False for now.
        task_labels_at_test_time = self.get_attribute("task_labels_at_test_time")
        if task_labels_at_test_time:
            warnings.warn(
                RuntimeWarning("no task labels at test time for now when using a SettingProxy")
            )
        # TODO: Avoid duplicating the test loop here?
        test_results = self.__setting.test_loop(method=method)

        # was_training = method.training
        # method.set_testing()
        # test_env = self.__test_dataloader()

        # if known_task_boundaries_at_test_time and nb_tasks > 1:
        #     # TODO: We need to have a way to inform the Method of task boundaries, if the
        #     # Setting allows it.
        #     # Not sure how to do this. It might be simpler to just do something like
        #     # `obs, rewards, done, info, task_switched = <endpoint>.step(actions)`?
        #     # # Add this wrapper that will call `on_task_switch` when the right step is
        #     # # reached.
        #     # test_env = StepCallbackWrapper(test_env, callbacks=[_on_task_switch])
        #     pass

        # obs = test_env.reset()
        # batch_size = test_env.batch_size
        # max_steps: int = self.get_attribute("test_steps") // (batch_size or 1)

        # # Reset on the last step is causing trouble, since the env is closed.
        # pbar = tqdm.tqdm(itertools.count(), total=train_max_steps, desc="Test")
        # episode = 0
        # for step in pbar:
        #     if test_env.is_closed():
        #         logger.debug(f"Env is closed")
        #         break

        #     # BUG: This doesn't work if the env isn't batched.
        #     action_space = test_env.action_space
        #     batch_size = getattr(
        #         test_env, "num_envs", getattr(test_env, "batch_size", 0)
        #     )
        #     env_is_batched = batch_size is not None and batch_size >= 1
        #     if env_is_batched:
        #         # NOTE: Need to pass an action space that actually reflects the batch
        #         # size, even for the last batch!
        #         obs_batch_size = obs.x.shape[0] if obs.x.shape else None
        #         action_space_batch_size = (
        #             test_env.action_space.shape[0]
        #             if test_env.action_space.shape
        #             else None
        #         )
        #         if (
        #             obs_batch_size is not None
        #             and obs_batch_size != action_space_batch_size
        #         ):
        #             action_space = batch_space(
        #                 test_env.single_action_space, obs_batch_size
        #             )

        #     action = method.get_actions(obs, action_space)

        #     # logger.debug(f"action: {action}")
        #     obs, reward, done, info = test_env.step(action)

        #     # TODO: Add something to `info` that indicates when a task boundary is
        #     # reached, so that we can call the `on_task_switch` method on the Method
        #     # ourselves.

        #     if done and not test_env.is_closed():
        #         # logger.debug(f"end of test episode {episode}")
        #         obs = test_env.reset()
        #         episode += 1

        # test_env.close()
        # test_results = test_env.get_results()

        # if was_training:
        #     method.set_training()

        return test_results

    # NOTE: Was experimenting with the idea of allowing the regular getattr and setattr
    # to forward calls to the remote. In the end I think it's better to explicitly
    # prevent any of these from happening.

    def __getattr__(self, name: str):
        # NOTE: This only ever gets called if the attribute was not found on the
        if self._is_readable(name):
            print(f"Accessing missing attribute {name} from the 'remote' setting.")
            return self.get_attribute(name)
        raise AttributeError(
            f"Attribute {name} is either not present on the setting, or not marked as " f"readable!"
        )

    # def __setattr__(self, name: str, value: Any) -> None:
    #     # Weird pytorch-lightning stuff:
    #     logger.debug(f"__setattr__ called for attribute {name}")
    #     if name in {"_setting_type", "__setting"}:
    #         assert name not in self.__dict__, f"Can't change attribute {name}"
    #         object.__setattr__(self, name, value)

    #     elif self._is_writeable(name):
    #         logger.info(f"Setting attribute {name} on the 'remote' setting.")
    #         self.set_attribute(name, value)
    #     else:
    #         raise AttributeError(f"Attribute {name} is marked as read-only!")


================================================
FILE: sequoia/client/setting_proxy_test.py
================================================
"""TODO: Tests for the SettingProxy.

"""
from functools import partial
from typing import ClassVar, Type

import numpy as np
import pytest
from gym import spaces

from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.common.spaces import Image, Sparse
from sequoia.common.transforms import Transforms
from sequoia.conftest import slow
from sequoia.methods.base_method import BaseMethod
from sequoia.methods.method_test import key_fn
from sequoia.methods.random_baseline import RandomBaselineMethod
from sequoia.settings import Setting, all_settings
from sequoia.settings.rl import IncrementalRLSetting, TaskIncrementalRLSetting
from sequoia.settings.rl.continual.setting import ContinualRLSetting
from sequoia.settings.rl.continual.setting_test import (
    TestContinualRLSetting as ContinualRLSettingTests,
)
from sequoia.settings.sl import ClassIncrementalSetting, DomainIncrementalSLSetting
from sequoia.settings.sl.continual.setting import ContinualSLSetting
from sequoia.settings.sl.continual.setting_test import (
    TestContinualSLSetting as ContinualSLSettingTests,
)

from .setting_proxy import SettingProxy


@pytest.mark.parametrize("setting_type", sorted(all_settings, key=key_fn))
def test_spaces_match(setting_type: Type[Setting]):
    setting = setting_type()
    s_proxy = SettingProxy(setting_type)
    assert s_proxy.observation_space == setting.observation_space
    assert s_proxy.action_space == setting.action_space
    assert s_proxy.reward_space == setting.reward_space


def test_transforms_get_propagated():
    for setting in [
        TaskIncrementalRLSetting(dataset="MetaMonsterKong-v0"),
        SettingProxy(TaskIncrementalRLSetting, dataset="MetaMonsterKong-v0"),
    ]:
        assert setting.observation_space.x == Image(0, 255, shape=(64, 64, 3), dtype=np.uint8)
        setting.transforms.append(Transforms.to_tensor)
        setting.transforms.append(Transforms.resize_32x32)
        # TODO: The observation space doesn't update directly in RL whenever the
        # transforms are changed.
        assert setting.observation_space.x == Image(0, 1, shape=(3, 32, 32))
        assert setting.train_dataloader().reset().x.shape == (3, 32, 32)


class TestContinualSLSettingProxy(ContinualSLSettingTests):
    Setting: ClassVar[Type[Setting]] = partial(SettingProxy, ContinualSLSetting)


class TestContinualRLSettingProxy(ContinualRLSettingTests):
    Setting: ClassVar[Type[Setting]] = partial(SettingProxy, ContinualRLSetting)


@pytest.mark.timeout(30)
def test_random_baseline(config):
    method = RandomBaselineMethod()
    setting = SettingProxy(DomainIncrementalSLSetting, config=config)
    results = setting.apply(method, config=config)
    # domain incremental mnist: 2 classes per task -> chance accuracy of 50%.
    assert 0.45 <= results.objective <= 0.55


@pytest.mark.timeout(180)
def test_random_baseline_rl():
    method = RandomBaselineMethod()
    setting = SettingProxy(
        IncrementalRLSetting,
        dataset="monsterkong",
        monitor_training_performance=True,
        # observe_state_directly=False, ## TODO: Make sure this doesn't change anything.
        train_steps_per_task=1_000,
        test_steps_per_task=1_000,
        train_task_schedule={
            0: {"level": 0},
            1: {"level": 1},
            2: {"level": 10},
            3: {"level": 11},
            4: {"level": 0},
        },
        # Interesting problem: Will it always do at least an entire episode here per
        # env?
        # batch_size=2,
        # num_workers=0,
    )
    assert setting.train_max_steps == 4_000
    assert setting.test_max_steps == 4_000
    results: IncrementalRLSetting.Results[EpisodeMetrics] = setting.apply(method)
    assert 20 <= results.average_final_performance.mean_reward_per_episode


@pytest.mark.timeout(120)
def test_random_baseline_SL_track():
    method = RandomBaselineMethod()
    setting = SettingProxy(ClassIncrementalSetting, dataset="synbols", nb_tasks=12)
    results = setting.apply(method)
    assert 1 / 48 * 0.5 <= results.objective <= 1 / 48 * 1.5


@slow
@pytest.mark.timeout(300)
def test_baseline_SL_track(config):
    """Applies the BaseMethod on something ressembling the SL track of the
    competition.
    """
    method = BaseMethod(max_epochs=1)
    import numpy as np

    class_order = np.random.permutation(48).tolist()
    setting = SettingProxy(
        ClassIncrementalSetting,
        dataset="synbols",
        nb_tasks=12,
        class_order=class_order,
    )
    results = setting.apply(method, config)
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    results: ClassIncrementalSetting.Results
    online_perf = results.average_online_performance
    assert 0.30 <= online_perf.objective <= 0.65
    final_perf = results.average_final_performance
    assert 0.02 <= final_perf.objective <= 0.06


def test_rl_track_setting_is_correct():
    setting = SettingProxy(
        IncrementalRLSetting,
        "rl_track",
    )
    assert setting.nb_tasks == 8
    assert setting.dataset == "MetaMonsterKong-v0"
    assert setting.observation_space == spaces.Dict(
        x=Image(0, 1, (3, 64, 64), dtype=np.float32),
        task_labels=Sparse(spaces.Discrete(8)),
    )
    assert setting.action_space == spaces.Discrete(6)
    # TODO: The reward range of the MetaMonsterKongEnv is (0, 50), which seems wrong.
    # This isn't really a big deal though.
    # assert setting.reward_space == spaces.Box(0, 100, shape=(), dtype=np.float32)
    assert setting.steps_per_task == 200_000
    assert setting.test_steps_per_task == 10_000
    assert setting.known_task_boundaries_at_train_time is True
    assert setting.known_task_boundaries_at_test_time is False
    assert setting.monitor_training_performance is True
    assert setting.train_transforms == [Transforms.to_tensor, Transforms.three_channels]
    assert setting.val_transforms == [Transforms.to_tensor, Transforms.three_channels]
    assert setting.test_transforms == [Transforms.to_tensor, Transforms.three_channels]

    train_env = setting.train_dataloader()
    assert train_env.observation_space == spaces.Dict(
        x=Image(0, 1, (3, 64, 64), dtype=np.float32),
        task_labels=spaces.Discrete(8),
    )
    assert train_env.reset() in train_env.observation_space

    valid_env = setting.val_dataloader()
    assert valid_env.observation_space == spaces.Dict(
        x=Image(0, 1, (3, 64, 64), dtype=np.float32),
        task_labels=spaces.Discrete(8),
    )

    # IDEA: Prevent submissions from calling the test_dataloader method or accessing the
    # test_env / test_dataset property?
    with pytest.raises(RuntimeError):
        test_env = setting.test_dataloader()
        test_env.reset()

    with pytest.raises(RuntimeError):
        test_env = setting.test_env
        test_env.reset()


def test_sl_track_setting_is_correct():
    setting = SettingProxy(
        ClassIncrementalSetting,
        "sl_track",
    )
    assert setting.nb_tasks == 12
    assert setting.dataset == "synbols"
    assert setting.observation_space == spaces.Dict(
        x=Image(0, 1, (3, 32, 32), dtype=np.float32),
        task_labels=spaces.Discrete(12),
    )
    assert setting.n_classes_per_task == 4
    assert setting.action_space == spaces.Discrete(48)
    assert setting.reward_space == spaces.Discrete(48)
    assert setting.known_task_boundaries_at_train_time is True
    assert setting.known_task_boundaries_at_test_time is False
    assert setting.monitor_training_performance is True
    assert setting.train_transforms == [Transforms.to_tensor, Transforms.three_channels]
    assert setting.val_transforms == [Transforms.to_tensor, Transforms.three_channels]
    assert setting.test_transforms == [Transforms.to_tensor, Transforms.three_channels]


================================================
FILE: sequoia/common/__init__.py
================================================
from .batch import Batch
from .config import Config
from .loss import Loss
from .metrics import ClassificationMetrics, Metrics, RegressionMetrics, get_metrics
from .spaces import Sparse


================================================
FILE: sequoia/common/batch.py
================================================
""" WIP (@lebrice): Playing around with the idea of using a typed object to
represent the different forms of "batches" that settings produce and that
different models expect.
"""
import dataclasses
import itertools
from abc import ABC
from collections import namedtuple
from dataclasses import dataclass
from functools import partial, singledispatch
from typing import (
    Any,
    Callable,
    ClassVar,
    Dict,
    Iterable,
    Iterator,
    KeysView,
    List,
    Mapping,
    NamedTuple,
    Optional,
    Tuple,
    Type,
    TypeVar,
    Union,
)

import gym
import numpy as np
import torch
from torch import Tensor

from sequoia.utils.logging_utils import get_logger

try:
    from functools import singledispatchmethod  # type: ignore
except ImportError:
    from singledispatchmethod import singledispatchmethod  # type: ignore

logger = get_logger(__name__)

B = TypeVar("B", bound="Batch", covariant=True)
T = TypeVar("T", Tensor, np.ndarray, "Batch")
V = TypeVar("V")


def hasmethod(obj: Any, method_name: str) -> bool:
    return hasattr(obj, method_name) and callable(getattr(obj, method_name))


@dataclass(frozen=True, eq=False)
class Batch(ABC, Mapping[str, T]):
    """Abstract base class for typed, immutable objects holding tensors.

    Can be used as an immutable dictionary mapping from strings to tensors, or
    as a tuple if you index with an integer.
    Also has some Tensor-like helper methods like `to()`, `numpy()`, `detach()`,
    etc.

    Other features:
    - numpy-style indexing/slicing/masking
    - moving all items between devices
    - changing the dtype of all tensors
    - detaching all tensors
    - Convertign all tensors to numpy arrays
    - convertible to a tuple or a dict

    NOTE: Using dataclasses rather than namedtuples, because those aren't really
    meant to be subclassed, so we couldn't use them to make the 'Observations'
    hierarchy, for instance.
    Dataclasses work better for that purpose.

    Examples:

    >>> import torch
    >>> from typing import Optional
    >>> from dataclasses import dataclass

    >>> @dataclass(frozen=True)
    ... class MyBatch(Batch):
    ...     x: Tensor
    ...     y: Tensor = None

    >>> batch = MyBatch(x=torch.ones([10, 3, 32, 32]), y=torch.arange(10))
    >>> batch.shapes
    {'x': torch.Size([10, 3, 32, 32]), 'y': torch.Size([10])}
    >>> batch.batch_size
    10
    >>> batch.dtypes
    {'x': torch.float32, 'y': torch.int64}
    >>> batch.dtype # No shared dtype, so dtype returns None.
    >>> batch.float().dtype # Converting the all items to float dtype:
    torch.float32

    Device-related methods:


    >>> from dataclasses import dataclass
    >>> import torch
    >>> from torch import Tensor

    >>> @dataclass(frozen=True)
    ... class Observations(Batch):
    ...     x: Tensor
    ...     task_labels: Tensor
    ...     done: Tensor
    ...
    >>> # Example: observations from two gym environments (e.g. VectorEnv)
    >>> observations = Observations(
    ...     x = torch.arange(10).reshape([2, 5]),
    ...     task_labels = torch.arange(2, dtype=int),
    ...     done = torch.zeros(2, dtype=bool),
    ... )

    >>> observations.shapes
    {'x': torch.Size([2, 5]), 'task_labels': torch.Size([2]), 'done': torch.Size([2])}
    >>> observations.batch_size
    2

    Datatypes:

    >>> observations.dtypes
    {'x': torch.int64, 'task_labels': torch.int64, 'done': torch.bool}
    >>> observations.dtype # No shared dtype, so dtype returns None.
    >>> observations.float().dtype # Converting the all items to float dtype:
    torch.float32


    Returns the device common to all items, or None:

    >>> observations.device
    device(type='cpu')
    >>> # observations.to("cuda").device
    >>> # device(type='cuda', index=0)

    >>> observations[0]
    tensor([[0, 1, 2, 3, 4],
            [5, 6, 7, 8, 9]])

    Additionally, when slicing a Batch across the first dimension, you get
    other typed objects as a result! For example:

    >>> observations[:, 0]
    Observations(x=tensor([0, 1, 2, 3, 4]), task_labels=tensor(0), done=tensor(False))

    >>> observations[:, 1]
    Observations(x=tensor([5, 6, 7, 8, 9]), task_labels=tensor(1), done=tensor(False))
    """

    # TODO: Would it make sense to add a gym Space class variable here?
    space: ClassVar[Optional[gym.Space]]
    # TODO: Remove these:
    field_names: ClassVar[List[str]]
    _namedtuple: ClassVar[Type[NamedTuple]]

    def __init_subclass__(cls, *args, **kwargs):
        # IDEA: By not marking 'Batch' a dataclass, we would let the subclass
        # decide it if wants to be frozen or not!

        # Subclasses of `Batch` should be dataclasses!
        if not dataclasses.is_dataclass(cls):
            raise RuntimeError(f"{__class__} subclass {cls} must be a dataclass!")
        super().__init_subclass__(*args, **kwargs)

    def __post_init__(self):
        # Create some class attributes, if they don't already exist.
        # TODO: We have to set these here because __init_subclass__ is called
        # before the dataclasses package sets the 'fields' attribute, it seems.
        cls = type(self)
        if "field_names" not in cls.__dict__:
            type(self).field_names = [f.name for f in dataclasses.fields(self)]
        # Create a NamedTuple type for this new subclass.
        if "_named_tuple" not in cls.__dict__:
            type(self)._namedtuple = namedtuple(type(self).__name__ + "Tuple", self.field_names)

    def __iter__(self) -> Iterator[str]:
        """Yield the 'keys' of this object, i.e. the names of the fields."""
        return iter(self.field_names)

    def __len__(self) -> int:
        """Returns the number of fields."""
        return len(self.field_names)

    def __eq__(self, other: Union["Batch", Any]) -> bool:
        # Not sure this is useful.
        return NotImplemented

        if not isinstance(other, Batch):
            return NotImplemented
        if type(self) != type(other):
            # Not allowing these sorts of comparisons.
            return NotImplemented
        items_equal = {k: v == other[k] for k, v in self.items()}
        return all(
            is_equal.all() if isinstance(is_equal, (Tensor, np.ndarray)) else is_equal
            for is_equal in items_equal.values()
        )

    @singledispatchmethod
    def __getitem__(self, index: Any) -> T:
        """Select a subset of the fields of this object. Can also be indexed
        with tuples, boolean numpy arrays or tensors, as well as None.
        """
        raise KeyError(index)

    @__getitem__.register(type(None))
    def _getitem_none(self, index: None) -> "Batch":
        """Indexing with 'None' gives back a copy with all the items having an
        extra batch dimension.
        """
        return self.with_batch_dimension()
        return getattr(self, index)

    @__getitem__.register
    def _getitem_by_name(self, index: str) -> Union[Tensor, Any]:
        return getattr(self, index)

    @__getitem__.register
    def _getitem_by_index(self, index: int) -> Union[Tensor, Any]:
        return getattr(self, self.field_names[index])

    @__getitem__.register(slice)
    def _getitem_with_slice(self, index: slice) -> "Batch":
        # NOTE: I don't think it would be a good idea to support slice indexing,
        # as it could be confusing and give the user the impression that it
        # is slicing into the tensors, rather than into the fields.
        # I guess this might be doable, but is it really useful?
        raise NotImplementedError("Batch objects don't support indexing with (just) slices atm.")
        if index == slice(None, None, None) or index == slice(0, len(self), 1):
            return self

    @__getitem__.register(type(Ellipsis))
    def _(self: B, index) -> B:
        return self

    @__getitem__.register(np.ndarray)
    @__getitem__.register(Tensor)
    def _getitem_with_array(self, index: np.ndarray) -> B:
        """
        NOTE: Indexing with just an array uses the array as a 'mask' on all
        fields, instead of indexing the "keys" of this object.
        """
        assert len(index) == self.batch_size
        return self[:, index]

    @__getitem__.register(tuple)
    def _getitem_with_tuple(self, index: Tuple[Union[slice, Tensor, np.ndarray, int], ...]):
        """When slicing with a tuple, if the first item is an integer, we get
        the attribute at that index and slice it with the rest.
        For now, the first item in the tuple can only be either an int or an
        empty slice.
        """
        if len(index) <= 1:
            raise IndexError(
                f"Invalid index {index}: When indexing with "
                f"tuples or lists, they need to have len > 1."
            )
        field_index = index[0]
        item_index = index[1:]
        # if len(item_index) == 1:
        #     item_index = item_index[0]

        if isinstance(field_index, int):
            # logger.debug(f"Getting the {field_index}'th field, with slice {index[1:]}")
            return self[field_index][item_index]

        # e.g: forward_pass[:, 1]
        if isinstance(field_index, slice):
            if field_index == slice(None):
                # logger.debug(f"Indexing all fields {field_index} with index: {item_index}")
                return type(self)(
                    **{
                        key: (
                            value[index]
                            if isinstance(value, Batch)
                            else value[item_index]
                            if value is not None
                            else None
                        )
                        for key, value in self.items()
                    }
                )

        # batch[..., 0] : Not sure this would really be that helpful.
        if field_index == Ellipsis:
            logger.debug(f"Using ellipsis (...) as the field index?")
            return type(self)(
                **{
                    key: value[Ellipsis, item_index] if value is not None else None
                    for key, value in self.items()
                }
            )

        raise NotImplementedError(
            f"Only support tuple indexing with emptyslices or int as first "
            f"tuple item for now. (index={index})"
        )

    def slice(self: B, index: Union[int, slice, np.ndarray, Tensor]) -> B:
        """Gets a slice across the first (batch) dimension.
        Raises an error if there is no batch size.

        Always returns an object with a batch dimension, even when `index` has len of 1.
        """
        if not isinstance(index, (int, slice, np.ndarray, Tensor)):
            raise NotImplementedError(f"can't slice with index {index}")

        # BUG: By putting a 'None' value in the ForwardPass
        def getitem_if_val_is_not_none(val, index):
            if val is None:
                return None
            return val[index]

        sliced_value = self._map(partial(getitem_if_val_is_not_none, index=index), recursive=True)
        if isinstance(index, int):
            sliced_value = sliced_value.with_batch_dimension()
        return sliced_value
        # return type(self)(**{
        #     k: v.slice(index) if isinstance(v, Batch) else
        #     v[index] if v is not None else None
        #     for k, v in self.items()
        # })

    def __setitem__(self, index: Union[int, str], value: Any):
        """Set a value in slices of one or more of the fields.

        NOTE: Since this class is marked as frozen, we can't change the
        attributes, so the index should be a tuple (to change parts of the
        tensors, for instance.
        """
        if not isinstance(index, tuple) or len(index) < 2:
            raise NotImplementedError("index needs to be tuple with len >= 2")
        # Get which keys/fields were selected:
        selected_fields = np.array(self.field_names)[index[0]]
        for selected_field in selected_fields:
            item = self[selected_field]
            if item is not None:
                item[index[1:]] = value

    def keys(self) -> KeysView[str]:
        return KeysView(self.field_names)

    def values(self) -> Tuple[T, ...]:
        return self.as_namedtuple()

    def items(self) -> Iterable[Tuple[str, T]]:
        for name in self.field_names:
            yield name, getattr(self, name)

    @property
    def devices(self) -> Dict[str, Union[Optional[torch.device], Dict]]:
        """Dict from field names to their device if they have one, else None.

        If `self` has `Batch` fields, the values for those will be dicts.
        """
        return {
            k: v.devices if isinstance(v, Batch) else getattr(v, "device", None)
            for k, v in self.items()
        }

    @property
    def device(self) -> Optional[torch.device]:
        """Returns the device common to all items, or `None`.

        Returns
        -------
        Tuple[Optional[torch.device]]
            None if the devices are unknown/different, or the common device.
        """
        device: Optional[torch.device] = None
        # TODO: These kinds of methods can't discriminate between a child item
        # having all all None tensors and it having different devices atm.
        for key, value in self.items():
            if isinstance(value, Batch):
                item_device = value.device
                if item_device is None:
                    # Child item doesn't have a 'device', so `self` also doesnt.
                    return None
            else:
                item_device = getattr(value, "device", None)

            if item_device is None:
                continue
            if device is None:
                device = item_device
            elif item_device != device:
                return None
        return device

    @property
    def dtypes(self) -> Dict[str, Union[Optional[torch.dtype], Dict]]:
        """Dict from field names to their dtypes if they have one, else None.

        If `self` has `Batch` fields, the values for those will be dicts.
        """
        return {
            k: v.dtypes if isinstance(v, Batch) else getattr(v, "dtype", None)
            for k, v in self.items()
        }

    @property
    def dtype(self) -> Tuple[Optional[torch.dtype]]:
        """Returns the dtype common to all tensors, or None.

        Returns
        -------
        Dict[Optional[torch.dtype]]
            The common dtype, or `None` if the dtypes are unknown/different.
        """
        dtype: Optional[torch.dtype] = None

        for key, value in self.items():
            item_dtype = getattr(value, "dtype", None)
            if item_dtype is None:
                continue
            if dtype is None:
                dtype = item_dtype
            elif item_dtype != dtype:
                return None
        return dtype

    def as_namedtuple(self) -> Tuple[T, ...]:
        return self._namedtuple(**{k: v for k, v in self.items()})

    def as_list_of_tuples(self) -> Iterable[Tuple[T, ...]]:
        """Returns an iterable of the items in the 'batch', each item as a
        namedtuple (list of tuples).
        """
        # If one of the fields is None, then we convert it into a list of Nones,
        # so we can zip all the fields to create a list of tuples.
        field_items = [
            [items for _ in range(self.batch_size)]
            if items is None or items is {}
            else [item for item in items]
            for items in self.as_tuple()
        ]
        assert all([len(items) == self.batch_size for items in field_items])
        return list(itertools.starmap(self._namedtuple, zip(*field_items)))

    def as_tuple(self) -> Tuple[T, ...]:
        """Returns a namedtuple containing the 'batched' attributes of this
        object (tuple of lists).
        """
        # TODO: Turning on the namedtuple return value by default.
        # return tuple(
        #     getattr(self, f.name) for f in dataclasses.fields(self)
        # )
        return self.as_namedtuple()

    # def as_dict(self) -> Dict[str, T]:
    #     # NOTE: dicts are ordered since python 3.7
    #     return {
    #         field_name: getattr(self, field_name)
    #         for field_name in self.field_names
    #     }

    def to(self, *args, **kwargs):
        def _to(item, *args_, **kwargs_):
            if hasattr(item, "to") and callable(item.to):
                return item.to(*args_, **kwargs_)
            return item

        return self._map(_to, *args, **kwargs, recursive=True)

    def float(self, dtype=torch.float):
        return self.to(dtype=dtype)

    def float32(self, dtype=torch.float32):
        return self.to(dtype=dtype)

    def int(self, dtype=torch.int):
        return self.to(dtype=dtype)

    def double(self, dtype=torch.double):
        return self.to(dtype=dtype)

    def numpy(self):
        """Returns a new Batch object of the same type, with all Tensors
        converted to numpy arrays.

        Returns
        -------
        [type]
            [description]
        """

        def _numpy(v):
            if isinstance(v, (Tensor, Batch)):
                return v.detach().cpu().numpy()
            return v

        return self._map(_numpy, recursive=True)
        # return type(self)(**{
        #     k: v.detach().cpu().numpy() if isinstance(v, (Tensor, Batch)) else v
        #     for k, v in self.items()
        # })

    def detach(self):
        """Returns a new Batch object of the same type, with all Tensors
        detached.

        Returns
        -------
        Batch
            New object of the same type, but with all tensors detached.
        """
        from sequoia.utils.generic_functions import detach

        return self._map(detach)
        # return type(self)(**detach({
        #     k: v.detach() if isinstance(v, (Tensor, Batch)) else v for k, v in self.items()
        # }))

    def cpu(self, **kwargs):
        """Returns a new Batch object of the same type, with all Tensors
        moved to cpu.

        Returns
        -------
        Batch
            New object of the same type, but with all tensors moved to CPU.
        """
        return self.to(device="cpu", **kwargs)

    def cuda(self, device=None, **kwargs):
        """Returns a new Batch object of the same type, with all Tensors
        moved to cuda device.

        Returns
        -------
        Batch
            New object of the same type, but with all tensors moved to cuda.
        """
        return self.to(device=(device or "cuda"), **kwargs)

    @property
    def shapes(self) -> Dict[str, Union[torch.Size, Dict]]:
        """Dict from field names to their shapes if they have one, else None.

        If `self` has `Batch` fields, the values for those will be dicts.
        """
        return {
            k: v.shapes if isinstance(v, Batch) else getattr(v, "shape", None)
            for k, v in self.items()
        }

    @property
    def batch_size(self) -> Optional[int]:
        """Returns the length of the first dimension if it is common to all
        tensors in this object, else None.
        """
        # NOTE: If all tensors have just one dimension and are all the same
        # length, then this would give back that length.
        batch_size: Optional[int] = None
        for k, v in self.items():
            if isinstance(v, Batch):
                v_batch_size = v.batch_size
                if v_batch_size is None:
                    # child item doesn't have a batch size, so we dont either.
                    return None
                elif batch_size is None:
                    batch_size = v_batch_size
                elif v_batch_size != batch_size:
                    return None
            else:
                item_shape = getattr(v, "shape", None)
                if item_shape is None:
                    continue
                if not item_shape:
                    return None
                v_batch_size = item_shape[0]
                if batch_size is None:
                    batch_size = v_batch_size
                elif v_batch_size != batch_size:
                    return None
        return batch_size

    def with_batch_dimension(self: B) -> B:
        """Returns a copy of `self` where all numpy arrays / tensors have an
        extra `batch` dimension of size 1.
        """
        # TODO: Do we 'wrap' the `None` values? or keep them as-is?
        from sequoia.utils.categorical import Categorical

        @singledispatch
        def unsqueeze(v: Any) -> Any:
            if v is None:
                return v
            return np.asarray([v])

        @unsqueeze.register(Categorical)
        @unsqueeze.register(np.ndarray)
        @unsqueeze.register(Tensor)
        def _unsqueeze_array(
            v: Union[np.ndarray, Tensor, Categorical]
        ) -> Union[np.ndarray, Tensor, Categorical]:
            return v[None]

        return self._map(unsqueeze)

    def remove_batch_dimension(self: B) -> B:
        """Returns a copy of `self` where all numpy arrays / tensors have an
        the extra `batch` dimension removed.

        Raises an error if any non-None value doesn't have a batch dimension of
        size 1.
        """
        return self[:, 0]

    def split(self: B) -> List[B]:
        """Returns an iterable of the items in the 'batch', each item as a
        object of the same type as `self`.
        """
        # If one of the fields is None, then we convert it into a list of Nones,
        # so we can zip all the fields to create a list of tuples.
        return [self[:, i] for i in range(self.batch_size)]

    @classmethod
    def stack(cls: Type[B], items: List[B]) -> B:
        items = list(items)
        from sequoia.utils.generic_functions import stack

        # Just to make sure that the returned item will be of the type `cls`.
        assert isinstance(items[0], cls)
        return stack(items)

    @classmethod
    def concatenate(cls: Type[B], items: List[B], **kwargs) -> B:
        items = list(items)
        from sequoia.utils.generic_functions import concatenate

        assert isinstance(items[0], cls)
        return concatenate(items, **kwargs)

    def torch(self, device: Union[str, torch.device] = None, dtype: torch.dtype = None):
        """Converts any ndarrays to Tensors if possible and returns a new
        object of the same type.

        NOTE: This is the opposite of `self.numpy()`
        """

        def _from_numpy(v: Union[np.ndarray, Any]) -> Union[Tensor, Any]:
            try:
                return torch.as_tensor(v, device=device, dtype=dtype)
            except (TypeError, RuntimeError):
                return v

        return self._map(_from_numpy, recursive=True)

    def _map(self: B, func: Callable, *args, recursive: bool = True, **kwargs) -> B:
        """Returns an object of the same type as `self`, where function `func`
        has been applied (with positional args `args` and keyword-arguments
        `kwargs`) to all its values, (inluding the values of nested `Batch`
        objects if `recursive` is True).
        """
        new_items = {}
        for key, value in self.items():
            if isinstance(value, Batch):
                if not recursive:
                    # don't apply the function to nested Batch objects unless
                    # `recursive` is True.
                    new_items[key] = value
                else:
                    new_items[key] = value._map(func, *args, recursive=recursive, **kwargs)
            else:
                new_items[key] = func(value, *args, **kwargs)  # type: ignore
        return type(self)(**new_items)

    def _apply(
        self: B, func: Callable[[T, Any], None], *args, recursive: bool = True, **kwargs
    ) -> None:
        """Applies function `func` to all the values in `self`, and optionally
        to all its nested values when `recursive` is True.

        Returns None, as this assumes that `func` modifies the values in-place.
        """
        for key, value in self.items():
            if isinstance(value, Batch) and not recursive:
                # Skip any Batch objects if `recursive` is False.
                continue
            func(value, *args, **kwargs)  # type: ignore


from sequoia.utils.generic_functions.replace import replace


@replace.register(Batch)
def _replace_batch_items(obj: Batch, **items) -> Batch:
    return dataclasses.replace(obj, **items)


from typing import Sequence

from sequoia.utils.generic_functions import get_slice, set_slice


@get_slice.register(Batch)
def _get_batch_slice(value: Batch, indices: Sequence[int]) -> Batch:
    return value.slice(indices)
    # assert False, f"Removing this in favor of just doing Batch[:, indices]. "
    # return type(value)(**{
    #     field_name: get_slice(field_value, indices) if field_value is not None else None
    #     for field_name, field_value in value.as_dict().items()
    # })


@set_slice.register(Batch)
def set_batch_slice(target: Batch, indices: Sequence[int], values: Batch) -> None:
    for key, target_values in target.items():
        set_slice(target_values, indices, values[key])


if __name__ == "__main__":
    import doctest

    doctest.testmod()


================================================
FILE: sequoia/common/batch_test.py
================================================
""" Tests for the `Batch` class.
"""


from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple, Type

import numpy as np
import pytest
import torch
from torch import Tensor

from sequoia.utils.categorical import Categorical

from .batch import Batch


@dataclass(frozen=True)
class Observations(Batch):
    x: Tensor
    task_labels: Optional[Tensor] = None


@dataclass(frozen=True)
class Actions(Batch):
    y_pred: Tensor


@dataclass(frozen=True)
class RLActions(Actions):
    action_dist: Categorical


@dataclass(frozen=True)
class Rewards(Batch):
    y: Tensor


@pytest.mark.parametrize(
    "batch_type, items_dict",
    [
        (
            Observations,
            dict(
                x=torch.arange(10),
                task_labels=torch.arange(10) + 1,
            ),
        ),
    ],
)
def test_batch_behaves_like_a_dict(batch_type, items_dict):
    obj = batch_type(**items_dict)

    # NOTE: dicts, along with their .keys() and .values() are ordered as of py37

    for i, (k, v) in enumerate(obj.items()):
        original_value = items_dict[k]

        assert k == list(items_dict.keys())[i]  # key order is the same.
        assert (v == original_value).all()
        if isinstance(original_value, Tensor):
            assert v is original_value  # Tensors shouldn't be cloned or copied

        assert (obj[k] == v).all()  # values are the same.
        assert (obj[k] == getattr(obj, k)).all()  # getattr same as __getitem__
        assert (obj[i] == v).all()  # can also be indexed with ints like a tuple.


@pytest.mark.parametrize(
    "batch_type, items_dict",
    [
        (
            Observations,
            dict(
                x=torch.arange(10),
                task_labels=torch.arange(10) + 1,
            ),
        ),
    ],
)
def test_to(batch_type: Type[Batch], items_dict: Dict[str, Tensor]):
    """Test that the 'to' method behaves like `torch.Tensor.to`, so that we
    can move all the items in a `Batch` between devices or dtypes.
    """
    original_devices: Dict[str, torch.device] = {k: v.device for k, v in items_dict.items()}
    original_dtypes: Dict[str, torch.dtype] = {k: v.dtype for k, v in items_dict.items()}

    obj = batch_type(**items_dict)

    # The devices and dtypes remain the same when creating the Batch with the
    # given items.
    for k, v in obj.items():
        original_value = items_dict[k]
        assert v.device == original_value.device == original_devices[k]
        assert v.dtype == original_value.dtype == original_dtypes[k]

    # The 'devices' and 'dtypes' attributes give the devices and dtypes of all
    # items.
    assert obj.devices == original_devices
    assert obj.dtypes == original_dtypes
    devices = list(original_devices.values())
    dtypes = list(original_dtypes.values())
    if len(set(devices)) == 1:
        # If they all share the same device, then the `device` attribute on the
        # `batch` is this shared device.
        common_device = devices[0]
        assert obj.device == common_device

    if len(set(dtypes)) == 1:
        # If all tensors have the same dtype, then the `dtype` attribute on the
        # `batch` is this shared dtype.
        common_dtype = dtypes[0]
        assert obj.dtype == common_dtype

    # Test moving to another device, if possible.
    if torch.cuda.is_available():
        cuda_obj = obj.to("cuda")
        for i, (k, v) in enumerate(cuda_obj.items()):
            assert v.device.type == "cuda"

    float_obj = obj.to(dtype=torch.float32)
    for k, v in float_obj.items():
        original_value = items_dict[k]
        assert v.device == original_value.device
        assert v.dtype == torch.float32
        assert (v == original_value.to(dtype=torch.float32)).all()


@pytest.mark.parametrize(
    "batch_type, items_dict",
    [
        (
            Observations,
            dict(
                x=torch.arange(25).reshape([5, 5]),
                task_labels=torch.arange(25).reshape([5, 5]) + 1,
            ),
        ),
    ],
)
@pytest.mark.parametrize(
    "index",
    [
        (0, 0),  # obj[0, 0]
        (0, ..., 0),  # obj[0, ..., 0]
        (slice(None), 0),  # obj[:, 0]
        (slice(None), slice(3)),  # obj[:, :3]
        (slice(None), slice(None, -3)),  # obj[:, -3:]
        (slice(None), slice(None, None, 2)),  # obj[:, ::2]
        (slice(None), np.arange(5) % 2 == 0),  # obj[:, even_mask]
        (slice(None), np.arange(5) % 2 == 0),  # obj[:, even_mask]
    ],
)
def test_tuple_indexing(
    batch_type: Type[Batch], items_dict: Dict[str, Tensor], index: Tuple[Any, ...]
):
    """Test that we can index into the object in the same style as an ndarray"""
    obj = batch_type(**items_dict)

    keys = list(items_dict.keys())
    print(f"Expected keys: {keys}")
    expected_items = {k: items_dict[k][index[1:]] for k in np.array(keys)[index[0]]}

    print(f"expected sliced items:")
    for key, value in expected_items.items():
        print(key, value)

    actual_slice = obj[index]

    if index[0] == slice(None):
        # actual_slice: Batch
        assert isinstance(actual_slice, batch_type)
        assert list(actual_slice.keys()) == keys

        for k, sliced_value in actual_slice.items():
            print(f"key {k}, index {index}")
            print(f"Sliced value: {sliced_value}")
            expected_value = expected_items[k]
            print(f"Expected value: {expected_value}")
            assert (sliced_value == expected_value).all()

    if isinstance(index[0], int):
        # e.g. Observations[0, <...>]
        key = keys[index[0]]
        expected_value = expected_items[key]
        assert (actual_slice == expected_value).all()


def test_masking():
    """Test indexing or changing values in the item using a mask array."""
    bob = Observations(
        x=torch.arange(25).reshape([5, 5]),
    )
    odd_rows = np.arange(5) % 2 == 1
    bob[:, odd_rows] = False

    tensor = torch.as_tensor

    expected = Observations(
        x=tensor(
            [
                [0, 1, 2, 3, 4],
                [0, 0, 0, 0, 0],
                [10, 11, 12, 13, 14],
                [0, 0, 0, 0, 0],
                [20, 21, 22, 23, 24],
            ]
        ),
        task_labels=None,
    )
    assert (expected.x == bob.x).all()
    assert expected.task_labels == bob.task_labels


def test_newaxis():
    """WIP: Trying out np.newaxis as a way to add an extra batch dimension."""
    x = Observations(
        x=torch.arange(5),
        task_labels=1,
    )
    # Test out different ways of 'unsqueezing' the object.
    for expanded in [x[np.newaxis], x.with_batch_dimension()]:
        assert str(expanded) == str(
            Observations(
                x=torch.tensor([[0, 1, 2, 3, 4]], dtype=int),
                task_labels=np.array([1]),
            )
        )


def test_single_index():
    """observations[0] should gives the first field."""
    obs = Observations(
        x=torch.arange(5),
        task_labels=1,
    )
    assert obs[0] is obs.x


def test_remove_batch_dim():
    """Removing an extra batch dimension."""
    bob = Observations(
        x=torch.tensor([[0, 1, 2, 3, 4]], dtype=int),
        task_labels=np.array([1]),
    )
    expected = Observations(
        x=torch.arange(5),
        task_labels=1,
    )
    for expanded in [bob.remove_batch_dimension(), bob[:, 0]]:
        assert str(expanded) == str(expected)

    bob = Observations(
        x=torch.tensor([[0, 1, 2, 3, 4]], dtype=int),
        task_labels=None,
    )
    expected = Observations(
        x=torch.arange(5),
        task_labels=None,
    )
    for expanded in [
        bob.remove_batch_dimension(),
        bob[
            :,
            0,
        ],
    ]:
        assert str(expanded) == str(expected)


def test_remove_batch_dim_with_nested_objects():
    obj = ForwardPass(
        observations=Observations(
            x=torch.arange(5).reshape([1, 5]),
            task_labels=None,
        ),
        h_x=torch.arange(4).reshape([1, 4]),
        actions=Actions(
            y_pred=torch.tensor(1).reshape(
                [
                    1,
                ]
            ),
        ),
    )
    actual = obj.remove_batch_dimension()
    assert str(actual) == str(
        ForwardPass(
            observations=Observations(
                x=torch.arange(5),
                task_labels=None,
            ),
            h_x=torch.arange(4),
            actions=Actions(
                y_pred=torch.tensor(1),
            ),
        )
    )


def test_split():
    """Split a batch into a list of Batch objects"""
    bob = Observations(
        x=torch.tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int),
        task_labels=np.array([0, 1]),
    )
    expected = [
        Observations(
            x=torch.arange(5) + i * 5,
            task_labels=i,
        )
        for i in range(2)
    ]
    assert str(bob.split()) == str(expected)


@pytest.mark.parametrize(
    "items, expected",
    [
        (
            [
                Observations(
                    x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    task_labels=np.array(0),
                ),
                Observations(
                    x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int),
                    task_labels=np.array(1),
                ),
            ],
            Observations(
                x=torch.as_tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int),
                task_labels=np.array([0, 1]),
            ),
        ),
        (
            [
                RLActions(
                    y_pred=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    action_dist=Categorical(logits=torch.ones([5, 5], dtype=float) / 5),
                ),
                RLActions(
                    y_pred=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    action_dist=Categorical(logits=torch.ones([5, 5], dtype=float) / 5),
                ),
            ],
            RLActions(
                y_pred=torch.as_tensor([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], dtype=int),
                action_dist=Categorical(logits=torch.ones([2, 5, 5], dtype=float) / 5),
            ),
        ),
    ],
)
def test_stack(items: List[Batch], expected: Batch):
    """Split a batch into a list of Batch objects"""
    assert str(type(items[0]).stack(items)) == str(expected)
    # Same test, but with only numpy arrays as items:
    assert str(type(items[0]).stack(map(lambda i: i.numpy(), items))) == str(expected.numpy())
    # Same test, but with Tensor items:
    assert str(type(items[0]).stack(map(lambda i: i.torch(), items))) == str(expected.torch())


@pytest.mark.parametrize(
    "items, expected",
    [
        (
            [
                Observations(
                    x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    task_labels=None,
                ),
                Observations(
                    x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int),
                    task_labels=None,
                ),
            ],
            Observations(
                x=torch.as_tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int),
                task_labels=None,
            ),
        ),
        (
            [
                Observations(
                    x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    task_labels=None,
                ),
                Observations(
                    x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int),
                    task_labels=1,
                ),
            ],
            Observations(
                x=torch.as_tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int),
                task_labels=np.array([None, 1]),
            ),
        ),
    ],
)
def test_stack_with_none_values(items: List[Batch], expected: Batch):
    """Test that if all values are None, a single None is produced, but if only some
    values are None, then an ndarray of dtype `object` is created instead.
    """
    cls = type(items[0])
    assert str(cls.stack(items)) == str(expected)
    # Same test, but with only numpy arrays as items:
    items = [item.numpy() for item in items]
    assert str(cls.stack(items)) == str(expected.numpy())


@pytest.mark.parametrize(
    "items, expected",
    [
        (
            [
                Observations(
                    x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    task_labels=0,
                ),
                Observations(
                    x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int),
                    task_labels=1,
                ),
            ],
            Observations(
                x=torch.as_tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int),
                task_labels=np.array([0, 1]),
            ),
        ),
        (
            [
                Observations(
                    x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    task_labels=None,
                ),
                Observations(
                    x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int),
                    task_labels=None,
                ),
            ],
            Observations(
                x=torch.as_tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int),
                task_labels=None,
            ),
        ),
        (
            [
                RLActions(
                    y_pred=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    action_dist=Categorical(logits=torch.ones([5, 5], dtype=float) / 5),
                ),
                RLActions(
                    y_pred=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    action_dist=Categorical(logits=torch.ones([5, 5], dtype=float) / 5),
                ),
            ],
            RLActions(
                y_pred=torch.as_tensor([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=int),
                action_dist=Categorical(logits=torch.ones([10, 5], dtype=float) / 5),
            ),
        ),
    ],
)
def test_concatenate(items: List[Batch], expected: Batch):
    """Split a batch into a list of Batch objects"""
    assert str(type(items[0]).concatenate(items)) == str(expected)
    # Same test, but with only numpy arrays as items:
    assert str(type(items[0]).concatenate(map(lambda i: i.numpy(), items))) == str(expected.numpy())
    # Same test, but with Tensor items:
    assert str(type(items[0]).concatenate(map(lambda i: i.torch(), items))) == str(expected.torch())


@pytest.mark.parametrize(
    "numpy_batch, torch_batch",
    [
        (
            Observations(
                x=np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]),
                task_labels=np.array([None, None]),
            ),
            Observations(
                x=torch.tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int),
                task_labels=np.array([None, None]),
            ),
        ),
    ],
)
def test_convert_between_ndarrays_and_tensors(numpy_batch: Batch, torch_batch: Batch):
    assert str(numpy_batch.torch()) == str(torch_batch)
    assert str(numpy_batch.torch().numpy()) == str(numpy_batch)

    assert str(torch_batch.numpy()) == str(numpy_batch)
    assert str(torch_batch.numpy().torch()) == str(torch_batch)

    if torch.cuda.is_available():
        torch_batch = torch_batch.cuda()
        assert torch_batch.device.type == "cuda"

        assert str(numpy_batch.torch(device="cuda")) == str(torch_batch)
        assert str(numpy_batch.torch(device="cuda").numpy()) == str(numpy_batch)

        assert str(torch_batch.numpy()) == str(numpy_batch)
        assert str(torch_batch.numpy().torch(device="cuda")) == str(torch_batch)


@dataclass(frozen=True)
class ForwardPass(Batch):
    observations: Observations
    h_x: Tensor
    actions: Actions


def test_nesting():
    obj = ForwardPass(
        observations=Observations(
            x=torch.arange(10).reshape([2, 5]),
            task_labels=torch.arange(2, dtype=int),
        ),
        h_x=torch.arange(8).reshape([2, 4]),
        actions=Actions(
            y_pred=torch.arange(2, dtype=int),
        ),
    )
    assert obj.batch_size == 2
    assert obj[0, 1, 0] == obj.observations.task_labels[0]
    tensor = torch.as_tensor
    assert str(obj.slice(0)) == str(
        ForwardPass(
            observations=Observations(x=tensor([[0, 1, 2, 3, 4]]), task_labels=tensor([0])),
            h_x=tensor([[0, 1, 2, 3]]),
            actions=Actions(y_pred=tensor([0])),
        )
    )


def test_slicing_with_one_item():
    observations = Observations(
        x=torch.arange(10).reshape([2, 5]),
        task_labels=torch.arange(2, dtype=int),
    )
    indices = torch.as_tensor([0])
    assert observations.slice(indices).shapes == {
        "x": torch.Size([1, 5]),
        "task_labels": torch.Size([1]),
    }


================================================
FILE: sequoia/common/callbacks/__init__.py
================================================
"""
TODO: Migrate the addons to Pytorch-Lightning, maybe in the form of callbacks
or as optional extensions to be added to Classifier?
"""
# from .knn_callback import KnnCallback
# from .vae_callback import SaveVaeSamplesCallback


================================================
FILE: sequoia/common/callbacks/knn_callback.py
================================================
""" Callback that evaluates representations with a KNN after each epoch.

TODO: The code here is split into too many functions and its a bit confusing.
    Will Need to rework that at some point.

NOTE: Currently unused.
"""

import math
from dataclasses import asdict, dataclass
from typing import List, Optional, Tuple

import numpy as np
import torch
from pytorch_lightning import Callback, LightningModule, Trainer
from simple_parsing import field, mutable_field
from sklearn.metrics import log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from torch import Tensor
from torch.utils.data import DataLoader

from sequoia.common.loss import Loss

# from sequoia.methods.models.base_model.model import LightningModule
from sequoia.settings import Setting
from sequoia.settings.sl import ClassIncrementalSetting
from sequoia.utils.logging_utils import get_logger, pbar
from sequoia.utils.utils import roundrobin, take

logger = get_logger(__name__)


@dataclass
class KnnClassifierOptions:
    """Set of options for configuring the KnnClassifier."""

    n_neighbors: int = field(default=5, alias="n_neighbours")  # Number of neighbours.
    metric: str = "cosine"
    algorithm: str = "auto"  # See the sklearn docs
    leaf_size: int = 30  # See the sklearn docs
    p: int = 2  # see the sklean docs
    n_jobs: Optional[int] = -1  # see the sklearn docs.


@dataclass
class KnnCallback(Callback):
    """Addon that adds the option of evaluating representations with a KNN.

    TODO: Perform the KNN evaluations in different processes using multiprocessing.
    TODO: We could even evaluate the representations of a DIFFERENT dataset with
    the KNN, if the shapes were compatible with the model! For example, we could
    train the model on some CL/RL/etc task, like Omniglot or something, and at
    the same time, evaluate how good the model's representations are at
    disentangling the classes from MNIST or Fashion-MNIST or something else
    entirely! This could be nice when trying to argue about better generalization
    in the model's representations.
    """

    # Options for the KNN classifier
    knn_options: KnnClassifierOptions = mutable_field(KnnClassifierOptions)
    # Maximum number of examples to take from the dataloaders. When None, uses
    # the entire training/validaton/test datasets.
    knn_samples: int = 0

    def __post_init__(self):
        self.max_num_batches: int = 0

        self.model: LightningModule
        self.trainer: Trainer

    def on_train_start(self, trainer, pl_module):
        """Called when the train begins."""
        self.trainer = trainer
        self.model = pl_module
        self.setting: ClassIncrementalSetting

    def setup(self, trainer, pl_module, stage: str):
        """Called when fit or test begins"""
        super().setup(trainer, pl_module, stage)

    def on_epoch_end(self, trainer: Trainer, pl_module: LightningModule):
        self.trainer = trainer
        self.model = pl_module
        self.setting = self.model.setting
        config = self.model.config

        if self.knn_samples > 0:
            batch_size = pl_module.batch_size
            # We round this up so we always take at least one batch_size of
            # samples from each dataloader.
            self.max_num_batches = math.ceil(self.knn_samples / batch_size)
            logger.debug(
                f"Taking a maximum of {self.max_num_batches} batches from each dataloader."
            )

            if config.debug:
                self.knn_samples = min(self.knn_samples, 100)

            valid_knn_loss, test_knn_loss = self.evaluate_knn(pl_module)

            # assert False, trainer.callback_metrics.keys()
            loss: Optional[Loss] = trainer.callback_metrics.get("loss_object")
            if loss:
                assert "knn/valid" not in loss.losses
                assert "knn/test" not in loss.losses
                loss.losses["knn/valid"] = valid_knn_loss
                loss.losses["knn/test"] = test_knn_loss

    def log(self, loss_object: Loss):
        if self.trainer.logger:
            self.trainer.logger.log_metrics(loss_object.to_log_dict())

    def get_dataloaders(self, model: LightningModule, mode: str) -> List[DataLoader]:
        """Retrieve the train/val/test dataloaders for all 'tasks'."""
        setting = model.datamodule
        assert setting, "The LightningModule must have its 'datamodule' attribute set for now."
        # if the setting defines a dataloaders() method, those are for each of the tasks, which is what we want!
        fn = getattr(setting, f"{mode}_dataloaders", getattr(setting, f"{mode}_dataloader"))
        loaders = fn()
        if isinstance(loaders, DataLoader):
            return [loaders]
        assert isinstance(loaders, list)
        return loaders

    def evaluate_knn(self, model: LightningModule) -> Tuple[Loss, Loss]:
        """Evaluate the representations with a KNN in the context of CL.

        We shorten the train dataloaders to take only the first
        `knn_samples` samples in order to save some compute.
        TODO: Figure out a way to cleanly add the metrics from the callback to
        the ``log dict'' which is returned by the model. Right now they are
        only printed / logged to wandb directly from here.
        """
        setting = model.datamodule
        assert isinstance(setting, Setting)
        # TODO: Remove this if we want to use this for something else than a
        # Continual setting in the future.
        assert isinstance(setting, ClassIncrementalSetting)
        num_classes = setting.num_classes

        # Check wether the method has access to the task labels at train/test time.
        task_labels_at_test_time: bool = False
        from sequoia.settings import TaskIncrementalSLSetting

        if isinstance(setting, TaskIncrementalSLSetting):
            if setting.task_labels_at_test_time:
                task_labels_at_test_time = True
        # TODO: Figure out a way to make sure that we get at least one example
        # of each class to fit the KNN.
        self.knn_samples = max(self.knn_samples, num_classes**2)
        self.max_num_batches = math.ceil(self.knn_samples / model.batch_size)
        logger.info(f"number of classes: {num_classes}")
        logger.info(f"Number of KNN samples: {self.knn_samples}")
        logger.debug(f"Taking a maximum of {self.max_num_batches} batches from each dataloader.")

        train_loaders: List[DataLoader] = self.get_dataloaders(model, mode="train")
        valid_loaders: List[DataLoader] = self.get_dataloaders(model, mode="val")
        test_loaders: List[DataLoader] = self.get_dataloaders(model, mode="test")

        # Only take the first `knn_samples` samples from each dataloader.
        def shorten(dataloader: DataLoader):
            return take(dataloader, n=self.max_num_batches)

        if self.max_num_batches:
            train_loaders = list(map(shorten, train_loaders))
            valid_loaders = list(map(shorten, valid_loaders))
            test_loaders = list(map(shorten, test_loaders))

        # Create an iterator that alternates between each of the train dataloaders.
        # NOTE: we shortened each of the dataloaders just to be sure that we get at least
        train_loader = roundrobin(*train_loaders)

        h_x, y = get_hidden_codes_array(
            model=model, dataloader=train_loader, description="KNN (Train)"
        )
        train_loss, scaler, knn_classifier = fit_knn(
            x=h_x, y=y, options=self.knn_options, num_classes=num_classes, loss_name="knn/train"
        )
        logger.info(f"KNN Train Acc: {train_loss.accuracy:.2%}")
        self.log(train_loss)
        total_valid_loss = Loss("knn/valid")

        # Save the current task ID so we can reset it after testing.
        starting_task_id = model.setting.current_task_id

        for i, dataloader in enumerate(valid_loaders):
            if task_labels_at_test_time:
                model.on_task_switch(i, training=False)
            loss_i = evaluate(
                model=model,
                dataloader=dataloader,
                loss_name=f"[{i}]",
                scaler=scaler,
                knn_classifier=knn_classifier,
                num_classes=setting.num_classes_in_task(i),
            )
            # We use `.absorb(loss_i)` here so that the metrics get merged.
            # That way, if we access `total_valid_loss.accuracy`, this gives the
            # accuracy over all the validation tasks.
            # If we instead used `+= loss_i`, then loss_i would become a subloss
            # of `total_valid_loss`, since they have different names.
            # TODO: Explain this in more detail somewhere else.
            total_valid_loss.absorb(loss_i)
            logger.info(f"KNN Valid[{i}] Acc: {loss_i.accuracy:.2%}")
            self.log(loss_i)

        logger.info(f"KNN Average Valid Acc: {total_valid_loss.accuracy:.2%}")
        self.log(total_valid_loss)

        total_test_loss = Loss("knn/test")
        for i, dataloader in enumerate(test_loaders):
            if task_labels_at_test_time:
                model.on_task_switch(i, training=False)

            # TODO Should we set the number of classes to be the number of
            # classes in the current task?

            loss_i = evaluate(
                model=model,
                dataloader=dataloader,
                loss_name=f"[{i}]",
                scaler=scaler,
                knn_classifier=knn_classifier,
                num_classes=num_classes,
            )
            total_test_loss.absorb(loss_i)
            logger.info(f"KNN Test[{i}] Acc: {loss_i.accuracy:.2%}")
            self.log(loss_i)

        if task_labels_at_test_time:
            model.on_task_switch(starting_task_id, training=False)

        logger.info(f"KNN Average Test Acc: {total_test_loss.accuracy:.2%}")
        self.log(total_test_loss)
        return total_valid_loss, total_test_loss


def evaluate(
    model: LightningModule,
    dataloader: DataLoader,
    loss_name: str,
    scaler: StandardScaler,
    knn_classifier: KNeighborsClassifier,
    num_classes: int,
) -> Loss:
    """Evaluates the 'quality of representations' using a KNN.

    Assumes that the knn classifier was fitted on the same classes as
    the ones present in the dataloader.

    Args:
        model (Classifier): a Classifier model to use to encode samples.
        dataloader (DataLoader): a dataloader.
        loss_name (str): name to give to the resulting loss.
        scaler (StandardScaler): the scaler used during fitting.
        knn_classifier (KNeighborsClassifier): The KNN classifier.

    Returns:
        Loss: The loss object containing metrics and a 'total loss'
        which isn't a tensor in this case (since passing through the KNN
        isn't a differentiable operation).
    """
    h_x_test, y_test = get_hidden_codes_array(
        model,
        dataloader,
        description=f"KNN ({loss_name})",
    )
    train_classes = set(knn_classifier.classes_)
    test_classes = set(y_test)
    # Check that the same classes were used.
    assert test_classes.issubset(train_classes), (
        f"y and y_test should contain the same classes: "
        f"(train classes: {train_classes}, "
        f"test classes: {test_classes})."
    )
    test_loss = get_knn_performance(
        x_t=h_x_test,
        y_t=y_test,
        loss_name=loss_name,
        scaler=scaler,
        knn_classifier=knn_classifier,
        num_classes=num_classes,
    )
    test_loss.loss = torch.as_tensor(test_loss.loss)
    logger.info(f"{loss_name} Acc: {test_loss.accuracy:.2%}")
    return test_loss


def get_hidden_codes_array(
    model: LightningModule, dataloader: DataLoader, description: str = "KNN"
) -> Tuple[np.ndarray, np.ndarray]:
    """Gets the hidden vectors and corresponding labels."""
    h_x_list: List[np.ndarray] = []
    y_list: List[np.ndarray] = []

    for batch in pbar(dataloader, description, leave=False):
        # TODO: Debug this, make sure this callback still works.
        x, y = batch
        assert isinstance(x, Tensor), type(x)

        # We only do KNN with examples that have a label.
        assert y is not None, f"Should have a 'y' for now! {x}, {y}"
        if y is not None:
            # TODO: There will probably be some issues with trying to use
            # the model's encoder to encode stuff when using DataParallel or
            # DistributedDataParallel, as PL might be interfering somehow.
            h_x = model.encode(x.to(model.device))
            h_x_list.append(h_x.detach().cpu().numpy())
            y_list.append(y.detach().cpu().numpy())
    codes = np.concatenate(h_x_list)
    labels = np.concatenate(y_list)
    return codes.reshape(codes.shape[0], -1), labels


def fit_knn(
    x: np.ndarray,
    y: np.ndarray,
    num_classes: int,
    options: KnnClassifierOptions = None,
    loss_name: str = "knn",
) -> Tuple[Loss, StandardScaler, KNeighborsClassifier]:
    # print(x.shape, y.shape, x_t.shape, y_t.shape)
    options = options or KnnClassifierOptions()

    scaler = StandardScaler()
    x_s = scaler.fit_transform(x)
    # Create and train the Knn Classifier using the options as the kwargs
    knn_classifier = KNeighborsClassifier(**asdict(options)).fit(x_s, y)
    train_loss = get_knn_performance(
        x_t=x,
        y_t=y,
        scaler=scaler,
        knn_classifier=knn_classifier,
        num_classes=num_classes,
    )
    return train_loss, scaler, knn_classifier


def get_knn_performance(
    x_t: np.ndarray,
    y_t: np.ndarray,
    scaler: StandardScaler,
    knn_classifier: KNeighborsClassifier,
    num_classes: int,
    loss_name: str = "KNN",
) -> Loss:
    # Flatten the inputs to two dimensions only.
    x_t = x_t.reshape(x_t.shape[0], -1)
    assert len(x_t.shape) == 2
    x_t = scaler.transform(x_t)
    y_t_prob = knn_classifier.predict_proba(x_t)

    classes = knn_classifier.classes_
    # make sure the classes are sorted:
    assert np.array_equal(sorted(classes), classes)

    if y_t_prob.shape[-1] == num_classes:
        y_t_logits = y_t_prob
    else:
        # Not all classes were encountered, so we need to 'expand' the predicted
        # logits to the right shape.
        logger.info(f"{y_t_prob.shape} {num_classes}")
        num_classes = max(num_classes, y_t_prob.shape[-1])

        y_t_logits = np.zeros([y_t_prob.shape[0], num_classes], dtype=y_t_prob.dtype)

        for i, logits in enumerate(y_t_prob):
            for label, logit in zip(classes, logits):
                y_t_logits[i][label - 1] = logit

    ## We were constructing this to reorder the classes in case the ordering was
    ## not the same between the KNN's internal `classes_` attribute and the task
    ## classes, However I'm not sure if this is necessary anymore.

    # y_t_logits = np.zeros((y_t.size, y_t.max() + 1))
    # for i, label in enumerate(classes):
    #     y_t_logits[:, label] = y_t_prob[:, i]

    # We get the Negative Cross Entropy using the scikit-learn function, but we
    # could instead get it using pytorch's function (maybe even inside the
    # Loss object!
    nce_t = log_loss(y_true=y_t, y_pred=y_t_prob, labels=classes)
    # BUG: There is sometimes a case where some classes aren't present in
    # `classes_`, and as such the ClassificationMetrics object created in the
    # Loss constructor has an error.
    test_loss = Loss(loss_name, loss=nce_t, y_pred=y_t_logits, y=y_t)
    return test_loss


from simple_parsing.helpers.serialization import register_decoding_fn

register_decoding_fn(KnnCallback, lambda v: v)


================================================
FILE: sequoia/common/callbacks/vae_callback.py
================================================
from dataclasses import dataclass
from typing import Optional

import torch
from pytorch_lightning import Callback, Trainer
from torch import Tensor
from torchvision.utils import save_image

from sequoia.methods.aux_tasks.reconstruction import AEReconstructionTask, VAEReconstructionTask
from sequoia.methods.models import BaseModel
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


@dataclass
class SaveVaeSamplesCallback(Callback):
    """Callback which saves some generated/reconstructed samples.

    Reconstructs and/or generates samples periodically during training if any of
    of the autoencoder/generative model based auxiliary tasks are used.
    """

    def __post_init__(self, *args, **kwargs):
        self.reconstruction_task: Optional[AEReconstructionTask] = None
        self.generation_task: Optional[VAEReconstructionTask] = None
        self.latents_batch: Optional[Tensor] = None
        self.model: BaseModel
        self.trainer: Trainer

    def setup(self, trainer, pl_module, stage: str):
        """Called when fit or test begins"""
        super().setup(trainer, pl_module, stage)

    def on_train_start(self, trainer, pl_module):
        """Called when the train begins."""
        self.trainer = trainer
        self.model = pl_module
        from sequoia.methods.models.base_model.self_supervised_model import SelfSupervisedModel

        if isinstance(pl_module, SelfSupervisedModel):
            # if our model has auxiliary tasks (i.e., if it's a self-supervised model.)
            if VAEReconstructionTask.name in self.model.tasks:
                self.reconstruction_task = self.model.tasks[VAEReconstructionTask.name]
                self.generation_task = self.reconstruction_task
                self.latents_batch = torch.randn(64, self.model.hp.hidden_size)

            elif AEReconstructionTask.name in pl_module.tasks:
                self.reconstruction_task = self.model.tasks[AEReconstructionTask.name]
                self.generation_task = None

    def on_train_epoch_end(self, trainer: Trainer, pl_module: BaseModel):
        # do something
        if self.generation_task:
            # Save a batch of fake images after each epoch.
            self.generate_samples()

        ## Reconstruct some samples after each epoch.
        # TODO: change this to use an interval instead.
        x_batch = None
        if x_batch is not None:
            self.reconstruct_samples(x_batch)

    @torch.no_grad()
    def reconstruct_samples(self, data: Tensor):
        if not self.reconstruction_task or not self.reconstruction_task.enabled:
            return
        n = min(data.size(0), 16)

        originals = data[:n]
        reconstructed = self.reconstruction_task.reconstruct(originals)
        comparison = torch.cat([originals, reconstructed])

        reconstruction_images_dir = self.model.config.log_dir / "reconstruction"
        reconstruction_images_dir.mkdir(parents=True, exist_ok=True)
        file_name = reconstruction_images_dir / f"step_{self.trainer.global_step:08d}.png"
        comparison = comparison.cpu().detach()
        # TODO: Debug this:
        # import wandb
        # if self.trainer.logger:
        #     self.trainer.logger.log({"reconstruction": wandb.Image(comparison)})
        save_image(comparison, file_name, nrow=n)

    @torch.no_grad()
    def generate_samples(self):
        if not self.generation_task or not self.generation_task.enabled:
            return
        n = 64
        latents = self.latents_batch
        fake_samples = self.generation_task.generate(latents)
        fake_samples = fake_samples.cpu().reshape(n, *reversed(self.model.setting.dims))
        # fake_samples = (fake_samples * 255).astype(np.uint8)

        generation_images_dir = self.model.config.log_dir / "generated_samples"
        generation_images_dir.mkdir(parents=True, exist_ok=True)
        file_name = generation_images_dir / f"step_{self.trainer.global_step:08d}.png"

        # import wandb
        # if self.model.logger:
        #     self.model.logger.experiment.log({"generated": wandb.Image(fake_samples)})

        save_image(fake_samples, file_name, normalize=True)
        logger.debug(f"saved image at path {file_name}")


================================================
FILE: sequoia/common/config/__init__.py
================================================
from .config import Config
from .wandb_config import WandbConfig


================================================
FILE: sequoia/common/config/config.py
================================================
""" Config dataclasses for use with pytorch lightning.

@author Fabrice Normandin (@lebrice)
"""
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

import numpy as np
import torch
from pytorch_lightning import seed_everything
from pyvirtualdisplay import Display
from simple_parsing import Serializable, flag

from sequoia.utils.logging_utils import get_logger
from sequoia.utils.parseable import Parseable

# from .trainer_config import TrainerConfig
logger = get_logger(__name__)


virtual_display = None


@dataclass
class Config(Serializable, Parseable):
    """Configuration options for an experiment.

    TODO: This should contain configuration options that are not specific to
    either the Setting or the Method, or common to both. For instance, the
    random seed, or the log directory, wether CUDA is to be used, etc.
    """

    # Directory containing the datasets.
    data_dir: Path = Path(os.environ.get("SLURM_TMPDIR", os.environ.get("DATA_DIR", "data")))
    # Directory containing the results of an experiment.
    log_dir: Path = Path(os.environ.get("RESULTS_DIR", "results"))

    # Run in Debug mode: no wandb logging, extra output.
    debug: bool = flag(False)
    # Wether to render the environment observations. Slows down training.
    render: bool = flag(False)

    # Enables more verbose logging.
    verbose: bool = flag(False)
    # Number of workers for the dataloaders.
    num_workers: Optional[int] = None
    # Random seed.
    seed: Optional[int] = None
    # Which device to use. Defaults to 'cuda' if available.
    device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def __post_init__(self):
        self.seed_everything()
        self._display: Optional[Display] = None
        self.rng = np.random.default_rng(self.seed)
        self.log_dir = Path(self.log_dir)
        self.data_dir = Path(self.data_dir)

    def __del__(self):
        if self._display:
            self._display.stop()

    def get_display(self) -> Optional[Display]:
        if self._display:
            return self._display
        if not self.render:
            # If `--render` isn't set, then try to create a virtual display.
            # This has the same effect as running the script with xvfb-run
            try:
                virtual_display = Display(visible=False, size=(1366, 768))
                virtual_display.start()
                self._display = virtual_display
            except Exception as e:
                logger.warning(
                    RuntimeWarning(
                        f"Rendering is disabled, but we were unable to start the "
                        f"virtual display! {e}\n"
                        f"Make sure that xvfb is installed on your machine if you "
                        f"want to prevent rendering the environment's observations."
                    )
                )
        return self._display

    def seed_everything(self) -> None:
        if self.seed is not None:
            seed_everything(self.seed)


================================================
FILE: sequoia/common/config/wandb_config.py
================================================
"""TODO: Re-enable the wandb stuff (disabled for now).
"""
import os
import re
from dataclasses import dataclass
from pathlib import Path
from typing import *

from pytorch_lightning.loggers import WandbLogger
from simple_parsing import field, list_field

import wandb
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.serialization import Serializable


def patched_monitor():
    vcr = wandb.util.get_module(
        "gym.wrappers.monitoring.video_recorder",
        required="Couldn't import the gym python package, install with pip install gym",
    )
    print(f"Using patched version of `wandb.gym.monitor()`")
    if hasattr(vcr.ImageEncoder, "orig_close"):
        print(f"wandb.gym.monitor() has already been called.")
        return
    else:
        vcr.ImageEncoder.orig_close = vcr.ImageEncoder.close

    def close(self):
        vcr.ImageEncoder.orig_close(self)
        m = re.match(r".+(video\.\d+).+", self.output_path)
        if m:
            key = m.group(1)
        else:
            key = "videos"
        wandb.log({key: wandb.Video(self.output_path)})

    vcr.ImageEncoder.close = close
    wandb.patched["gym"].append(["gym.wrappers.monitoring.video_recorder.ImageEncoder", "close"])


import wandb.integration.gym

wandb.integration.gym.monitor = patched_monitor


# GYM_MONITOR = os.environ.get("GYM_MONITOR", "")
# if not GYM_MONITOR:
#     wandb.gym.monitor()
#     os.environ["GYM_MONITOR"] = "True"
# else:
#     assert False, "importing this a second time?"

logger = get_logger(__name__)


@dataclass
class WandbConfig(Serializable):
    """Set of configurations options for calling wandb.init directly."""

    # Which user to use
    entity: str = ""

    # project name to use in wandb.
    project: str = ""

    # Name used to easily group runs together.
    # Used to create a parent folder that will contain the `run_name` directory.
    # A unique string shared by all runs in a given group
    # Used to create a parent folder that will contain the `run_name` directory.
    group: Optional[str] = None
    # Wandb run name. If None, will use wandb's automatic name generation
    run_name: Optional[str] = None

    # Identifier unique to each individual wandb run. When given, will try to
    # resume the corresponding run, generates a new ID each time.
    run_id: Optional[str] = None

    # An run number is used to differentiate different iterations of the same experiment.
    # Runs with the same name can be later grouped with wandb to produce stderr plots.
    # TODO: Could maybe use the run_id instead?
    run_number: Optional[int] = None

    # Path where the wandb files should be stored. If the 'WANDB_DIR'
    # environment variable is set, uses that value. Otherwise, defaults to
    # the value of "<log_dir_root>/wandb"
    wandb_path: Optional[Path] = (
        Path(os.environ["WANDB_DIR"]) if "WANDB_DIR" in os.environ else None
    )

    # Tags to add to this run with wandb.
    tags: List[str] = list_field()

    # Notes about this particular experiment. (will be logged to wandb if used.)
    notes: Optional[str] = None

    # Root Logging directory.
    log_dir_root: Path = Path("results")

    monitor_gym: bool = True

    # Wandb api key. Useful for preventing the login prompt from wandb from appearing
    # when running on clusters or docker-based setups where the environment variables
    # aren't always shared.
    wandb_api_key: Optional[Union[str, Path]] = field(
        default=os.environ.get("WANDB_API_KEY"),
        to_dict=False,  # Do not serialize this field.
        repr=False,  # Do not show this field in repr().
    )

    # Run offline (data can be streamed later to wandb servers).
    offline: bool = False
    # Enables or explicitly disables anonymous logging.
    anonymous: bool = False
    # Sets the version, mainly used to resume a previous run.
    version: Optional[str] = None

    # Save checkpoints in wandb dir to upload on W&B servers.
    log_model: bool = False

    # Class variables used to check wether wandb.login has already been called or not.
    logged_in: ClassVar[bool] = False
    key_configured: ClassVar[bool] = False

    @property
    def log_dir(self):
        return self.log_dir_root.joinpath(
            (self.project or ""),
            (self.group or ""),
            (self.run_name or "default"),
            (f"run_{self.run_number}" if self.run_number is not None else ""),
        )

    def wandb_login(self) -> bool:
        """Calls `wandb.login()`.

        Returns
        -------
        bool
            If the key is configured.
        """
        key = None
        if self.wandb_api_key is not None and self.project:
            if Path(self.wandb_api_key).is_file():
                key = Path(self.wandb_api_key).read_text()
            else:
                key = str(self.wandb_api_key)
            assert isinstance(key, str)

        cls = type(self)
        if not cls.logged_in:
            cls.key_configured = wandb.login(key=key)
            cls.logged_in = True
        return cls.key_configured

    def wandb_init_kwargs(self) -> Dict:
        """Return the kwargs to pass to wandb.init()"""
        if self.run_name is None:
            # TODO: Create a run name using the coefficients of the tasks, etc?
            # At the moment, if no run name is given, the 'random' name from wandb is used.
            pass
        if self.wandb_path is None:
            self.wandb_path = self.log_dir_root / "wandb"
        self.wandb_path.mkdir(parents=True, mode=0o777, exist_ok=True)
        return dict(
            dir=str(self.wandb_path),
            project=self.project,
            entity=self.entity,
            name=self.run_name,
            id=self.run_id,
            group=self.group,
            notes=self.notes,
            reinit=True,
            tags=self.tags,
            resume="allow",
            monitor_gym=self.monitor_gym,
        )

    def wandb_init(self, config_dict: Dict = None) -> wandb.wandb_run.Run:
        """Executes the call to `wandb.init()`.

        TODO(@lebrice): Not sure if it still makes sense to call `wandb.init`
        ourselves when using Pytorch Lightning, should probably ask @jeromepl
        for advice on this.

        Args:
            config_dict (Dict): The configuration dictionary. Usually obtained
            by calling `to_dict()` on a `Serializable` dataclass, or `asdict()`
            on a regular dataclass.

        Returns:
            wandb.wandb_run.Run: Whatever gets returned by `wandb.init()`.
        """

        logger.info(f"Wandb run id: {self.run_id}")
        logger.info(
            f"Using wandb. Group name: {self.group} run name: {self.run_name}, "
            f"log_dir: {self.log_dir}"
        )
        self.wandb_login()

        init_kwargs = self.wandb_init_kwargs()
        init_kwargs["config"] = config_dict

        run = wandb.init(**init_kwargs)
        logger.info(f"Run: {run}")
        if run:
            if self.run_name is None:
                self.run_name = run.name
            # run.save()
            if run.resumed:
                # TODO: add *proper* wandb resuming, probaby by using @nitarshan 's md5 id cool idea.
                # wandb.restore(self.log_dir / "checkpoints")
                pass
        return run

    def make_logger(self, wandb_parent_dir: Path = None) -> WandbLogger:
        logger.info(f"Creating a WandbLogger with using options {self}.")
        self.wandb_login()
        wandb_logger = WandbLogger(
            name=self.run_name,
            save_dir=str(wandb_parent_dir) if wandb_parent_dir else None,
            offline=self.offline,
            id=self.run_id,
            anonymous=self.anonymous,
            version=self.version,
            project=self.project,
            tags=self.tags,
            log_model=self.log_model,
            entity=self.entity,
            group=self.group,
            monitor_gym=self.monitor_gym,
            reinit=True,
        )
        return wandb_logger


================================================
FILE: sequoia/common/gym_wrappers/__init__.py
================================================
""" Contains some potentially useful gym wrappers. """
from .add_done import AddDoneToObservation
from .add_info import AddInfoToObservation
from .convert_tensors import ConvertToFromTensors
from .env_dataset import EnvDataset
from .multi_task_environment import MultiTaskEnvironment
from .pixel_observation import PixelObservationWrapper
from .policy_env import PolicyEnv
from .smooth_environment import SmoothTransitions
from .step_callback_wrapper import PeriodicCallback, StepCallback, StepCallbackWrapper
from .transform_wrappers import TransformAction, TransformObservation, TransformReward
from .utils import IterableWrapper, RenderEnvWrapper, has_wrapper


================================================
FILE: sequoia/common/gym_wrappers/action_limit.py
================================================
""" IDEA: same as ObservationLimit, for for the number of total actions (steps).
"""
import gym
from gym.error import ClosedEnvironmentError

from sequoia.utils import get_logger

from .utils import IterableWrapper

logger = get_logger(__name__)


class ActionCounter(IterableWrapper):
    """Wrapper that counts the total number of actions performed so far.
    (including those in the individual environments when wrapping a VectorEnv.)
    """

    def __init__(self, env: gym.Env):
        super().__init__(env=env)
        self._action_counter: int = 0

    def step_count(self) -> int:
        return self._action_counter

    def action_count(self) -> int:
        return self._action_counter

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self._action_counter += self.env.num_envs if self.is_vectorized else 1
        return obs, reward, done, info


class ActionLimit(ActionCounter):
    """Closes the env when `max_steps` actions have been performed *in total*.

    For vectorized environments, each step consumes up to `num_envs` from this
    total budget, i.e. the step counter is incremented by the batch size at
    each step.
    """

    def __init__(self, env: gym.Env, max_steps: int):
        super().__init__(env=env)

        self._max_steps = max_steps
        self._initial_reset = False
        self._is_closed: bool = False

    @property
    def max_steps(self) -> int:
        return self._max_steps

    def __len__(self):
        return self.max_steps

    def closed_error_message(self) -> str:
        return f"Env reached max number of steps ({self._max_steps})"

    def step(self, action):
        if self._action_counter >= self._max_steps:
            raise ClosedEnvironmentError(f"Env reached max number of steps ({self._max_steps})")

        obs, reward, done, info = super().step(action)
        # logger.debug(f"(step {self._action_counter}/{self._max_steps})")

        # BUG: If we dont use >=, then iteration with EnvDataset doesn't work.
        if self._action_counter >= self._max_steps:
            self.close()
            # done = True
            # info["truncated"] = True

        return obs, reward, done, info


================================================
FILE: sequoia/common/gym_wrappers/action_limit_test.py
================================================
from typing import List

import gym
import pytest
from gym.wrappers import TimeLimit

from sequoia.common.gym_wrappers.env_dataset import EnvDataset

from .action_limit import ActionLimit


def test_basics():
    env = gym.make("CartPole-v0")
    env = ActionLimit(env, max_steps=10)


def test_EnvDataset_of_ActionLimit():
    max_episode_steps = 10
    max_steps = 100
    env = gym.make("CartPole-v0")
    env = TimeLimit(env, max_episode_steps=max_episode_steps)
    env = ActionLimit(env, max_steps=max_steps)
    env = EnvDataset(env)
    done = False
    episode_steps: List[int] = []
    total_steps = 0
    for episode in range(15):
        print(f"Staring episode {episode}, env.is_closed(): {env.is_closed()}")
        step = None
        for step, obs in enumerate(env):
            print(f"Episode {episode}, Step {step}, obs {obs} {env.is_closed()}")
            assert step <= max_episode_steps
            env.send(env.action_space.sample())
            total_steps += 1
        assert step is not None
        # NOTE: Here we have the last 'step' as 9.
        episode_steps.append(step)

        assert total_steps <= max_steps
        if total_steps == max_steps:
            break

    assert env.is_closed()
    assert sum(step + 1 for step in episode_steps) == max_steps


@pytest.mark.xfail(
    reason="FIXME: Shouldn't use CartPole env for this test since episodes aren't "
    "always longer than 10."
)
def test_ActionLimit_of_EnvDataset():
    max_episode_steps = 10
    max_steps = 100
    env = gym.make("CartPole-v0")
    env = TimeLimit(env, max_episode_steps=max_episode_steps)
    env = EnvDataset(env)
    env = ActionLimit(env, max_steps=max_steps)
    env.seed(123)
    done = False
    episode_steps: List[int] = []
    for episode in range(10):
        print(f"Staring episode {episode}, env.is_closed(): {env.is_closed()}")
        step = 0
        for step, obs in enumerate(env):
            print(f"Episode {episode}, Step {step}, obs {obs} {env.is_closed()}")
            assert step <= max_episode_steps
            env.send(env.action_space.sample())
        assert step > 0
        # NOTE: Here we have the last 'step' as 9.
        episode_steps.append(step)

    assert env.is_closed()
    assert sum(step + 1 for step in episode_steps) == max_steps


from sequoia.settings.sl.wrappers.measure_performance_test import with_is_last


@pytest.mark.xfail(
    reason=(
        "BUG: Why is the BaseMethod working fine on a `TraditionalRLSetting, but "
        "not on an IncrementalRLSetting? Seems like the 'max_steps' isn't enforced the "
        " same way in both somehow."
    )
)
def test_delayed_EnvDataset_of_ActionLimit():
    """Same test as above, however introduce a delay (like what's happening in the pl.Trainer)
    between the items sent by the trainer and the rewards returned by the env.

    """

    max_episode_steps = 10
    max_steps = 100
    env = gym.make("CartPole-v0")
    env = TimeLimit(env, max_episode_steps=max_episode_steps)
    env = EnvDataset(env)
    env = ActionLimit(env, max_steps=max_steps)
    done = False

    episode_steps: List[int] = []
    for episode in range(10):
        print(f"Staring episode {episode}, env.is_closed(): {env.is_closed()}")
        step = 0
        for step, (obs, is_last) in enumerate(with_is_last(env)):
            print(f"Episode {episode}, Step {step}, obs {obs} {env.is_closed()}")
            assert step <= max_episode_steps
            env.send(env.action_space.sample())
            if step == max_episode_steps - 1:
                assert is_last
        assert step > 0
        # NOTE: Here we have the last 'step' as 9.
        episode_steps.append(step)

    assert env.is_closed()
    assert sum(step + 1 for step in episode_steps) == max_steps


================================================
FILE: sequoia/common/gym_wrappers/add_done.py
================================================
""" Wrapper that adds 'done' as part of the environment's observations.
"""
from dataclasses import is_dataclass, replace
from functools import singledispatch
from typing import Any, Dict, Sequence, Tuple, TypeVar, Union

import gym
import numpy as np
from gym import Space, spaces
from gym.vector.utils import batch_space
from torch import Tensor

from sequoia.common.spaces import TypedDictSpace

from .utils import IterableWrapper

T = TypeVar("T")
Bool = TypeVar("Bool", bound=Union[bool, Sequence[bool]])
K = TypeVar("K")
V = TypeVar("V")


@singledispatch
def add_done(observation: Any, done: Any) -> Any:
    """Generic function that adds the provided `done` value to an observation.
    Returns the modified observation, which might not always be of the same type.
    """
    if is_dataclass(observation):
        return replace(observation, done=done)
    raise NotImplementedError(
        f"Function add_done has no handler registered for observations of type "
        f"{type(observation)}."
    )


@add_done.register(int)
@add_done.register(float)
@add_done.register(Tensor)
@add_done.register(np.ndarray)
def _add_done_to_array_obs(observation: T, done: bool) -> Dict[str, Union[T, bool]]:
    # TODO: use 'x' or 'observation'?
    return {"x": observation, "done": done}


@add_done.register(tuple)
def _add_done_to_tuple_obs(observation: Tuple, done: bool) -> Tuple:
    return observation + (done,)


@add_done.register(dict)
def _add_done_to_dict_obs(observation: Dict[K, V], done: bool) -> Dict[K, Union[V, bool]]:
    assert "done" not in observation
    observation["done"] = done
    return observation


@add_done.register
def add_done_to_space(observation: Space, done: Space) -> Space:
    """Adds the space of the 'done' value to the given space.

    By default, `done` corresponds to what you'd get from a single
    (i.e. non-vectorized) environment.
    """
    raise NotImplementedError(
        f"No handler registered for spaces of type {type(observation)}. "
        f"(value = {observation}, done={done})"
    )


@add_done.register(spaces.Discrete)
@add_done.register(spaces.MultiDiscrete)
@add_done.register(spaces.MultiBinary)
@add_done.register(spaces.Box)
def _add_done_to_box_space(observation: Space, done: Space) -> spaces.Dict:
    # TODO: Use 'x' or 'observation' as the key?
    return TypedDictSpace(
        x=observation,
        done=done,
    )


@add_done.register
def _add_done_to_tuple_space(observation: spaces.Tuple, done: Space) -> spaces.Tuple:
    return spaces.Tuple(
        [
            *observation.spaces,
            done,
        ]
    )


@add_done.register
def _add_done_to_dict_space(observation: spaces.Dict, done: Space) -> spaces.Dict:
    new_spaces = observation.spaces.copy()
    assert "done" not in new_spaces, "space shouldn't already have a 'done' key."
    new_spaces["done"] = done
    return type(observation)(new_spaces)


class AddDoneToObservation(IterableWrapper):
    """Wrapper that adds the 'done' from step to the
    Need to add the 'done' vector to the observation, so we can
    get access to the 'end of episode' signal in the shared_step, since
    when iterating over the env like a dataloader, the yielded items only
    have the observations, and dont have the 'done' vector. (so as to be
    consistent with supervised learning).

    NOTE: NEVER use this *BEFORE* batching, because of how the 'reset' works in
    all VectorEnvs, the observations will always be the 'new' ones, so `done`
    (in the obs) will always be False!
    """

    def __init__(self, env: gym.Env, done_space: Space = None):
        super().__init__(env)
        # boolean value. (0 or 1)
        if done_space is None:
            done_space = spaces.Box(0, 1, (), dtype=np.bool)
            if self.is_vectorized:
                self.single_observation_space = add_done(self.single_observation_space, done_space)
                done_space = batch_space(done_space, self.env.num_envs)
        self.done_space = done_space
        self.observation_space = add_done(self.env.observation_space, self.done_space)

    def reset(self, **kwargs):
        observation = self.env.reset()
        if self.is_vectorized:
            done = self.done_space.low
        else:
            done = False
        return add_done(observation, done)

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        observation = add_done(observation, done)
        return observation, reward, done, info


================================================
FILE: sequoia/common/gym_wrappers/add_info.py
================================================
""" Wrapper that adds the 'info' as a part of the environment's observations.
"""
from dataclasses import is_dataclass, replace
from functools import singledispatch
from typing import Dict, Sequence, Tuple, TypeVar, Union

import gym
import numpy as np
from gym import Space, spaces
from gym.vector import VectorEnv
from gym.vector.utils import batch_space
from torch import Tensor

from .utils import IterableWrapper

Info = TypeVar("Info", bound=Union[Dict, Sequence[Dict]])
K = TypeVar("K")
V = TypeVar("V")


@singledispatch
def add_info(observation, info):
    """Generic function that adds the provided `info` value to an observation.
    Returns the modified observation, which might not always be of the same type.

    NOTE: Can also be applied to spaces.
    """
    if is_dataclass(observation):
        # TODO: This assumes that the dataclass already has the 'info' field, if
        # that dataclass is frozen.
        return replace(observation, info=info)
    raise NotImplementedError(
        f"Function add_info has no handler registered for inputs of type " f"{type(observation)}."
    )


@add_info.register(Tensor)
@add_info.register(np.ndarray)
def _add_info_to_array_obs(observation: np.ndarray, info: Info) -> Tuple[np.ndarray, Info]:
    return (observation, info)


@add_info.register(tuple)
def _add_info_to_tuple_obs(observation: Tuple, info: Info) -> Tuple:
    return observation + (info,)


@add_info.register(dict)
def _add_info_to_dict_obs(observation: Dict[K, V], info: Info) -> Dict[K, Union[V, Info]]:
    assert "info" not in observation
    observation["info"] = info
    return observation


@add_info.register(spaces.Space)
def add_info_to_space(observation: Space, info: Space) -> Space:
    """Adds the space of the 'info' value from the env to this observation
    space.
    """
    raise NotImplementedError(
        f"No handler registered for spaces of type {type(observation)}. " f"(value = {observation})"
    )


@add_info.register
def _add_info_to_box_space(observation: spaces.Box, info: Space) -> spaces.Tuple:
    return spaces.Tuple(
        [
            observation,
            info,
        ]
    )


@add_info.register
def _add_info_to_tuple_space(observation: spaces.Tuple, info: Space) -> spaces.Tuple:
    return spaces.Tuple(
        [
            *observation.spaces,
            info,
        ]
    )


@add_info.register
def _add_info_to_dict_space(observation: spaces.Dict, info: Space) -> spaces.Dict:
    new_spaces = observation.spaces.copy()
    assert "info" not in new_spaces, "space shouldn't already have an 'info' key."
    new_spaces["info"] = info
    return type(observation)(new_spaces)


class AddInfoToObservation(IterableWrapper):
    # TODO: Need to add the 'info' dict to the Observation, so we can have
    # access to the final observation (which gets stored in the info dict at key
    # 'final_state'.
    # Do we through?

    # TODO: Should we also add the 'final state' to the observations as well?

    def __init__(self, env: gym.Env, info_space: spaces.Space = None):
        super().__init__(env)
        self.is_vectorized = isinstance(env.unwrapped, VectorEnv)
        # TODO: Should we make 'info_space' mandatory here?
        if info_space is None:
            # TODO: There seems to be some issues if we have an empty info space
            # before the batching.
            info_space = spaces.Dict({})
            if self.is_vectorized:
                info_space = batch_space(info_space, self.env.num_envs)
        self.info_space = info_space
        self.observation = add_info(self.env.observation_space, self.info_space)

    def reset(self, **kwargs):
        observation = self.env.reset()
        info = {}
        if self.is_vectorized:
            info = np.array([{} for _ in range(self.env.num_envs)])
        obs = add_info(observation, info)
        return obs

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        observation = add_info(observation, info)
        return observation, reward, done, info


================================================
FILE: sequoia/common/gym_wrappers/convert_tensors.py
================================================
from dataclasses import is_dataclass, replace
import dataclasses
from functools import singledispatch, wraps
from typing import Any, Dict, Tuple, TypeVar, Union

import gym
import numpy as np
import torch
from gym import Space, spaces
from torch import Tensor

from sequoia.common.spaces.image import Image, ImageTensorSpace
from sequoia.common.spaces.named_tuple import NamedTupleSpace
from sequoia.common.spaces.typed_dict import TypedDictSpace

from sequoia.utils.generic_functions import from_tensor, move  # , to_tensor
from sequoia.utils.logging_utils import get_logger

from .utils import IterableWrapper


@singledispatch
def to_tensor(v, device: torch.device = None) -> Union[Tensor, Any]:
    """Converts `v` into a tensor if `v` is a value, otherwise convert the items of `v` to tensors.

    - If `v` is a list, tuple, or dict, then the items are converted to tensors recursively.
    - If `v` is a dataclass, converts the fields to Tensors using `to_tensor` recursively.
    Otherwise, just uses `torch.as_tensor(v, device=device)`.
    """
    if v is None:
        return None
    if dataclasses.is_dataclass(v):
        return type(v)(
            **{
                field.name: to_tensor(getattr(v, field.name), device=device)
                for field in dataclasses.fields(v)
            }
        )
    return torch.as_tensor(v, device=device)


@to_tensor.register(tuple)
def _(
    v,
    device: torch.device = None,
):
    # NOTE: Choosing to convert tuples of things into tuples of tensor things, rather than torch
    # tensors.
    return tuple(to_tensor(v_i, device=device) for v_i in v)


@to_tensor.register(dict)
def _(v: Dict, device: torch.device = None) -> Dict:
    return type(v)(**{k: to_tensor(v_i, device=device) for k, v_i in v.items()})


logger = get_logger(__name__)

T = TypeVar("T")
S = TypeVar("S", bound=Space)
# TODO: Add 'TensorSpace' space which wraps a given space, doing the same kinda thing
# as in Sparse.


class ConvertToFromTensors(IterableWrapper):
    """Wrapper that converts Tensors into samples/ndarrays and vice versa.

    Whatever comes into the env is converted into np.ndarrays or samples from
    the action space, and whatever comes out of the environment (observations,
    rewards, dones, etc.) get converted to Tensors.

    Also supports Dict/Tuple/etc observation/action spaces.

    Also makes it so the `sample` methods of both the observation and
    action spaces return Tensors, and that their `contains` methods also accept
    Tensors as an input.

    If `device` is given, created Tensors are moved to the provided device.
    """

    def __init__(self, env: gym.Env, device: Union[torch.device, str] = None):
        super().__init__(env=env)
        self.device = device
        self.observation_space: Space = add_tensor_support(
            self.env.observation_space, device=device
        )
        self.action_space: Space = add_tensor_support(self.env.action_space, device=device)
        self.reward_space: Space
        if hasattr(self.env, "reward_space"):
            self.reward_space = self.env.reward_space
        else:
            reward_range = getattr(self.env, "reward_range", (-np.inf, np.inf))
            reward_shape: Tuple[int, ...] = ()
            if self.is_vectorized:
                reward_shape = (self.env.num_envs,)
            self.reward_space = spaces.Box(
                reward_range[0], reward_range[1], reward_shape, np.float32
            )
        self.reward_space = add_tensor_support(self.reward_space, device=device)

    def reset(self, *args, **kwargs):
        obs = self.env.reset(*args, **kwargs)
        return self.observation(obs)

    def observation(self, observation):
        return to_tensor(observation, device=self.device)

    def action(self, action):
        if isinstance(self.action_space, spaces.MultiDiscrete) and is_dataclass(action):
            # TODO: Fixme, the actions don't currently fit their space!
            action_np = replace(action, y_pred=from_tensor(self.action_space, action.y_pred))
            # FIXME: for now, unwrapping the actions
            action = action_np["y_pred"]
            return action
        return from_tensor(self.action_space, action)

    def reward(self, reward):
        return to_tensor(reward, device=self.device)

    def step(self, action):
        action = self.action(action)
        assert action in self.env.action_space, (action, self.env.action_space)

        result = self.env.step(action)
        observation, reward, done, info = result
        observation = self.observation(observation)
        reward = self.reward(reward)
        # NOTE: Not sure this is useful, actually!
        # done = torch.as_tensor(done, device=self.device)

        # We could actually do this!
        # info = np.ndarray(info)
        return observation, reward, done, info


def supports_tensors(space: S) -> bool:
    # TODO: Remove this, instead use a generic function
    return getattr(space, "_supports_tensors", False)


def has_tensor_support(space: S) -> bool:
    return supports_tensors(space)


def _mark_supports_tensors(space: S) -> None:
    # TODO: Remove this!
    setattr(space, "_supports_tensors", True)


@singledispatch
def add_tensor_support(space: S, device: torch.device = None) -> S:
    """Modifies `space` so its `sample()` method produces Tensors, and its
    `contains` method also accepts Tensors.

    For Dict and Tuple spaces, all the subspaces are also modified recursively.

    Returns the modified Space.
    """
    # Save the original methods so we can use them.
    sample = space.sample
    contains = space.contains
    if supports_tensors(space):
        # logger.debug(f"Space {space} already supports Tensors.")
        return space

    @wraps(space.sample)
    def _sample(*args, **kwargs):
        samples = sample(*args, **kwargs)
        samples = to_tensor(space, samples)
        if device:
            samples = move(samples, device)
        return samples

    @wraps(space.contains)
    def _contains(x: Union[Tensor, Any]) -> bool:
        x = from_tensor(space, x)
        return contains(x)

    space.sample = _sample
    space.contains = _contains
    _mark_supports_tensors(space)
    assert has_tensor_support(space)
    return space


@add_tensor_support.register
def _(space: Image, device: torch.device = None) -> Image:
    tensor_box = TensorBox(
        space.low, space.high, shape=space.shape, dtype=space.dtype, device=device
    )
    return ImageTensorSpace.from_box(tensor_box)


@add_tensor_support.register
def _(space: spaces.Dict, device: torch.device = None) -> spaces.Dict:
    space = type(space)(
        **{key: add_tensor_support(value, device=device) for key, value in space.spaces.items()}
    )
    # TODO: Remove this '_mark_supports_tensors' and instead use a generic function.
    _mark_supports_tensors(space)
    return space


@add_tensor_support.register
def _(space: TypedDictSpace, device: torch.device = None) -> TypedDictSpace:
    space = type(space)(
        {key: add_tensor_support(value, device=device) for key, value in space.spaces.items()},
        dtype=space.dtype,
    )
    _mark_supports_tensors(space)
    return space


@add_tensor_support.register(NamedTupleSpace)
def _(space: Dict, device: torch.device = None) -> Dict:
    space = type(space)(
        **{key: add_tensor_support(value, device=device) for key, value in space.items()},
        dtype=space.dtype,
    )
    _mark_supports_tensors(space)
    return space


@add_tensor_support.register(spaces.Tuple)
def _(space: Dict, device: torch.device = None) -> Dict:
    space = type(space)([add_tensor_support(value, device=device) for value in space.spaces])
    _mark_supports_tensors(space)
    return space


# TODO: Should this be moved to the place where these are defined instead?
from sequoia.common.spaces.tensor_spaces import TensorBox, TensorDiscrete, TensorMultiDiscrete


@add_tensor_support.register
def _(space: spaces.Box, device: torch.device = None) -> spaces.Box:
    space = TensorBox(space.low, space.high, shape=space.shape, dtype=space.dtype, device=device)
    _mark_supports_tensors(space)
    return space


@add_tensor_support.register
def _(space: spaces.Discrete, device: torch.device = None) -> spaces.Box:
    space = TensorDiscrete(n=space.n, device=device)
    _mark_supports_tensors(space)
    return space


@add_tensor_support.register
def _(space: spaces.MultiDiscrete, device: torch.device = None) -> spaces.Box:
    space = TensorMultiDiscrete(nvec=space.nvec, device=device)
    _mark_supports_tensors(space)
    return space


================================================
FILE: sequoia/common/gym_wrappers/convert_tensors_test.py
================================================
from typing import Union

import gym
import pytest
import torch
from gym import spaces
from torch import Tensor

from sequoia.conftest import skipif_param

from .convert_tensors import ConvertToFromTensors, add_tensor_support


@pytest.mark.parametrize(
    "device",
    [
        None,
        "cpu",
        skipif_param(
            not torch.cuda.is_available(),
            "cuda",
            reason="Cuda is required for this test",
        ),
    ],
)
def test_convert_tensors_wrapper(device: Union[str, torch.device]):
    env_name = "CartPole-v0"
    env = gym.make(env_name)
    env = ConvertToFromTensors(env, device=device)
    obs = env.reset()
    assert isinstance(obs, Tensor)
    if device:
        assert obs.device.type == device

    action = env.action_space.sample()
    obs, reward, done, info = env.step(torch.as_tensor(action))
    assert isinstance(obs, Tensor)
    assert isinstance(reward, Tensor)
    # TODO: Not quite sure this is the best thing to do:
    # assert isinstance(done, Tensor) # not sure this is useful!
    if device:
        assert obs.device.type == device
        assert reward.device.type == device
        # assert done.device.type == device


from dataclasses import dataclass
from typing import Optional

from sequoia.common.batch import Batch
from sequoia.common.spaces import NamedTupleSpace, TypedDictSpace


@dataclass(frozen=True)
class Foo(Batch):
    x: Tensor
    task_labels: Optional[Tensor]


def test_preserves_dtype_of_namedtuple_space():
    input_space = NamedTupleSpace(
        x=spaces.Box(0, 1, [32, 123, 123, 3]),
        task_labels=spaces.MultiDiscrete([5 for _ in range(32)]),
        dtype=Foo,
    )

    output_space = add_tensor_support(input_space)
    assert output_space.dtype is input_space.dtype


def test_preserves_dtype_of_typeddict_space():
    input_space = TypedDictSpace(
        x=spaces.Box(0, 1, [32, 123, 123, 3]),
        task_labels=spaces.MultiDiscrete([5 for _ in range(32)]),
        dtype=Foo,
    )
    output_space = add_tensor_support(input_space)
    assert output_space.dtype is input_space.dtype


================================================
FILE: sequoia/common/gym_wrappers/env_dataset.py
================================================
""" Creates an IterableDataset from a Gym Environment.
"""
import warnings
from typing import Dict, Generic, Iterable, Iterator, Optional, Sequence, Tuple, TypeVar, Union

import gym
from gym.vector import VectorEnv
from torch import Tensor
from torch.utils.data import IterableDataset

from sequoia.utils.logging_utils import get_logger

from .utils import ActionType
from .utils import MayCloseEarly as CloseableWrapper
from .utils import ObservationType, RewardType, StepResult

# from sequoia.settings.base.objects import Observations, Rewards, Actions
logger = get_logger(__name__)


Item = TypeVar("Item")


class EnvDataset(
    CloseableWrapper,
    IterableDataset,
    Generic[ObservationType, ActionType, RewardType, Item],
    Iterable[Item],
):
    """Wrapper that exposes a Gym environment as an IterableDataset.

    This makes it possible to iterate over a gym env with an Active DataLoader.

    One pass through __iter__ is one episode. The __iter__ method can be called
    at most `max_episodes` times.
    """

    def __init__(
        self,
        env: gym.Env,
        max_steps: Optional[int] = None,
        max_episodes: Optional[int] = None,
        max_steps_per_episode: Optional[int] = None,
    ):
        # TODO: Remove these options
        if max_steps:
            from .action_limit import ActionLimit

            env = ActionLimit(env, max_steps=max_steps)
        self._max_steps = max_steps
        if max_episodes:
            from .episode_limit import EpisodeLimit

            env = EpisodeLimit(env, max_episodes=max_episodes)
        self._max_episodes = max_episodes

        super().__init__(env=env)
        if isinstance(env.unwrapped, VectorEnv):
            if not max_steps_per_episode:
                warnings.warn(
                    UserWarning(
                        "Iterations through the dataset (episodes) could be "
                        "infinitely long, since the env is a VectorEnv and "
                        "max_steps_per_episode wasn't given!"
                    )
                )

        # Maximum number of episodes
        # self._max_episodes = None
        # Maximum number of steps per iteration.
        # self._max_steps = None
        self._max_steps_per_episode = max_steps_per_episode

        # Number of steps performed in the current episode.
        self.n_steps_in_episode_: int = 0

        # Total number of steps performed so far.
        self.n_steps_: int = 0
        # Number of episodes performed in the environment.
        # Starts at -1 so the initial was_reset doesn't count as the end of an episode.
        self.n_episodes_: int = 0
        # Number of times the `send` method was called.
        self.n_sends_: int = 0

        self.observation_: Optional[ObservationType] = None
        self.action_: Optional[ActionType] = None
        self.reward_: Optional[RewardType] = None
        self.done_: Optional[Union[bool, Sequence[bool]]] = None
        self.info_: Optional[Union[Dict, Sequence[Dict]]] = None

        self.closed_: bool = False
        self.reset_: bool = False

        self.current_step_result_: StepResult = None
        self.previous_step_result_: StepResult = None

    def reset_counters(self):
        self.n_steps_ = 0
        self.n_episodes_ = 0
        self.n_sends_ = 0
        self.n_steps_in_episode_ = 0

    def observation(self, observation):
        return observation

    def action(self, action):
        return action

    def reward(self, reward):
        return reward

    def step(self, action) -> StepResult:
        if self.closed_ or self.is_closed():
            if self.reached_episode_limit:
                raise gym.error.ClosedEnvironmentError(
                    f"Env has already reached episode limit ({self._max_episodes}) and is closed."
                )
            elif self.reached_step_limit:
                raise gym.error.ClosedEnvironmentError(
                    f"Env has already reached step limit ({self._max_steps}) and is closed."
                )
            else:
                raise gym.error.ClosedEnvironmentError(
                    f"Can't call step on closed env. ({self.n_steps_})"
                )
        # Here we add calls to the (potentially overwritten) 'observation',
        # 'action' and 'reward' methods.
        action = self.action(action)
        if isinstance(action, Tensor) and action.requires_grad:
            action = action.detach()
        observation, reward, done, info = super().step(action)
        observation = self.observation(observation)
        reward = self.reward(reward)
        self.n_steps_ += 1
        self.n_steps_in_episode_ += 1

        result = StepResult(observation, reward, done, info)
        self.previous_step_result_ = self.current_step_result_
        self.current_step_result_ = result
        return result

    def __next__(
        self,
    ) -> Tuple[ObservationType, Union[bool, Sequence[bool]], Union[Dict, Sequence[Dict]]]:
        """Produces the next observations, or raises StopIteration.

        Returns
        -------
        Tuple[ObservationType, Union[bool, Sequence[bool]], Union[Dict, Sequence[Dict]]]
            [description]

        Raises
        ------
        gym.error.ClosedEnvironmentError
            If the env is already closed.
        gym.error.ResetNeeded
            If the env hasn't been reset before this is called.
        StopIteration
            When the step limit has been reached.
        StopIteration
            When the episode limit has been reached.
        RuntimeError
            When an action wasn't passed through 'send', and a default policy
            isn't set.
        """
        # logger.debug(f"__next__ is being called at step {self.n_steps_}.")

        if self.closed_:
            raise gym.error.ClosedEnvironmentError("Env is closed.")

        if self.reached_episode_limit:
            logger.debug("Reached episode limit, raising StopIteration.")
            raise StopIteration
        if self.reached_step_limit:
            logger.debug("Reached step limit, raising StopIteration.")
            raise StopIteration
        if self.reached_episode_length_limit:
            logger.debug("Reached episode length limit, raising StopIteration.")
            raise StopIteration

        if not self.reset_:
            raise gym.error.ResetNeeded("Need to reset the env before you can call __next__")

        if self.action_ is None:
            raise RuntimeError("You have to send an action using send() between every observation.")
        if hasattr(self.action_, "detach"):
            self.action_ = self.action_.detach()
        self.observation_, self.reward_, self.done_, self.info_ = self.step(self.action_)
        return self.observation_

    def send(self, action: ActionType) -> RewardType:
        """Sends an action to the environment, returning a reward.
        This can raise the same errors as calling __next__, namely,
        StopIteration, ResetNeeded,  raise an error when if not called without
        """
        assert action is not None, "Don't send a None action!"
        self.action_ = action
        self.observation_, self.reward_, self.done_, self.info_ = self.step(action)
        # self.observation_ = self.__next__()
        self.n_sends_ += 1
        return self.reward_

    def __iter__(self) -> Iterator[ObservationType]:
        """Iterator for an episode in the environment, which uses the 'active
        dataset' style with __iter__ and send.

        TODO: BUG: Wrappers applied on top of the EnvDataset won't have an
        effect on the values yielded by this iterator. Currently trying to fix
        this inside the IterableWrapper base class, but it's not that simple.

        TODO: To allow wrappers to also be iterable, we need to rename all the
        "private" attributes to "public" names, so that they can call something
        like:
        type(self.env).__iter__(self) (from within the wrapper).

        Yields
        -------
        Observations
            Observations from the environment.

        Raises
        ------
        RuntimeError
            [description]
        """
        if self.closed_ or self.is_closed():
            if self.reached_episode_limit:
                raise gym.error.ClosedEnvironmentError(
                    f"Env has already reached episode limit ({self._max_episodes}) and is closed."
                )
            elif self.reached_step_limit:
                raise gym.error.ClosedEnvironmentError(
                    f"Env has already reached step limit ({self._max_steps}) and is closed."
                )
            else:
                raise gym.error.ClosedEnvironmentError(f"Env is closed, can't iterate over it.")

        # First step reset automatically before iterating, if needed.
        if not self.reset_:
            self.observation_ = self.reset()

        self.done_ = False
        self.action_ = None
        self.reward_ = None

        assert self.observation_ is not None
        # Yield the first observation_.
        # TODO: What do we want to yield, actually? Just observations?
        yield self.observation_

        if self.action_ is None:
            raise RuntimeError(
                f"You have to send an action using send() between every "
                f"observation. (env = {self})"
            )

        # logger.debug(f"episode {self.n_episodes_}/{self._max_episodes}")

        while not any(
            [
                self.done_is_true(),
                self.reached_step_limit,
                self.reached_episode_length_limit,
                self.is_closed(),
            ]
        ):
            # logger.debug(f"step {self.n_steps_}/{self._max_steps},  (episode {self.n_episodes_})")

            # Set those to None to force the user to call .send()
            self.action_ = None
            self.reward_ = None
            yield self.observation_

            if self.action_ is None:
                raise RuntimeError(
                    f"You have to send an action using send() between every "
                    f"observation. (env = {self})"
                )

        # Force the user to call reset() between episodes.
        self.reset_ = False
        self.n_episodes_ += 1

        # logger.debug(f"self.n_steps: {self.n_steps_} self.n_episodes: {self.n_episodes_}")
        # logger.debug(f"Reached step limit: {self.reached_step_limit}")
        # logger.debug(f"Reached episode limit: {self.reached_episode_limit}")
        # logger.debug(f"Reached episode length limit: {self.reached_episode_length_limit}")

        if self.reached_episode_limit or self.reached_step_limit:
            logger.debug("Done iterating, closing the env.")
            self.close()

    @property
    def reached_step_limit(self) -> bool:
        if self._max_steps is None:
            return False
        return self.n_steps_ >= self._max_steps

    @property
    def reached_episode_limit(self) -> bool:
        if self._max_episodes is None:
            return False
        return self.n_episodes_ >= self._max_episodes

    @property
    def reached_episode_length_limit(self) -> bool:
        if self._max_steps_per_episode is None:
            return False
        return self.n_steps_in_episode_ >= self._max_steps_per_episode

    # @property
    def done_is_true(self) -> bool:
        """Returns wether self.done_ is True.

        This will always return False if the wrapped env is a VectorEnv,
        regardless of if the some of the values in the self.done_ array are
        true. This is because the VectorEnvs already reset the underlying envs
        when they have done=True.

        Returns
        -------
        bool
            Wether the episode is considered "done" based on self.done_.
        """
        if isinstance(self.done_, bool):
            return self.done_
        if isinstance(self.env.unwrapped, VectorEnv):
            # VectorEnvs reset themselves, so we consider the "_done" as False,
            # regarless
            return False
        if isinstance(self.done_, Tensor) and not self.done_.shape:
            return bool(self.done_)
        raise RuntimeError(
            f"'done' should be a single boolean, but got "
            f"{self.done_} of type {type(self.done_)})"
        )

        raise RuntimeError(f"Can't tell if we're done: self.done_={self.done_}")

    def reset(self, **kwargs) -> ObservationType:
        observation = self.env.reset(**kwargs)
        self.observation_ = self.observation(observation)
        self.reset_ = True
        self.n_steps_in_episode_ = 0
        # self.n_episodes_ += 1
        return self.observation_

    def close(self) -> None:
        # This will stop the iterator on the next step.
        # self._max_steps = 0
        self.closed_ = True
        self.action_ = None
        self.observation_ = None
        self.reward_ = None
        super().close()

    # TODO: calling `len` on an RL environment probably shouldn't work! (it should
    # behave the same exact way as an IterableDataset)

    # def __len__(self) -> Optional[int]:
    #     if self._max_steps is None:
    #         raise RuntimeError(f"The dataset has no length when max_steps is None.")
    #     return self._max_steps

    def __add__(self, other):
        from sequoia.utils.generic_functions import concatenate

        return concatenate(self, other)


================================================
FILE: sequoia/common/gym_wrappers/env_dataset_test.py
================================================
from functools import partial
from typing import ClassVar, Type

import gym
import numpy as np
import pytest
from gym import spaces

from sequoia.common.transforms import Transforms
from sequoia.conftest import DummyEnvironment, atari_py_required
from sequoia.settings.rl.continual.make_env import make_batched_env

from .env_dataset import EnvDataset
from .transform_wrappers import TransformObservation


class TestEnvDataset:
    # NOTE: We do this so that other tests for potential subclasses or wrappers around
    # an env dataset can reuse this while changing the type of wrapper used (for example
    # in the tests for `EnvProxy`).
    EnvDataset: ClassVar[Type[EnvDataset]] = EnvDataset

    @pytest.fixture()
    def dummy_env_fn(self):
        return DummyEnvironment

    def test_step_normally_works_fine(self, dummy_env_fn: Type[DummyEnvironment]):
        env = dummy_env_fn()
        env = self.EnvDataset(env)
        env.seed(123)

        obs = env.reset()
        assert obs == 0

        obs, reward, done, info = env.step(0)
        assert (obs, reward, done, info) == (0, 5, False, {})
        obs, reward, done, info = env.step(1)
        assert (obs, reward, done, info) == (1, 4, False, {})
        obs, reward, done, info = env.step(1)
        assert (obs, reward, done, info) == (2, 3, False, {})
        obs, reward, done, info = env.step(2)
        assert (obs, reward, done, info) == (1, 4, False, {})
        obs, reward, done, info = env.step(1)
        assert (obs, reward, done, info) == (2, 3, False, {})
        obs, reward, done, info = env.step(1)
        assert (obs, reward, done, info) == (3, 2, False, {})
        obs, reward, done, info = env.step(1)
        assert (obs, reward, done, info) == (4, 1, False, {})

        obs, reward, done, info = env.step(1)
        assert (obs, reward, done, info) == (5, 0, True, {})

        env.reset()
        obs, reward, done, info = env.step(0)
        assert (obs, reward, done, info) == (0, 5, False, {})

    def test_iterating_with_send(self, dummy_env_fn: Type[DummyEnvironment]):
        env = dummy_env_fn(target=5)
        env = self.EnvDataset(env)
        env.seed(123)

        actions = [0, 1, 1, 2, 1, 1, 1, 1, 0, 0, 0]
        expected_obs = [0, 0, 1, 2, 1, 2, 3, 4, 5]
        expected_rewards = [5, 4, 3, 4, 3, 2, 1, 0]
        expected_dones = [False, False, False, False, False, False, False, True]

        reset_obs = 0
        # obs = env.reset()
        # assert obs == reset_obs
        n_calls = 0

        for i, observation in enumerate(env):
            print(f"Step {i}: batch: {observation}")
            assert observation == expected_obs[i]

            action = actions[i]
            reward = env.send(action)
            assert reward == expected_rewards[i]
        # TODO: The episode will end as soon as 'done' is encountered, which means
        # that we will never be given the 'final' observation. In this case, the
        # DummyEnvironment will set done=True when the state is state = target = 5
        # in this case.
        assert observation == 4

    def test_raise_error_when_missing_action(self, dummy_env_fn: Type[DummyEnvironment]):
        env = dummy_env_fn()
        with self.EnvDataset(env) as env:
            env.reset()
            env.seed(123)

            with pytest.raises(RuntimeError):
                for i, observation in zip(range(5), env):
                    pass

    def test_doesnt_raise_error_when_action_sent(self, dummy_env_fn: Type[DummyEnvironment]):
        env = dummy_env_fn()
        with self.EnvDataset(env) as env:
            env.reset()
            env.seed(123)

            for i, obs in zip(range(5), env):
                assert obs in env.observation_space
                reward = env.send(env.action_space.sample())

    def test_max_episodes(self):
        max_episodes = 3
        env = self.EnvDataset(
            env=gym.make("CartPole-v0"),
            max_episodes=max_episodes,
        )
        env.seed(123)
        for episode in range(max_episodes):
            # This makes use of the fact that given this seed, the episode should only
            # last a set number of frames.
            for i, observation in enumerate(env):
                print(f"step {i} {observation}")
                action = 0
                reward = env.send(action)
                if i >= 50:
                    assert False, "The episode should never be longer than about 10 steps!"

        with pytest.raises(gym.error.ClosedEnvironmentError):
            for i, observation in enumerate(env):
                print(f"step {i} {observation}")
                env.send(env.action_space.sample())

    def test_max_steps(self):
        epochs = 3
        max_steps = 5
        env = self.EnvDataset(
            env=gym.make("CartPole-v0"),
            max_steps=max_steps,
        )
        all_rewards = []
        all_observations = []
        with env:
            # TODO: Should we could what is given back by 'reset' as an observation?
            all_observations.append(env.reset())

            for i, batch in enumerate(env):
                assert i < max_steps, f"Max steps should have been respected: {i}"
                rewards = env.send(env.action_space.sample())
                all_rewards.append(rewards)
            assert len(all_rewards) == max_steps

            with pytest.raises(gym.error.ClosedEnvironmentError):
                env.reset()

            with pytest.raises(gym.error.ClosedEnvironmentError):
                for i in range(10):
                    print(i)
                    observation = next(env)
                    rewards = env.send(env.action_space.sample())
                    all_rewards.append(rewards)

        assert len(all_rewards) == max_steps

    def test_max_steps_per_episode(self):
        n_episodes = 4
        max_steps_per_episode = 5
        env = self.EnvDataset(
            env=gym.make("CartPole-v0"),
            max_steps_per_episode=max_steps_per_episode,
        )
        all_observations = []
        with env:
            for episode in range(n_episodes):
                env.reset()
                for i, batch in enumerate(env):
                    assert (
                        i < max_steps_per_episode
                    ), f"Max steps per episode should have been respected: {i}"
                    rewards = env.send(env.action_space.sample())
                assert i == max_steps_per_episode - 1

    @pytest.mark.parametrize("env_name", ["CartPole-v0"])
    @pytest.mark.parametrize("batch_size", [1, 2, 5, 10])
    def test_not_setting_max_steps_per_episode_with_vector_env_raises_warning(
        self, env_name: str, batch_size: int
    ):
        from functools import partial

        from gym.vector import SyncVectorEnv

        env = SyncVectorEnv([partial(gym.make, env_name) for i in range(batch_size)])
        with pytest.warns(UserWarning):
            dataset = self.EnvDataset(env)

        env.close()

    @atari_py_required
    def test_observation_wrapper_applies_to_yielded_objects(self):
        """Test that when an TransformObservation wrapper (or any wrapper that
        changes the Observations) is applied on the env, the observations that are
        yielded by the GymDataLoader are also transformed, in the same way as those
        returned by step() or reset().
        """
        env_name = "ALE/Breakout-v5"
        batch_size = 10
        num_workers = 4
        max_steps_per_episode = 100
        wrapper = partial(TransformObservation, f=Transforms.channels_first)

        vector_env = make_batched_env(env_name, batch_size=batch_size, num_workers=num_workers)
        env = self.EnvDataset(vector_env, max_steps_per_episode=max_steps_per_episode)

        assert env.observation_space == spaces.Box(0, 255, (10, 210, 160, 3), np.uint8)

        env = TransformObservation(env, f=Transforms.channels_first)
        # env = wrapper(env)
        assert env.observation_space == spaces.Box(0, 255, (10, 3, 210, 160), np.uint8)

        # env = DummyWrapper(env)
        # assert env.observation_space == spaces.Box(0, 255 // 2, (10, 210, 160, 3), np.uint8)

        print("Before reset")
        reset_obs = env.reset()
        assert reset_obs in env.observation_space

        print("Before step")
        step_obs, _, _, _ = env.step(env.action_space.sample())
        assert step_obs in env.observation_space

        # We need to send an action before we can do this.
        action = env.action_space.sample()
        print(f"Before send")
        reward = env.send(action)

        # TODO: Perhaps going to drop this API, because if really complicates the
        # wrappers.
        print("Before __next__")
        next_obs = next(env)

        assert next_obs.shape == env.observation_space.shape
        assert next_obs in env.observation_space

        print(f"Before iterating")
        # TODO: This still doesn't call the right .observation() method!

        for i, iter_obs in zip(range(3), env):
            assert iter_obs.shape == env.observation_space.shape
            assert iter_obs in env.observation_space

            action = env.action_space.sample()
            reward = env.send(action)

        env.close()

    @atari_py_required
    def test_iteration_with_more_than_one_wrapper(self):
        """Same as above, but with more than one wrapper applied on top of the
        EnvDataset.
        """
        env_name = "ALE/Breakout-v5"
        batch_size = 10
        num_workers = 4
        max_steps_per_episode = 100

        vector_env = make_batched_env(env_name, batch_size=batch_size, num_workers=num_workers)
        env = self.EnvDataset(vector_env, max_steps_per_episode=max_steps_per_episode)

        assert env.observation_space == spaces.Box(0, 255, (10, 210, 160, 3), np.uint8)

        env = TransformObservation(env, f=Transforms.channels_first)
        assert env.observation_space == spaces.Box(0, 255, (10, 3, 210, 160), np.uint8)

        env = TransformObservation(env, f=[Transforms.to_tensor, Transforms.resize_64x64])
        assert env.observation_space == spaces.Box(0, 1.0, (10, 3, 64, 64), np.float32)
        # env = DummyWrapper(env)
        # assert env.observation_space == spaces.Box(0, 255 // 2, (10, 210, 160, 3), np.uint8)

        print("Before reset")
        reset_obs = env.reset().numpy()
        assert reset_obs in env.observation_space

        print("Before step")
        step_obs, _, _, _ = env.step(env.action_space.sample())
        assert step_obs.numpy() in env.observation_space

        # We need to send an action before we can do this.
        action = env.action_space.sample()
        print(f"Before send")
        reward = env.send(action)

        print("Before __next__")
        next_obs = next(env).numpy()
        assert next_obs in env.observation_space

        print(f"Before iterating")
        # TODO: This still doesn't call the right .observation() method!

        for i, iter_obs in zip(range(3), env):
            assert iter_obs.shape == env.observation_space.shape
            assert iter_obs.numpy() in env.observation_space

            action = env.action_space.sample()
            reward = env.send(action)

        env.close()


================================================
FILE: sequoia/common/gym_wrappers/episode_limit.py
================================================
# IDEA: Limit the total number of episodes, even in vectorized
# environments!
import warnings
from typing import Sequence, Union

import gym
import numpy as np
from gym.error import ClosedEnvironmentError
from gym.utils import colorize

from sequoia.utils import get_logger

from .utils import IterableWrapper

logger = get_logger(__name__)


class EpisodeCounter(IterableWrapper):
    """Closes the environment when a given number of episodes is performed.

    NOTE: This also applies to vectorized environments, i.e. the episode counter
    is incremented for when every individual environment reaches the end of an
    episode.
    """

    def __init__(self, env: gym.Env):
        super().__init__(env=env)
        self._episode_counter: int = 0  # -1 to account for the initial reset?
        self._done: Union[bool, Sequence[bool]] = False
        if self.is_vectorized:
            self._done = np.zeros(self.env.num_envs, dtype=bool)
        self._initial_reset: bool = False

    def episode_count(self) -> int:
        return self._episode_counter

    def reset(self):
        obs = super().reset()

        if self._episode_counter >= self._max_episodes:
            raise ClosedEnvironmentError(
                f"Env reached max number of episodes ({self._max_episodes})"
            )

        if self.is_vectorized:
            if not self._initial_reset:
                self._initial_reset = True
                self._episode_counter = 0
            else:
                # Resetting all envs.
                n_unfinished_envs: int = (self._done == False).sum()
                self._episode_counter += n_unfinished_envs
                self._done[:] = False
        else:
            # Increment every time for non-vectorized env, or just once for
            # VectorEnvs.
            self._episode_counter += 1

        return obs

    def step(self, action):
        obs, reward, done, info = self.env.step(action)

        if self.is_vectorized:
            self._episode_counter += (done == True).sum()
        else:
            # NOTE: We don't increment the episode counter based on `done` here
            # with non-vectorized environments. Instead, we cound the number of
            # calls to the `reset()` method.
            pass
            # if done:
            #     self._episode_counter += 1
        return obs, reward, done, info


class EpisodeLimit(EpisodeCounter):
    """Closes the environment when a given number of episodes is performed.

    NOTE: This also applies to vectorized environments, i.e. the episode counter
    is incremented for when every individual environment reaches the end of an
    episode.
    """

    def __init__(self, env: gym.Env, max_episodes: int):
        super().__init__(env=env)
        self._max_episodes = max_episodes

    @property
    def max_episodes(self) -> int:
        return self._max_episodes

    def closed_error_message(self) -> str:
        """Return the error message to use when attempting to use the closed env.

        This can be useful for wrappers that close when a given condition is reached,
        e.g. a number of episodes has been performed, which could return a more relevant
        message here.
        """
        return f"Env reached max number of episodes ({self.max_episodes})"

    def reset(self):
        # NOTE: MayCloseEarly.reset() will raise a ClosedEnvironmentError if
        # self.is_closed() is True, which will always be the case if we exceed the
        # limit.
        obs = super().reset()
        assert not self.is_closed()

        if self.is_vectorized:
            n_unfinished_envs: int = (~self._done).sum()
            if self._episode_counter != 0 and n_unfinished_envs:
                # Wasting some steps in unfinished environments!
                w = UserWarning(
                    f"Calling .reset() on a VectorEnv resets all the envs, "
                    f"ending episodes prematurely. This env has a limit of "
                    f"{self._max_episodes} episodes in total, so by calling "
                    f"reset() here, you could be wasting {n_unfinished_envs} "
                    f"episodes from your budget!"
                )
                warnings.warn(colorize(f"WARN: {w}", "yellow"))

        logger.debug(f"Starting episode  {self._episode_counter}/{self._max_episodes})")
        if self._episode_counter == self._max_episodes:
            logger.warning("Beware, entering last episode")
        return obs

    def __iter__(self):
        return super().__iter__()

    def step(self, action):
        if self.is_closed():
            if self._episode_counter >= self._max_episodes:
                raise ClosedEnvironmentError(
                    f"Env reached max number of episodes ({self._max_episodes})"
                )
            raise ClosedEnvironmentError("Can't step through closed env.")

        obs, reward, done, info = super().step(action)

        if self.is_vectorized:
            # BUG: This can be reached while in the last 'send' (which uses self.send)
            # of the previous epoch while iterating
            if any(done) and self._episode_counter >= self.max_episodes:
                logger.info(f"Closing the envs since we reached the max number of episodes.")
                self.close()
                done[:] = True
        else:
            if done and self._episode_counter == self._max_episodes:
                logger.info(f"Closing the env since we reached the max number of episodes.")
                self.close()

        return obs, reward, done, info


================================================
FILE: sequoia/common/gym_wrappers/episode_limit_test.py
================================================
from functools import partial

import gym
import numpy as np
import pytest
from gym.vector import SyncVectorEnv
from gym.wrappers import TimeLimit

from sequoia.conftest import DummyEnvironment

from .env_dataset import EnvDataset
from .episode_limit import EpisodeLimit


def test_basics():
    env = TimeLimit(gym.make("CartPole-v0"), max_episode_steps=10)
    env = EnvDataset(env)
    env = EpisodeLimit(env, max_episodes=3)
    env.seed(123)

    for episode in range(3):
        obs = env.reset()
        done = False
        step = 0
        while not done:
            print(f"step {step}")
            obs, reward, done, info = env.step(env.action_space.sample())
            step += 1

    assert env.is_closed()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        _ = env.reset()

    with pytest.raises(gym.error.ClosedEnvironmentError):
        _ = env.step(env.action_space.sample())

    with pytest.raises(gym.error.ClosedEnvironmentError):
        for _ in env:
            break


@pytest.mark.parametrize("env_name", ["CartPole-v0"])
def test_episode_limit_with_single_env(env_name: str):
    """EpisodeLimit should close the env when a given number of episodes is
    reached.
    """
    env = gym.make(env_name)
    env = EpisodeLimit(env, max_episodes=3)
    env.seed(123)

    done = False
    assert env.episode_count() == 0
    # First episode.
    obs = env.reset()
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
    assert env.episode_count() == 1

    # Second episode.
    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())

    assert env.episode_count() == 2

    # Third episode.
    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())

    assert env.episode_count() == 3
    assert env.is_closed()

    with pytest.raises(gym.error.ClosedEnvironmentError):
        obs = env.reset()

    with pytest.raises(gym.error.ClosedEnvironmentError):
        _ = env.step(env.action_space.sample())


@pytest.mark.parametrize("env_name", ["CartPole-v0"])
def test_episode_limit_with_single_env_dataset(env_name: str):
    """EpisodeLimit should close the env when a given number of episodes is
    reached when iterating through the env.
    """
    env = gym.make(env_name)
    env = EpisodeLimit(env, max_episodes=2)
    env = EnvDataset(env)
    # TODO: The reverse ordering doesn't work: (EnvDataset(EpisodeLimit))
    # TODO: There's a warning that doing this steps even though done = True?
    env.seed(123)

    done = False
    # First episode.
    for obs in env:
        print("in loop:", env.episode_count())
        reward = env.send(env.action_space.sample())

    print("between loops", env.episode_count())
    # Second episode.
    for i, obs in enumerate(env):
        print("Second loop", env.episode_count())
        reward = env.send(env.action_space.sample())

    # Trying to start a third episode should fail:
    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()
        for obs in env:
            assert False


@pytest.mark.parametrize("batch_size", [3, 5])
def test_episode_limit_with_vectorized_env(batch_size):
    """Test that when adding the EpisodeLimit wrapper on top of a vectorized
    environment, the episode limit is with respect to each individual env rather
    than the batched env.
    """
    starting_values = [0 for i in range(batch_size)]
    targets = [10 for i in range(batch_size)]

    env = SyncVectorEnv(
        [
            partial(DummyEnvironment, start=start, target=target, max_value=10 * 2)
            for start, target in zip(starting_values, targets)
        ]
    )
    env = EpisodeLimit(env, max_episodes=2 * batch_size)

    obs = env.reset()
    assert obs.tolist() == starting_values
    print("reset obs: ", obs)
    for i in range(10):
        print(i, obs)
        actions = np.ones(batch_size)
        obs, reward, done, info = env.step(actions)
    # all episodes end at step 10
    assert all(done)

    # Because of how VectorEnvs work, the obs are the new 'reset' obs, rather
    # than the final obs in the episode.
    assert obs.tolist() == starting_values

    assert obs.tolist() == starting_values
    print("reset obs: ", obs)
    for i in range(10):
        print(i, obs)
        actions = np.ones(batch_size)
        obs, reward, done, info = env.step(actions)

    # all episodes end at step 10
    assert all(done)
    assert env.is_closed
    assert obs.tolist() == starting_values
    with pytest.raises(gym.error.ClosedEnvironmentError):
        actions = np.ones(batch_size)
        obs, reward, done, info = env.step(actions)


# @pytest.mark.xfail(reason="TODO: Fix the bugs in the interaction between "
#                           "EnvDataset and EpisodeLimit.")
@pytest.mark.parametrize("batch_size", [3, 5])
def test_episode_limit_with_vectorized_env_dataset(batch_size):
    """Test that when adding the EpisodeLimit wrapper on top of a vectorized
    environment, the episode limit is with respect to each individual env rather
    than the batched env.
    """
    start = 0
    target = 10
    starting_values = [start for i in range(batch_size)]
    targets = [target for i in range(batch_size)]

    env = SyncVectorEnv(
        [
            partial(DummyEnvironment, start=start, target=target, max_value=10 * 2)
            for start, target in zip(starting_values, targets)
        ]
    )

    max_episodes = 2
    # TODO: For some reason the reverse order doesn't work!
    env = EpisodeLimit(env, max_episodes=max_episodes * batch_size)
    env = EnvDataset(env)

    for i, obs in enumerate(env):
        print(i, obs)
        actions = np.ones(batch_size)
        reward = env.send(actions)

    assert i == max_episodes * target - 1

    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()

    with pytest.raises(gym.error.ClosedEnvironmentError):
        for i, obs in enumerate(env):
            print(i, obs)
            actions = np.ones(batch_size)
            reward = env.send(actions)

    # all episodes end at step 10


# @pytest.mark.xfail(reason=f"BUG in EnvDataset, it doesn't finish ")
@pytest.mark.parametrize("batch_size", [3, 5])
def test_reset_vectorenv_with_unfinished_episodes_raises_warning(batch_size):
    """Test that when adding the EpisodeLimit wrapper on top of a vectorized
    environment, the episode limit is with respect to each individual env rather
    than the batched env.
    """
    start = 0
    target = 10
    starting_values = [start for i in range(batch_size)]
    targets = [target for i in range(batch_size)]

    env = SyncVectorEnv(
        [
            partial(DummyEnvironment, start=start, target=target, max_value=10 * 2)
            for start, target in zip(starting_values, targets)
        ]
    )
    env = EpisodeLimit(env, max_episodes=3 * batch_size)

    obs = env.reset()
    _ = env.step(env.action_space.sample())
    _ = env.step(env.action_space.sample())
    with pytest.warns(UserWarning) as record:
        env.reset()


================================================
FILE: sequoia/common/gym_wrappers/measure_performance.py
================================================
""" Abstract base class for a Wrapper that gets applied onto the environment in order to
measure the online training performance.

The concrete versions of this wrapper are located.
"""
from abc import ABC
from typing import Dict, Generic, List, Optional

from sequoia.common.gym_wrappers.utils import EnvType, IterableWrapper
from sequoia.common.metrics import MetricsType
from sequoia.settings.base import Environment


class MeasurePerformanceWrapper(IterableWrapper[EnvType], Generic[EnvType, MetricsType], ABC):
    def __init__(self, env: Environment):
        super().__init__(env)
        self._metrics: Dict[int, MetricsType] = {}

    def get_online_performance(self) -> Dict[int, List[MetricsType]]:
        """Returns the online performance over the evaluation period.

        Returns
        -------
        Dict[int, MetricsType]
            A dict mapping from step number to the Metrics object captured at that step.
        """
        return dict(self._metrics.copy())

    def get_average_online_performance(self) -> Optional[MetricsType]:
        """Returns the average online performance over the evaluation period, or None
        if the env was not iterated over / interacted with.

        Returns
        -------
        Optional[MetricsType]
            Metrics
        """
        if not self._metrics:
            return None
        return sum(self._metrics.values())


================================================
FILE: sequoia/common/gym_wrappers/multi_task_environment.py
================================================
import bisect
import dataclasses
from functools import singledispatch
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, TypeVar, Union

import gym
import numpy as np
from gym import spaces
from gym.envs.classic_control import CartPoleEnv
from torch import Tensor

from sequoia.common.spaces.named_tuple import NamedTupleSpace
from sequoia.utils.logging_utils import get_logger

from .utils import MayCloseEarly

task_param_names: Dict[Union[Type[gym.Env], str], List[str]] = {
    CartPoleEnv: ["gravity", "masscart", "masspole", "length", "force_mag", "tau"]
    # TODO: Add more of the classic control envs here.
}
logger = get_logger(__name__)


X = TypeVar("X")
T = TypeVar("T")
K = TypeVar("K")
V = TypeVar("V")


def make_env_attributes_task(
    env: gym.Env,
    task_params: Union[List[str], Dict[str, Any]],
    seed: int = None,
    rng: np.random.Generator = None,
    noise_std: float = 0.2,
) -> Dict[str, Any]:
    task: Dict[str, Any] = {}
    rng: np.random.Generator = rng or np.random.default_rng(seed)

    if isinstance(task_params, list):
        task_params = {param: getattr(env.unwrapped, param) for param in task_params}

    for attribute, default_value in task_params.items():
        new_value = default_value

        if isinstance(default_value, (int, float, np.ndarray)):
            new_value *= rng.normal(1.0, noise_std)
            # Clip the value to be in the [0.1*default, 10*default] range.
            new_value = max(0.1 * default_value, new_value)
            new_value = min(10 * default_value, new_value)
            if isinstance(default_value, int):
                new_value = round(new_value)

        elif isinstance(default_value, bool):
            new_value = rng.choice([True, False])
        else:
            raise NotImplementedError(
                f"TODO: Don't yet know how to sample a random value for "
                f"attribute {attribute} with default value {default_value} of type "
                f" {type(default_value)}."
            )
        task[attribute] = new_value
    return task


# class ObservationsAndTaskLabels(NamedTuple):
#     x: Any
#     task_labels: Any


@singledispatch
def add_task_labels(observation: Any, task_labels: Any) -> Any:
    raise NotImplementedError(observation, task_labels)


@add_task_labels.register(int)
@add_task_labels.register(float)
@add_task_labels.register(Tensor)
@add_task_labels.register(np.ndarray)
def _add_task_labels_to_single_obs(observation: X, task_labels: T) -> Tuple[X, T]:
    return {
        "x": observation,
        "task_labels": task_labels,
    }
    # return ObservationsAndTaskLabels(observation, task_labels)


from sequoia.common.batch import Batch


@add_task_labels.register(Batch)
def _add_task_labels_to_batch(observation: Batch, task_labels: T) -> Batch:
    return dataclasses.replace(observation, task_labels=task_labels)


from sequoia.common.spaces import TypedDictSpace


@add_task_labels.register(spaces.Space)
def _add_task_labels_to_space(observation: spaces.Space, task_labels: T) -> spaces.Dict:
    # TODO: Return a dict or NamedTuple at some point:
    return TypedDictSpace(
        x=observation,
        task_labels=task_labels,
    )
    # return NamedTupleSpace(
    #     x=observation, task_labels=task_labels, dtype=ObservationsAndTaskLabels,
    # )


@add_task_labels.register(NamedTupleSpace)
def _add_task_labels_to_namedtuple(
    observation: NamedTupleSpace, task_labels: gym.Space
) -> NamedTupleSpace:
    assert "task_labels" not in observation._spaces, "space already has task labels!"
    return type(observation)(
        **observation._spaces, task_labels=task_labels, dtype=observation.dtype
    )


@add_task_labels.register(spaces.Tuple)
@add_task_labels.register(tuple)
def _add_task_labels_to_tuple(observation: Tuple, task_labels: T) -> Tuple:
    return type(observation)([*observation, task_labels])


@add_task_labels.register(spaces.Dict)
def _add_task_labels_to_dict_space(observation: spaces.Dict, task_labels: T) -> spaces.Dict:
    assert "task_labels" not in observation.spaces
    d_spaces = observation.spaces.copy()
    d_spaces["task_labels"] = task_labels
    return type(observation)(**d_spaces)


@add_task_labels.register(TypedDictSpace)
def _add_task_labels_to_typed_dict_space(
    observation: TypedDictSpace, task_labels: T
) -> TypedDictSpace:
    # TODO: Raise a warning instead?
    # assert "task_labels" not in observation.spaces, observation
    d_spaces = observation.spaces.copy()
    d_spaces["task_labels"] = task_labels
    # NOTE: We assume here that the `dtype` of the typed dict space (e.g. the
    # `Observations` class, usually) can handle having a `task_labels` field.
    return type(observation)(**d_spaces, dtype=observation.dtype)


@add_task_labels.register(dict)
def _add_task_labels_to_dict(observation: Dict[str, V], task_labels: T) -> Dict[str, Union[V, T]]:
    new: Dict[str, Union[V, T]] = {key: value for key, value in observation.items()}
    # TODO: Raise a warning instead?
    # assert "task_labels" not in new
    new["task_labels"] = task_labels
    return type(observation)(**new)  # type: ignore


class MultiTaskEnvironment(MayCloseEarly):
    """Creates 'tasks' by modifying attributes or applying functions to the wrapped env.

    This wrapper accepts a `task_schedule` dictionary, which maps from a given
    step to either:
    - dicts of attributes that are to be set on the (unwrapped) env at that step, or
    - callables to apply to the wrapped environment at the given steps.

    For example, when wrapping the "CartPole-v0" environment, we could vary any
    of the "gravity", "masscart", "masspole", "length", "force_mag" or "tau"
    attributes like so:
    ```
    env = gym.make("CartPole-v0")
    env = MultiTaskEnvironment(env, task_schedule={
        # step -> attributes to set on the environment when step is reached.
        10: dict(length=2.0),
        20: dict(length=1.0, gravity=20.0),
        30: dict(length=0.5, gravity=5.0),
    })
    env.seed(123)
    env.reset()
    ```
    During steps 0-9, the environment is unchanged (length = 0.5).
    At step 10, the length of the pole will be set to 2.0
    At step 20, the length of the pole will be set to 1.0, and the gravity will
        be changed from its default value (9.8) to 20.
    etc.

    TODO: Might be more accurate to call this a `TaskIncrementalEnvironment`, rather
    than `MultiTaskEnvironemnt`, which is more related to the `new_random_task_on_reset`
    behaviour anyway.
    TODOs:
    - Copy this to a `incremental_environment.py` or something similar
    - Remove all references to this `new_random_task_on_reset` stuff.
    - Rename "smooth_environment" to "nonstationary_environment"?
    """

    def __init__(
        self,
        env: gym.Env,
        task_schedule: Dict[int, Union[Dict[str, float], Callable[[gym.Env], Any]]] = None,
        task_params: List[str] = None,
        noise_std: float = 0.2,
        add_task_dict_to_info: bool = False,
        add_task_id_to_obs: bool = False,
        new_random_task_on_reset: bool = False,
        starting_step: int = 0,
        nb_tasks: int = None,
        max_steps: int = None,
        seed: int = None,
    ):
        """Wraps an environment, allowing it to be 'multi-task'.

        NOTE: Assumes that all the attributes in 'task_param_names' are floats
        for now.

        TODO: Check the case where a task boundary is reached and the episode is not
        done yet.

        Args:
            env (gym.Env): The environment to wrap.
            task_param_names (List[str], optional): The attributes of the
                environment that will be allowed to change. Defaults to None.
            task_schedule (Dict[int, Dict[str, float]], optional): Schedule
                mapping from a given step number to the state that will be set
                at that time.
            noise_std (float, optional): The standard deviation of the noise
                used to create the different tasks.
        """
        super().__init__(env=env)
        self.env: gym.Env
        self.noise_std = noise_std

        if not task_params:
            unwrapped_type = type(env.unwrapped)
            if unwrapped_type in task_param_names:
                task_params = task_param_names[unwrapped_type]
            elif task_schedule:
                if not any(isinstance(v, dict) for v in task_schedule.values()):
                    task_params: List[str] = None
                    for value in task_schedule.values():
                        if not isinstance(value, dict):
                            continue
                        if task_params is None:
                            task_params = list(value.keys())
                        elif not task_params == list(value.keys()):
                            raise NotImplementedError(
                                "All tasks need to have the same keys for now."
                            )
            else:
                logger.warning(
                    UserWarning(
                        f"You didn't pass any 'task params', and the task "
                        f"parameters aren't known for this type of environment "
                        f"({unwrapped_type}), so we can't make it multi-task with "
                        f"this wrapper."
                    )
                )

        self._max_steps: Optional[int] = max_steps
        self._starting_step: int = starting_step
        self._steps: int = self._starting_step
        self._episodes: int = 0

        self._current_task: Dict = {}
        self._task_schedule: Dict[int, Dict[str, Any]] = task_schedule or {}

        self.task_params: List[str] = task_params or []
        self.default_task: np.ndarray = self.current_task.copy()
        self.task_schedule = task_schedule or {}

        self.new_random_task_on_reset: bool = new_random_task_on_reset
        # Wether we will add a task id to the observation.
        self.add_task_id_to_obs = add_task_id_to_obs
        # Wether we will add the task dict (the values of the attributes) to the
        # 'info' dict.
        self.add_task_dict_to_info = add_task_dict_to_info

        if 0 not in self.task_schedule:
            self.task_schedule[0] = self.default_task

        # TODO: Need to do a major refactor of this wrapper.
        # Need to clean this up: passing the task schedule to the env and having it "mean" different
        # things depending on the value other arguments (discrete vs continuous, etc) is very ugly.
        nb_tasks = nb_tasks if nb_tasks is not None else len(self.task_schedule)

        if self.add_task_id_to_obs:
            self.observation_space = add_task_labels(
                self.env.observation_space,
                spaces.Discrete(n=nb_tasks),
            )
            # self.observation_space = spaces.Tuple([
            #     self.env.observation_space,
            #     spaces.Discrete(n=n_tasks)
            # ])
        # self._closed = False

        self._on_task_switch_callback: Optional[Callable[[int], None]] = None

        self.np_random: np.random.Generator
        self.seed(seed)

    @property
    def current_task_id(self) -> int:
        """Returns the 'index' of the current task within the task schedule."""
        if self.new_random_task_on_reset:
            # The task id is the index of the key that corresponds to the current task.
            return self._current_task_id
        current_step = self._steps
        assert current_step >= 0
        task_steps: List[int] = sorted(self.task_schedule.keys())
        assert 0 in task_steps
        insertion_index = bisect.bisect_right(task_steps, current_step)
        # The current task id is the insertion index - 1
        current_task_index = insertion_index - 1
        return current_task_index

    @current_task_id.setter
    def current_task_id(self, value: int) -> None:
        self._current_task_id = value

    def set_on_task_switch_callback(self, callback: Callable[[int], None]) -> None:
        self._on_task_switch_callback = callback

    def on_task_switch(self, task_id: int):
        if task_id != self.current_task_id:
            logger.debug(f"Switching from {self.current_task_id} -> {task_id}.")
            # TODO: We could maybe use this to call the method's 'on_task_switch'
            # callback?
            if self._on_task_switch_callback:
                self._on_task_switch_callback(task_id)

    def step(self, *args, **kwargs):
        # If we reach a step in the task schedule, then we change the task to
        # that given step.
        # if self._closed:
        #     raise gym.error.ClosedEnvironmentError("Can't step in closed env.")

        if self.steps in self.task_schedule and not self.new_random_task_on_reset:
            self.current_task = self.task_schedule[self.steps]
            logger.debug(f"New task at step {self.steps}: {self.current_task}")
            # Adding this on_task_switch, since it could maybe be easier than
            # having to add a callback wrapper to use.
            task_id = sorted(self.task_schedule.keys()).index(self.steps)
            self.on_task_switch(task_id)

        # elif self.new_random_task_on_reset:
        #     self.current_task_id

        observation, rewards, done, info = super().step(*args, **kwargs)
        if self.add_task_id_to_obs:
            observation = add_task_labels(observation, self.current_task_id)
        if self.add_task_dict_to_info:
            info.update(self.current_task)

        self.steps += 1
        return observation, rewards, done, info

    # def close(self, **kwargs) -> None:
    #     return super().close(**kwargs)

    def reset(self, new_random_task: bool = None, **kwargs):
        """Resets the wrapped environment.

        If `new_random_task` is True, this also sets a new random task as the
        current task.

        NOTE: This resets the wrapped env, but doesn't reset the number of steps
        taken, hence the 'task' progression according to the task_schedule
        doesn't change.
        """
        if new_random_task is None:
            new_random_task = self.new_random_task_on_reset

        # if self._closed:
        #     raise gym.error.ClosedEnvironmentError("Can't reset closed env.")

        if new_random_task:
            prev_task_id = self.current_task_id
            previous_task = self.current_task
            self.current_task = self.random_task()
            episode = self._episodes
            step = self._steps
            if previous_task != self.current_task:
                logger.debug(
                    f"Switching tasks at step {step} (end of episode {episode}): "
                    f"{prev_task_id} -> {self.current_task_id} {self.current_task}"
                )

        observation = self.env.reset(**kwargs)
        if self.add_task_id_to_obs:
            observation = add_task_labels(observation, self.current_task_id)

        self._episodes += 1
        return observation

    @property
    def steps(self) -> int:
        return self._steps

    @steps.setter
    def steps(self, value: int) -> None:
        if value < self._starting_step:
            value = self._starting_step
        if self._max_steps is not None and value > self._max_steps:
            # Reached the maximum number of steps, stagnate.
            # TODO: What exactly should we do in this case? Should we close
            # the env? Or just stay at the same 'step' in the task schedule
            # forever?
            # TODO: Is this the "correct" way to limit the number of steps in
            # an environment?
            value = self._max_steps
        self._steps = value

    @property
    def current_task(self) -> Dict[str, Any]:
        # NOTE: This caching mechanism assumes that we are the only source
        # of potential change for these attributes.
        # At the moment, We're not really concerned with performance, so we
        # could turn it off it if misbehaves or causes bugs.
        if not self._current_task:
            # NOTE: We get the attributes from the unwrapped environment, which
            # effectively bypasses any wrappers. Don't know if this is good
            # practice, but oh well.
            self._current_task = {
                name: getattr(self.env.unwrapped, name) for name in self.task_params
            }
        # Double-checking that the attributes didn't change somehow without us
        # knowing.
        # TODO: Maybe remove this when done debugging/testing this since it's a
        # little bit of a waste of compute.
        for attribute, value_in_dict in self._current_task.items():
            current_env_value = getattr(self.env.unwrapped, attribute)
            if value_in_dict != current_env_value:
                raise RuntimeError(
                    f"The value of the attribute '{attribute}' was changed from "
                    f"somewhere else! (value in _current_task: {value_in_dict}, "
                    f"value on env: {current_env_value})"
                )
        return self._current_task

    @current_task.setter
    def current_task(self, task: Union[Dict[str, float], Sequence[float], Callable]):
        # logger.debug(f"(_step: {self.steps}): Setting the current task to {task}.")

        if isinstance(task, (list, np.ndarray)):
            assert len(task) == len(self.task_params), "lengths should match!"
            task_dict = {}
            for k, value in zip(self.task_params, task):
                task_dict[k] = value
            task = task_dict
        if task in self.task_schedule.values():
            self._current_task_id = [
                i for i, (k, v) in enumerate(self.task_schedule.items()) if v == task
            ][0]
            # assert False, f"Hey, this task is in the values at index {self._current_task_id}"
        if callable(task):
            task(self.env)
        elif isinstance(task, dict):
            self._current_task.clear()
            self._current_task.update(self.default_task)

            if isinstance(task, dict):
                for k, value in task.items():
                    assert isinstance(k, str), "The task dict should have str keys."
                    self._current_task[k] = value

            # Actually change the value of the task attributes in the environment.
            for name, param_value in self._current_task.items():
                assert hasattr(
                    self.env.unwrapped, name
                ), f"the unwrapped environment doesn't have a {name} attribute!"
                setattr(self.env.unwrapped, name, param_value)
        else:
            raise RuntimeError(
                f"don't know how to set task {task}! (tasks must be "
                f"either callables or dicts mapping attributes to "
                f"values. "
            )

    def random_task(self) -> Dict:
        """Samples a random 'task'.

        If the wrapper already has a task schedule, then one of the tasks (values of the
        task schedule dict) is selected at random.

        How the random value for an attribute is sampled depends on the type of
        its default value in the envionment:

        - `int`, `float`, or `np.ndarray` attributes are sampled by multiplying
            the default value by a N(mean=1., std=`self.noise_std`). `int`
            attributes are then rounded to the nearest value.

        - `bool` attributes are sampled randomly from `True` and `False`.

        TODO: It might be cool to give an option for passing a prior that could
        be used for a given attribute, but it would add a bit too much
        complexity and isn't really needed atm.

        Raises:
            NotImplementedError: If the default value has an unsupported type.

        Returns:
            Dict: A dict of the attribute name, and the value that would be set
                for that attribute.
        """
        if self.new_random_task_on_reset:
            return self.np_random.choice(list(self.task_schedule.values()))
        return make_env_attributes_task(
            self,
            task_params=self.default_task,
            rng=self.np_random,
            noise_std=self.noise_std,
        )

    def update_task(self, values: Dict = None, **kwargs):
        """Updates the current task with the params from values or kwargs.

        Important: Use this method to update properties of the current task,
        instead of trying modifying the `current_task` dictionary. For example,
        `env.current_task["length"] = 2.0` will NOT update the length of
        the pole in CartPole, whereas using `env.update_task(length=2.0)` will!

        NOTE: When passing a dictionary, any missing param is kept at its
        current value (not reset to the default value).
        """
        current_task = self.current_task.copy()
        if isinstance(values, dict):
            current_task.update(values)
        elif values is not None:
            raise RuntimeError(f"values can only be a dict or None (received {values}).")
        if kwargs:
            current_task.update(kwargs)
        self.current_task = current_task

    def seed(self, seed: Optional[int] = None) -> List[int]:
        self.np_random = np.random.default_rng(seed)
        self.action_space.seed(seed)
        self.observation_space.seed(seed)
        return self.env.seed(seed)

    def task_dict(self, task_array: np.ndarray) -> Dict[str, float]:
        assert len(task_array) == len(
            self.task_params
        ), "Lengths should match the number of task parameters."
        return dict(zip(self.task_params, task_array))

    @property
    def task_schedule(self) -> Dict:
        return self._task_schedule

    @task_schedule.setter
    def task_schedule(self, value: Dict[str, Any]):
        self._task_schedule = {}
        if 0 not in value:
            self._task_schedule[0] = self.default_task.copy()

        for step, task in sorted(value.items()):
            # Convert any numpy arrays or lists in the task schedule to dicts
            # mapping from attribute name to value to be set.
            if isinstance(task, (list, np.ndarray)):
                task = self.task_dict(task)
            if not (isinstance(task, dict) or callable(task)):
                raise RuntimeError(
                    f"Task schedule can only contain dicts, lists, numpy arrays or"
                    f"callables, but got {task}!"
                )
            self._task_schedule[step] = task

        if self._steps in self._task_schedule:
            self.current_task = self._task_schedule[self._steps]


================================================
FILE: sequoia/common/gym_wrappers/multi_task_environment_test.py
================================================
from typing import Dict, List, Tuple

import gym
import matplotlib.pyplot as plt
import pytest
from gym import spaces
from gym.envs.classic_control import CartPoleEnv
from gym.vector import SyncVectorEnv
from gym.wrappers import TimeLimit

from sequoia.common.gym_wrappers import MultiTaskEnvironment
from sequoia.conftest import atari_py_required, monsterkong_required, param_requires_monsterkong
from sequoia.utils.utils import dict_union

from .multi_task_environment import MultiTaskEnvironment

supported_environments: List[str] = ["CartPole-v0"]


def test_task_schedule():
    original: CartPoleEnv = gym.make("CartPole-v0")
    starting_length = original.length
    starting_gravity = original.gravity

    task_schedule = {
        10: dict(length=0.1),
        20: dict(length=0.2, gravity=-12.0),
        30: dict(gravity=0.9),
    }
    env = MultiTaskEnvironment(original, task_schedule=task_schedule)
    env.seed(123)
    env.reset()
    for step in range(100):
        _, _, done, _ = env.step(env.action_space.sample())
        # env.render()
        if done:
            env.reset()

        if 0 <= step < 10:
            assert env.length == starting_length and env.gravity == starting_gravity
        elif 10 <= step < 20:
            assert env.length == 0.1
        elif 20 <= step < 30:
            assert env.length == 0.2 and env.gravity == -12.0
        elif step >= 30:
            assert env.length == starting_length and env.gravity == 0.9

    env.close()


@pytest.mark.parametrize("environment_name", supported_environments)
def test_multi_task(environment_name: str):
    original = gym.make(environment_name)
    env = MultiTaskEnvironment(original)
    env.reset()
    env.seed(123)
    plt.ion()
    default_task = env.default_task
    for task_id in range(5):
        for i in range(20):
            observation, reward, done, info = env.step(env.action_space.sample())
            # env.render()
        env.reset(new_random_task=True)
        print(f"New task: {env.current_task}")
    env.close()
    plt.ioff()
    plt.close()


@pytest.mark.skip(reason="This generates some output, uncomment this to run it.")
@pytest.mark.parametrize("environment_name", supported_environments)
def test_monitor_env(environment_name):
    original = gym.make(environment_name)
    # original = CartPoleEnv()
    env = MultiTaskEnvironment(original)
    env = gym.wrappers.Monitor(
        env,
        f"recordings/multi_task_{environment_name}",
        force=True,
        write_upon_reset=False,
    )
    env.seed(123)
    env.reset()

    plt.ion()

    task_param_values: List[Dict] = []
    default_length: float = env.length

    for task_id in range(20):
        for i in range(100):
            observation, reward, done, info = env.step(env.action_space.sample())
            # env.render()
            if done:
                env.reset(new_task=False)

            task_param_values.append(env.current_task.copy())
            # env.update_task(length=(i + 1) / 100 * 2 * default_length)
        env.update_task()
        print(f"New task: {env.current_task.copy()}")
    env.close()
    plt.ioff()
    plt.close()


def test_update_task():
    """Test that using update_task changes the given values in the environment
    and in the current_task dict, and that when a value isn't passed to
    update_task, it isn't reset to its default but instead keeps its previous
    value.
    """
    original = gym.make("CartPole-v0")
    env = MultiTaskEnvironment(original)
    env.reset()
    env.seed(123)

    assert env.length == original.length
    env.update_task(length=1.0)
    assert env.current_task["length"] == env.length == 1.0
    env.update_task(gravity=20.0)
    assert env.length == 1.0
    assert env.current_task["gravity"] == env.gravity == 20.0
    env.close()


def test_add_task_dict_to_info():
    """Test that the 'info' dict contains the task dict."""
    original: CartPoleEnv = gym.make("CartPole-v0")
    starting_length = original.length
    starting_gravity = original.gravity

    task_schedule = {
        10: dict(length=0.1),
        20: dict(length=0.2, gravity=-12.0),
        30: dict(gravity=0.9),
    }
    env = MultiTaskEnvironment(
        original,
        task_schedule=task_schedule,
        add_task_dict_to_info=True,
    )
    env.seed(123)
    env.reset()
    for step in range(100):
        _, _, done, info = env.step(env.action_space.sample())
        # env.render()
        if done:
            env.reset()

        if 0 <= step < 10:
            assert env.length == starting_length and env.gravity == starting_gravity
            assert info == env.default_task
        elif 10 <= step < 20:
            assert env.length == 0.1
            assert info == dict_union(env.default_task, task_schedule[10])
        elif 20 <= step < 30:
            assert env.length == 0.2 and env.gravity == -12.0
            assert info == dict_union(env.default_task, task_schedule[20])
        elif step >= 30:
            assert env.length == starting_length and env.gravity == 0.9
            assert info == dict_union(env.default_task, task_schedule[30])

    env.close()


def test_add_task_id_to_obs():
    """Test that the 'info' dict contains the task dict."""
    original: CartPoleEnv = gym.make("CartPole-v0")
    starting_length = original.length
    starting_gravity = original.gravity

    task_schedule = {
        10: dict(length=0.1),
        20: dict(length=0.2, gravity=-12.0),
        30: dict(gravity=0.9),
    }
    env = MultiTaskEnvironment(
        original,
        task_schedule=task_schedule,
        add_task_id_to_obs=True,
    )
    env.seed(123)
    env.reset()

    assert env.observation_space == spaces.Dict(
        x=original.observation_space,
        task_labels=spaces.Discrete(4),
    )

    for step in range(100):
        obs, _, done, info = env.step(env.action_space.sample())
        # env.render()

        x, task_id = obs["x"], obs["task_labels"]

        if 0 <= step < 10:
            assert env.length == starting_length and env.gravity == starting_gravity
            assert task_id == 0, step

        elif 10 <= step < 20:
            assert env.length == 0.1
            assert task_id == 1, step

        elif 20 <= step < 30:
            assert env.length == 0.2 and env.gravity == -12.0
            assert task_id == 2, step

        elif step >= 30:
            assert env.length == starting_length and env.gravity == 0.9
            assert task_id == 3, step

        if done:
            obs = env.reset()
            assert isinstance(obs, dict)

    env.close()


def test_starting_step_and_max_step():
    """Test that when start_step and max_step arg given, the env stays within
    the [start_step, max_step] portion of the task schedule.
    """
    original: CartPoleEnv = gym.make("CartPole-v0")
    starting_length = original.length
    starting_gravity = original.gravity

    task_schedule = {
        10: dict(length=0.1),
        20: dict(length=0.2, gravity=-12.0),
        30: dict(gravity=0.9),
    }
    env = MultiTaskEnvironment(
        original,
        task_schedule=task_schedule,
        add_task_id_to_obs=True,
        starting_step=10,
        max_steps=19,
    )
    env.seed(123)
    env.reset()

    assert env.observation_space == spaces.Dict(
        x=original.observation_space,
        task_labels=spaces.Discrete(4),
    )

    # Trying to set the 'steps' to something smaller than the starting step
    # doesn't work.
    env.steps = -123
    assert env.steps == 10

    # Trying to set the 'steps' to something greater than the max_steps
    # doesn't work.
    env.steps = 50
    assert env.steps == 19

    # Here we reset the steps to 10, and also check that this works.
    env.steps = 10
    assert env.steps == 10

    for step in range(0, 100):
        # The environment started at an offset of 10.
        assert env.steps == max(min(step + 10, 19), 10)

        obs, _, done, info = env.step(env.action_space.sample())
        # env.render()

        x, task_id = obs["x"], obs["task_labels"]

        # Check that we're always stuck between 10 and 20
        assert 10 <= env.steps < 20
        assert env.length == 0.1
        assert task_id == 1, step

        if done:
            print(f"Resetting on step {step}")
            obs = env.reset()
            assert isinstance(obs, dict)

    env.close()


@atari_py_required
def test_task_id_is_added_even_when_no_known_task_schedule():
    """Test that even when the env is unknown or there are no task params, the
    task_id is still added correctly and is zero at all times.
    """
    # Breakout doesn't have default task params.
    original: CartPoleEnv = gym.make("ALE/Breakout-v5")
    env = MultiTaskEnvironment(
        original,
        add_task_id_to_obs=True,
    )
    env.seed(123)
    env.reset()

    assert env.observation_space == spaces.Dict(
        x=original.observation_space,
        task_labels=spaces.Discrete(1),
    )
    for step in range(0, 100):
        obs, _, done, info = env.step(env.action_space.sample())
        # env.render()

        x, task_id = obs["x"], obs["task_labels"]
        assert task_id == 0

        if done:
            x, task_id = env.reset()
            assert task_id == 0
    env.close()


@monsterkong_required
def test_task_schedule_monsterkong():
    env: MetaMonsterKongEnv = gym.make("MetaMonsterKong-v1")
    from gym.wrappers import TimeLimit

    env = TimeLimit(env, max_episode_steps=10)
    env = MultiTaskEnvironment(
        env,
        task_schedule={
            0: {"level": 0},
            100: {"level": 1},
            200: {"level": 2},
            300: {"level": 3},
            400: {"level": 4},
        },
        add_task_id_to_obs=True,
    )
    obs = env.reset()

    img, task_labels = obs["x"], obs["task_labels"]
    assert task_labels == 0
    assert env.get_level() == 0

    for i in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs["task_labels"] == i // 100
        assert env.level == i // 100
        env.render()
        assert isinstance(done, bool)
        if done:
            print(f"End of episode at step {i}")
            obs = env.reset()

    assert obs["task_labels"] == 4
    assert env.level == 4
    # level stays the same even after reaching that objective.
    for i in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs["task_labels"] == 4
        assert env.level == 4
        env.render()
        if done:
            print(f"End of episode at step {i}")
            obs = env.reset()

    env.close()


@monsterkong_required
def test_task_schedule_with_callables():
    """Apply functions to the env at a given step."""
    env: MetaMonsterKongEnv = gym.make("MetaMonsterKong-v1")
    from gym.wrappers import TimeLimit

    env = TimeLimit(env, max_episode_steps=10)

    from operator import methodcaller

    env = MultiTaskEnvironment(
        env,
        task_schedule={
            0: methodcaller("set_level", 0),
            100: methodcaller("set_level", 1),
            200: methodcaller("set_level", 2),
            300: methodcaller("set_level", 3),
            400: methodcaller("set_level", 4),
        },
        add_task_id_to_obs=True,
    )
    obs = env.reset()

    # img, task_labels = obs
    assert obs["task_labels"] == 0
    assert env.get_level() == 0

    for i in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs["task_labels"] == i // 100
        assert env.level == i // 100
        env.render()
        assert isinstance(done, bool)
        if done:
            print(f"End of episode at step {i}")
            obs = env.reset()

    assert obs["task_labels"] == 4
    assert env.level == 4
    # level stays the same even after reaching that objective.
    for i in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs["task_labels"] == 4
        assert env.level == 4
        env.render()
        if done:
            print(f"End of episode at step {i}")
            obs = env.reset()


@monsterkong_required
def test_random_task_on_each_episode():
    env: MetaMonsterKongEnv = gym.make("MetaMonsterKong-v1")
    from gym.wrappers import TimeLimit

    env = TimeLimit(env, max_episode_steps=10)
    env = MultiTaskEnvironment(
        env,
        task_schedule={
            0: {"level": 0},
            5: {"level": 1},
            200: {"level": 2},
            300: {"level": 3},
            400: {"level": 4},
        },
        add_task_id_to_obs=True,
        new_random_task_on_reset=True,
    )
    task_labels = []
    for i in range(10):
        obs = env.reset()
        task_labels.append(obs["task_labels"])
    assert len(set(task_labels)) > 1

    # Episodes only last 10 steps. Tasks don't have anything to do with the task
    # schedule.
    obs = env.reset()
    start_task_label = obs["task_labels"]
    for i in range(10):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs["task_labels"] == start_task_label
        if i == 9:
            assert done
        else:
            assert not done

    env.close()


from sequoia.conftest import monsterkong_required


def test_random_task_on_each_episode_and_only_one_task_in_schedule():
    """BUG: When the goal is to have only one task, it instead keeps sampling a new
    task from the 'distribution', in the case of cartpole!
    """
    env: MetaMonsterKongEnv = gym.make("CartPole-v1")
    from gym.wrappers import TimeLimit

    env = TimeLimit(env, max_episode_steps=10)
    env = MultiTaskEnvironment(
        env,
        task_schedule={
            0: {"length": 0.1},
        },
        add_task_id_to_obs=True,
        new_random_task_on_reset=True,
    )
    task_labels = []
    lengths = []
    for i in range(10):
        obs = env.reset()
        task_labels.append(obs["task_labels"])
        lengths.append(env.length)
        done = False
        while not done:
            obs, reward, done, info = env.step(env.action_space.sample())
            task_labels.append(obs["task_labels"])
            lengths.append(env.length)

    assert set(task_labels) == {0}
    assert set(lengths) == {0.1}


def env_fn_monsterkong() -> gym.Env:
    env = gym.make("MetaMonsterKong-v0")
    env = TimeLimit(env, max_episode_steps=10)
    env = MultiTaskEnvironment(
        env,
        task_schedule={
            0: {"level": 1},
            100: {"level": 2},
            200: {"level": 3},
            300: {"level": 4},
            400: {"level": 5},
        },
        add_task_id_to_obs=True,
        new_random_task_on_reset=True,
    )
    return env


def env_fn_cartpole() -> gym.Env:
    env = gym.make("CartPole-v0")
    env = TimeLimit(env, max_episode_steps=10)
    env = MultiTaskEnvironment(
        env,
        task_schedule={
            0: {"length": 0.1},
            100: {"length": 0.2},
            200: {"length": 0.3},
            300: {"length": 0.4},
            400: {"length": 0.5},
        },
        add_task_id_to_obs=True,
        new_random_task_on_reset=True,
    )
    return env


@pytest.mark.parametrize("env_id", ["cartpole", param_requires_monsterkong("monsterkong")])
def test_task_sequence_is_reproducible(env_id: str):
    """Test that the multi-task setup is seeded correctly, i.e. that the task sequence
    is reproducible given the same seed.
    """
    if env_id == "cartpole":
        env_fn = env_fn_cartpole
    elif env_id == "monsterkong":
        env_fn = env_fn_monsterkong
    else:
        assert False, f"just testing on cartpole and monsterkong for now, but got env {env_id}"

    first_results: List[Tuple[int, int]] = []
    n_runs = 5
    n_episodes_per_run = 10

    for run_number in range(n_runs):
        print(f"starting run {run_number} / {n_runs}")
        # For each 'run', we record the task sequence and how long each task lasted for.
        # Then, we want to check that each run was indentical, for a given seed.
        env = env_fn()
        env.seed(123)

        task_ids: List[int] = []
        task_lengths: List[int] = []
        for episode in range(n_episodes_per_run):
            print(f"Episode {episode} / {n_episodes_per_run}")
            obs = env.reset()
            task_id: int = obs["task_labels"]
            task_length = 0
            done = False
            while not done:
                obs, _, done, _ = env.step(env.action_space.sample())
                task_length += 1
            task_ids.append(task_id)
            task_lengths.append(task_length)

        task_ids_and_lengths = list(zip(task_ids, task_lengths))
        print(f"Task ids and length of each one: {task_ids_and_lengths}")

        assert len(set(task_ids)) > 1, "should have been more than just one task!"

        if not first_results:
            first_results = task_ids_and_lengths
        else:
            # Make sure that the results from this run are equivalent to the others with
            # the same seed:
            assert task_ids_and_lengths == first_results


from sequoia.common.gym_wrappers import EnvDataset
from sequoia.utils.utils import unique_consecutive_with_index


def test_iteration():
    nb_tasks = 5
    steps_per_task = 10
    task_schedule = task_schedule = {
        i * steps_per_task: dict(length=0.1 + i * 0.2) for i in range(5)
    }
    env = gym.make("CartPole-v0")
    env = MultiTaskEnvironment(env, task_schedule=task_schedule)
    env = TimeLimit(env, max_episode_steps=14)
    env = EnvDataset(env)
    lengths = []
    total_steps = 0
    for episode in range(10):
        for step, obs in enumerate(env):
            # print(total_steps, episode, step, obs, env.length)
            lengths.append(env.length)
            rewards = env.send(env.action_space.sample())
            total_steps += 1

        if total_steps > 100:
            break

    actual_task_schedule = dict(unique_consecutive_with_index(lengths))
    # NOTE: The keys won't necessarily be the same, since episodes might be shorter
    # than `n_steps_per_task`.
    length_schedule = {k: v["length"] for k, v in task_schedule.items()}
    assert list(actual_task_schedule.values()) == list(length_schedule.values())
    # assert False, actual_task_schedule


================================================
FILE: sequoia/common/gym_wrappers/observation_limit.py
================================================
""" IDEA: same as EpisodeLimit, for for the number of total observations.
"""

import gym
from gym.error import ClosedEnvironmentError

from sequoia.utils import get_logger

from .utils import IterableWrapper

logger = get_logger(__name__)


class ObservationLimit(IterableWrapper):
    """Closes the env when `max_steps` steps have been performed *in total*.

    For vectorized environments, each step consumes up to `num_envs` from this
    total budget, i.e. the step counter is incremented by the batch size at
    each step.
    """

    def __init__(self, env: gym.Env, max_steps: int):
        super().__init__(env=env)
        self._max_obs = max_steps
        self._obs_counter: int = 0
        self._initial_reset = False
        self._is_closed: bool = False

    def reset(self):
        if self._is_closed:
            if self._obs_counter >= self._max_obs:
                raise ClosedEnvironmentError(
                    f"Env reached max number of observations ({self._max_obs})"
                )
            raise ClosedEnvironmentError("Can't step through closed env.")

        # Resetting actually gives you an observation, so we count it here.
        self._obs_counter += self.env.num_envs if self.is_vectorized else 1
        logger.debug(f"(observation {self._obs_counter}/{self._max_obs})")

        obs = self.env.reset()

        if self._obs_counter >= self._max_obs:
            self.close()

        return obs

    @property
    def is_closed(self) -> bool:
        return self._is_closed

    def step(self, action):
        if self._is_closed:
            if self._obs_counter >= self._max_obs:
                raise ClosedEnvironmentError(
                    f"Env reached max number of observations ({self._max_obs})"
                )
            raise ClosedEnvironmentError("Can't step through closed env.")

        obs, reward, done, info = self.env.step(action)

        self._obs_counter += self.env.num_envs if self.is_vectorized else 1
        logger.debug(f"(observation {self._obs_counter}/{self._max_obs})")

        # BUG: If we dont use >=, then iteration with EnvDataset doesn't work.
        if self._obs_counter >= self._max_obs:
            self.close()

        return obs, reward, done, info

    def close(self):
        self.env.close()
        self._is_closed = True


================================================
FILE: sequoia/common/gym_wrappers/observation_limit_test.py
================================================
from functools import partial

import gym
import pytest
from gym.vector import SyncVectorEnv

from sequoia.conftest import DummyEnvironment

from .env_dataset import EnvDataset
from .observation_limit import ObservationLimit


@pytest.mark.parametrize("env_name", ["CartPole-v0"])
def test_step_limit_with_single_env(env_name: str):
    """Env should close when a given number of observations have been produced"""
    env = gym.make(env_name)
    env = ObservationLimit(env, max_steps=5)
    env.seed(123)

    done = False
    # First episode.
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    obs, reward, done, info = env.step(env.action_space.sample())
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    assert env.is_closed

    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()

    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.step(env.action_space.sample())


@pytest.mark.xfail(
    reason="TODO: Fix the bugs in the interaction between " "EnvDataset and ObservationLimit."
)
@pytest.mark.parametrize("env_name", ["CartPole-v0"])
def test_step_limit_with_single_env_dataset(env_name: str):
    env = gym.make(env_name)
    start = 0
    target = 10
    env = DummyEnvironment(start=start, target=target, max_value=10 * 2)
    env = EnvDataset(env)

    max_steps = 5

    env = ObservationLimit(env, max_steps=max_steps)
    env.seed(123)
    values = []
    for i, obs in zip(range(100), env):
        values.append(obs)
        _ = env.send(1)
    assert values == list(range(start, max_steps))

    assert env.is_closed

    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()

    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.step(env.action_space.sample())

    with pytest.raises(gym.error.ClosedEnvironmentError):
        for i, _ in zip(range(5), env):
            assert False


@pytest.mark.parametrize("batch_size", [3, 5])
def test_step_limit_with_vectorized_env(batch_size):
    start = 0
    target = 10
    starting_values = [start for i in range(batch_size)]
    targets = [target for i in range(batch_size)]

    env = SyncVectorEnv(
        [
            partial(DummyEnvironment, start=start, target=target, max_value=target * 2)
            for start, target in zip(starting_values, targets)
        ]
    )
    env = ObservationLimit(env, max_steps=3 * batch_size)

    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    # obs, reward, done, info = env.step(env.action_space.sample())
    obs = env.reset()
    assert env.is_closed

    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()

    with pytest.raises(gym.error.ClosedEnvironmentError):
        _ = env.step(env.action_space.sample())


@pytest.mark.parametrize("batch_size", [3, 5])
def test_step_limit_with_vectorized_env_partial_final_batch(batch_size):
    """In the case where the batch size isn't a multiple of the max
    observations, the env returns ceil(max_obs / batch_size) * batch_size
    observations in total.

    TODO: If we ever get to few-shot learning or something like that, we might
    have to care about this.
    """
    start = 0
    target = 10
    starting_values = [start for i in range(batch_size)]
    targets = [target for i in range(batch_size)]

    env = SyncVectorEnv(
        [
            partial(DummyEnvironment, start=start, target=target, max_value=target * 2)
            for start, target in zip(starting_values, targets)
        ]
    )
    env = ObservationLimit(env, max_steps=3 * batch_size + 1)

    obs = env.reset()
    assert not env.is_closed

    obs, reward, done, info = env.step(env.action_space.sample())
    obs, reward, done, info = env.step(env.action_space.sample())
    assert not env.is_closed

    # obs, reward, done, info = env.step(env.action_space.sample())
    obs = env.reset()
    assert env.is_closed

    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()

    with pytest.raises(gym.error.ClosedEnvironmentError):
        _ = env.step(env.action_space.sample())


================================================
FILE: sequoia/common/gym_wrappers/pixel_observation.py
================================================
""" Fixes some of the annoying things about the PixelObservationWrapper. """
from typing import Union

import gym
import numpy as np
from gym.wrappers.pixel_observation import PixelObservationWrapper as PixelObservationWrapper_

from sequoia.common.spaces.image import Image

from .utils import IterableWrapper


class PixelObservationWrapper(PixelObservationWrapper_):
    """Less annoying version of gym's `PixelObservationWrapper`:

    - Resets the environment before calling the constructor (fixes crash).
    - Makes the popup window non-visible when rendering with mode="rgb_array".
    - State is always pixels instead of dict with pixels at key 'pixels'
        - TODO: What if we wanted to also have access to the state? We might
          have to revert this change at some point.
    - `reset()` returns the pixels.
    """

    def __init__(self, env: Union[str, gym.Env]):
        if isinstance(env, str):
            env = gym.make(env)
        env.reset()
        super().__init__(env)
        pixel_space = self.observation_space["pixels"]
        self.observation_space = Image.from_box(pixel_space)
        from gym.envs.classic_control.rendering import Viewer

        self.viewer: Viewer
        if self.env.viewer is None:
            self.env.render(mode="rgb_array")

        if self.env.viewer is not None:
            self.viewer: Viewer = env.viewer
            self.viewer.window.set_visible(False)

    def step(self, *args, **kwargs):
        state, reward, done, info = super().step(*args, **kwargs)
        state = state["pixels"]
        state = self.to_array(state)
        return state, reward, done, info

    def reset(self, *args, **kwargs):
        self.state = super().reset()["pixels"]
        self.state = self.to_array(self.state)
        return self.state

    def render(self, mode: str = "human", **kwargs):
        if mode == "human" and self.viewer and not self.viewer.window.visible:
            self.viewer.window.set_visible(True)
        return super().render(mode=mode, **kwargs)

    def to_array(self, image) -> np.ndarray:
        if not isinstance(image, np.ndarray):
            # TODO: There is something weird happening here, something to do
            # with the image having a negative stride dimension or something
            # like that. Also, ideally, we would return a numpy array (without
            # depending on pytorch here)
            from sequoia.common.transforms.to_tensor import to_tensor

            return to_tensor(image)
            return np.array(image.copy())
        return image


class ImageObservations(IterableWrapper):
    def __init__(self, env: gym.Env):
        super().__init__(env=env)
        self.observation_space = Image.wrap(self.env.observation_space)


================================================
FILE: sequoia/common/gym_wrappers/pixel_observation_test.py
================================================
import gym
import numpy as np
import pytest

from .pixel_observation import PixelObservationWrapper

pyglet = pytest.importorskip("pyglet")


def test_passing_string_to_constructor():
    env = PixelObservationWrapper("CartPole-v0")
    assert env.observation_space.shape == (400, 600, 3)


def test_observation_space():
    env = PixelObservationWrapper(gym.make("CartPole-v0"))
    assert env.observation_space.shape == (400, 600, 3)


def test_reset_gives_pixels():
    with PixelObservationWrapper(gym.make("CartPole-v0")) as env:
        start_state = env.reset()
        assert start_state.shape == (400, 600, 3)
        assert start_state.dtype == np.uint8


def test_step_obs_is_pixels():
    with PixelObservationWrapper(gym.make("CartPole-v0")) as env:
        env.reset()
        obs, _, _, _ = env.step(env.action_space.sample())
        assert obs.shape == (400, 600, 3)
        assert obs.dtype == np.uint8


def test_state_attribute_is_pixels():
    with PixelObservationWrapper(gym.make("CartPole-v0")) as env:
        env.reset()
        assert env.state.shape == (400, 600, 3)
        assert env.state.dtype == np.uint8


def test_render_rgb_array():
    with PixelObservationWrapper(gym.make("CartPole-v0")) as env:
        window = env.viewer.window
        for i in range(50):
            obs, _, done, _ = env.step(env.action_space.sample())
            state = env.render(mode="rgb_array")
            assert state.shape == (400, 600, 3)
            assert state.dtype == np.uint8
            if done:
                env.reset()


def test_render_with_human_mode():
    with PixelObservationWrapper(gym.make("CartPole-v0")) as env:
        window = env.viewer.window
        for i in range(50):
            obs, _, done, _ = env.step(env.action_space.sample())
            env.render(mode="human")
            assert obs.shape == (400, 600, 3)
            if done:
                env.reset()
        assert env.viewer.window is window


def test_render_with_human_mode_with_env_dataset():
    from .env_dataset import EnvDataset

    with PixelObservationWrapper(gym.make("CartPole-v0")) as env:
        env = EnvDataset(env)
        window = env.viewer.window
        obs = env.reset()

        for i, batch in zip(range(500), env):
            obs = batch
            env.render(mode="human")
            assert obs.shape == (400, 600, 3)
            action = env.action_space.sample()
            rewards = env.send(action)
        assert env.viewer.window is window


================================================
FILE: sequoia/common/gym_wrappers/policy_env.py
================================================
"""TODO: Idea: create a wrapper that accepts a 'policy' which will decide an
action to take whenever the `action` argument to the `step` method is None.

This policy should then accept the 'state' or something like that.
"""
from dataclasses import dataclass
from typing import Any, Callable, Dict, Generic, Iterable, Iterator, Optional, Tuple, TypeVar

import gym
from torch.utils.data import IterableDataset

from sequoia.common.batch import Batch
from sequoia.utils.logging_utils import get_logger

from .utils import StepResult

logger = get_logger(__name__)
# from sequoia.settings.base.environment import Environment
# from sequoia.settings.base.objects import (ActionType, ObservationType, RewardType)
ObservationType = TypeVar("ObservationType")
ActionType = TypeVar("ActionType")
RewardType = TypeVar("RewardType")

# Just for type hinting purposes.


class Environment(gym.Env, Generic[ObservationType, ActionType, RewardType]):
    def step(self, action: ActionType) -> Tuple[ObservationType, RewardType, bool, Dict]:
        raise NotImplementedError

    def reset(self) -> ObservationType:
        raise NotImplementedError


DatasetItem = TypeVar("DatasetItem")

# Type annotation for functions that will create the items of the
# IterableDataset below, given the current 'Context',
DatasetItemCreator = Callable[
    [
        ObservationType,  # 'current' state
        ActionType,  # actions applied on the 'current' state
        ObservationType,  # resulting 'next' state
        RewardType,  # rewards associated with the transition above
        bool,  # Wether the 'next' state is final (i.e. the last in an episode)
        Dict,  # the 'info' dict associated with the 'next' state (from Env.step)
    ],
    DatasetItem,
]


@dataclass(frozen=True)
class StateTransition(Batch, Generic[ObservationType, ActionType]):
    observation: ObservationType
    action: ActionType
    next_observation: ObservationType

    # IDEA: Instead of creating extra properties like this, we could have fields
    # like 'field(aliases="bob")', and getattr and setattr would get/set the
    # corresponding attribute when an alias is used instead of the actual name.
    @property
    def state(self) -> ObservationType:
        return self.observation

    @property
    def next_state(self) -> ObservationType:
        return self.next_observation


# By default, the PolicyEnv will yield this kind of item:
DefaultDatasetItem = Tuple[StateTransition, RewardType]


def default_dataset_item_creator(
    observations: ObservationType,
    actions: ActionType,
    next_observations: ObservationType,
    rewards: RewardType,
    done: bool,
    info: Dict = None,
) -> DefaultDatasetItem:
    """Create an item of the IterableDataset below, given the current 'context'.

    Parameters
    ----------
    observations : Observations
        The 'starting' observations/state.
    actions : Actions
        The actions that were taken in the 'starting' state.
    next_observations : Observations
        The resulting observations in the 'end' state.
    rewards : Rewards
        The reward associated with that state transition and action.
    done : bool
        Wether the 'end' observations/state are the last of an episode.
    info : Dict, optional
        Info dict associated with the 'next' observation, by default None.

    Returns
    -------
    Tuple[StateTransition, Rewards]
        A Tuple of the form
        `Tuple[Tuple[Observations, Actions, Observations], Rewards]`.

    NOTE: `done` and `info` aren't used here, but you could use them in your own
    version of this function that you'd then pass to the PolicyEnv constructor
    or to the `set_policy` method.
    """
    state_transition = StateTransition(observations, actions, next_observations)
    return state_transition, rewards


class PolicyEnv(gym.Wrapper, IterableDataset, Iterable[DatasetItem]):
    """Wrapper for an environment that adds the following capabilities:
    1. Makes it possible to call step(None), in which case the policy will be
       used to determine the action to take given the current observation and
       the action space.
    2. Creates an 'IterableDataset' from the env, where one iteration over the
       dataset is equivalent to one episode/trajectory in the environment.

       The types of items yielded by this iterator can be customized by passing
       a different callable to `make_dataset_item`.
       The default items are of type `Tuple[StateTransition, Rewards]`, where
       `StateTransition` is a tuple-like object of the form
       `Tuple<observations, actions, next_observations>`.
    """

    def __init__(
        self,
        env: Environment[ObservationType, ActionType, RewardType],
        policy: Optional[Callable[[Tuple], Any]] = None,
        make_dataset_item: DatasetItemCreator = default_dataset_item_creator,
    ):
        super().__init__(env)
        self.make_dataset_item = make_dataset_item
        self.policy = policy
        self._step_result: Optional[StepResult] = None
        self._closed = False
        self._reset = False
        self._n_episodes: int = 0
        self._n_steps: int = 0
        self._n_steps_in_episode: int = 0
        self._observation: Optional[Observations] = None
        self._action: Optional[Actions] = None

    def set_policy(self, policy: Callable[[ObservationType, gym.Space], ActionType]) -> None:
        """Sets a new policy to be used to generate missing actions."""
        self.policy = policy

    def step(self, action: Optional[Any] = None) -> StepResult:
        if action is None:
            if self.policy is None:
                raise RuntimeError("Need to have a policy set, since action is None.")
            if self._observation is None:
                raise RuntimeError("Reset should have been called before calling step")
            # Get the 'filler' action using the current policy.
            action = self.policy(self._observation, self.action_space)
            if action not in self.action_space:
                raise RuntimeError(
                    f"The policy returned an action which isn't " f"in the action space: {action}"
                )
        step_result = StepResult(*self.env.step(action))
        self._observation = step_result[0]
        self._n_steps += 1
        self._n_steps_in_episode += 1
        return step_result

    def close(self) -> None:
        self.env.close()
        self._reset = False
        self._closed = True
        self._observation = None

    def reset(self, *args, **kwargs) -> None:
        self._observation = self.env.reset(*args, **kwargs)
        self._reset = True
        self._n_steps_in_episode = 0
        return self._observation

    def __iter__(self) -> Iterator[DatasetItem]:
        """Iterator for an episode/trajectory in the env.

        This uses the policy to iteratively perform an episode in the env, and
        yields items at each step, which are the result of the
        `make_dataset_item` function. By default, these items are of the form
        `Tuple<Tuple<observations, actions, next_observation>, rewards>`.

        Returns
        -------
        Iterable[DatasetItem]
            Iterable for a 'trajectory' in the env.

        Yields
        -------
        DatasetItem
            The result of `make_dataset_item(current_context)`, by default a
            tuple of <StateTransition, RewardType>.

        Raises
        ------
        RuntimeError
            If no policy is set.
        """
        if not self.policy:
            raise RuntimeError("Need to have a policy set in order to iterate " "on this env.")

        if not self._reset:
            # Reset the env, if needed.
            previous_observations = self.reset()
        else:
            # The env was just reset, so the observation was set to
            # self._observation.
            assert self._observation is not None
            previous_observations = self._observation

        logger.debug(f"Start of episode {self._n_episodes}")

        done = False
        while not done:
            logger.debug(f"steps (episode): {self._n_steps_in_episode}, total: {self._n_steps}")
            # Get the batch of actions using the policy.
            actions = self.policy(previous_observations, self.action_space)

            observations, rewards, done, info = self.step(actions)

            # TODO: Need to figure out what to yield here..
            yield self.make_dataset_item(
                observations=previous_observations,
                actions=actions,
                next_observations=observations,
                rewards=rewards,
                done=done,
                info=info,
            )
            # Update the 'previous' observation.
            previous_observations = observations

            if not isinstance(done, bool):
                if any(done):
                    raise RuntimeError(
                        "done should either be a bool or always false, since "
                        "we can't do partial resets."
                    )
                done = False

            self._n_episodes += 1

        logger.debug(f"Episode has ended.")
        self._reset = False


================================================
FILE: sequoia/common/gym_wrappers/policy_env_test.py
================================================
from typing import List

from sequoia.conftest import DummyEnvironment

from .policy_env import PolicyEnv, StateTransition


def test_iterating_with_policy():
    env = DummyEnvironment()
    env = PolicyEnv(env)
    env.seed(123)

    actions = [0, 1, 1, 2, 1, 1, 1, 1]
    expected_obs = [0, 0, 1, 2, 1, 2, 3, 4, 5]
    expected_rewards = [5, 4, 3, 4, 3, 2, 1, 0]
    expected_dones = [False, False, False, False, False, False, False, True]

    # Expect the transitions to have this form.
    expected_transitions = list(zip(expected_obs[0:], actions[0:], expected_obs[1:]))

    reset_obs = 0
    # obs = env.reset()
    # assert obs == reset_obs

    n_calls = 0

    def custom_policy(observations, action_space):
        # Deteministic policy used for testing purposes.
        nonlocal n_calls
        action = actions[n_calls]
        n_calls += 1
        return action

    n_expected_transitions = len(actions)
    env.set_policy(custom_policy)
    actual_transitions: List[StateTransition] = []

    i = 0
    for i, batch in enumerate(env):
        print(f"Step {i}: batch: {batch}")
        state_transition, reward = batch
        actual_transitions.append(state_transition)

        observation, action, next_observation = state_transition.as_tuple()

        assert observation == expected_obs[i]
        assert next_observation == expected_obs[i + 1]
        assert action == actions[i]
        assert reward == expected_rewards[i]

    assert i == n_expected_transitions - 1
    assert len(actual_transitions) == n_expected_transitions
    assert [v.as_tuple() for v in actual_transitions] == expected_transitions


================================================
FILE: sequoia/common/gym_wrappers/smooth_environment.py
================================================
"""TODO: A Wrapper that creates smooth transitions between tasks.
Could be based on the MultiTaskEnvironment, but with a moving average update of
the task, rather than setting a brand new random task.

There could also be some kind of 'task_duration' parameter, and the model does
linear or smoothed-out transitions between them depending on the step number?
"""
from typing import Any, Callable, Dict, List, Optional, Union

import gym
import numpy as np
from gym import spaces

from sequoia.common.spaces.sparse import Sparse
from sequoia.utils.logging_utils import get_logger

from .multi_task_environment import MultiTaskEnvironment, add_task_labels

logger = get_logger(__name__)


## TODO (@lebrice): Really cool idea!: Create a TaskSchedule class that inherits
# from Dict and when you __getitem__ a missing key, returns an interpolation!


class SmoothTransitions(MultiTaskEnvironment):
    """Extends MultiTaskEnvironment to support smooth task boudaries.

    Same as `MultiTaskEnvironment`, but when in between two tasks, the
    environment will have its values set to a linear interpolation of the
    attributes from the two neighbouring tasks.
    ```
    env = gym.make("CartPole-v0")
    env = SmoothTransitions(env, task_schedule={
        10: dict(length=1.0),
        20: dict(length=2.0),
    })
    env.seed(123)
    env.reset()
    ```

    At step 0, the length is the default value (0.5)
    at step 1, the length is 0.5 + (1 / 10) * (1.0-0.5) = 0.55
    at step 2, the length is 0.5 + (2 / 10) * (1.0-0.5) = 0.60,
    etc.

    NOTE: This only works with float attributes at the moment.

    """

    def __init__(
        self,
        env: gym.Env,
        task_schedule: Dict[int, Dict[str, float]] = None,
        task_params: List[str] = None,
        noise_std: float = 0.2,
        add_task_dict_to_info: bool = False,
        add_task_id_to_obs: bool = False,
        new_random_task_on_reset: bool = False,
        starting_step: int = 0,
        nb_tasks: int = None,
        max_steps: int = None,
        seed: int = None,
        only_update_on_episode_end: bool = False,
    ):
        """Wraps the environment, allowing for smooth task transitions.

        Same as `MultiTaskEnvironment`, but when in between two tasks, the
        environment will have its values set to a linear interpolation of the
        attributes from the two neighbouring tasks.


        TODO: Should we update the task paramers only on resets? or at each
        step? Might save a little bit of compute to only do it on resets, but
        then it's not exactly as 'smooth' as we would like it to be, especially
        if a single episode can be very long!

        NOTE: Assumes that the attributes are floats for now.

        Args:
            env (gym.Env): The gym environment to wrap.
            task_schedule (Dict[int, Dict[str, float]], optional) (Same as
                `MultiTaskEnvironment`): Dict mapping from a given step
                to the attributes to be set at that time. Interpolations
                between the two neighbouring tasks will be used between task
                transitions.
            only_update_on_episode_end (bool, optional): When `False` (default),
                update the attributes of the environment smoothly after each
                step. When `True`, only update at the end of episodes (when
                `reset()` is called).
        """
        if task_schedule:
            if not all(isinstance(value, dict) for value in task_schedule.values()):
                raise RuntimeError("Task schedule values should be dicts of attributes to change.")
            task_params = list(
                set().union(*[task_dict.keys() for task_dict in task_schedule.values()])
            )
        elif not task_params:
            raise RuntimeError(
                "This wrapper needs either a `task_schedule` or `task_params` (the environment "
                "attributes to modify)"
            )

        super().__init__(
            env,
            task_schedule=task_schedule,
            task_params=task_params,
            noise_std=noise_std,
            add_task_dict_to_info=add_task_dict_to_info,
            add_task_id_to_obs=add_task_id_to_obs,
            new_random_task_on_reset=new_random_task_on_reset,
            starting_step=starting_step,
            nb_tasks=nb_tasks,
            max_steps=max_steps,
            seed=seed,
        )
        self.only_update_on_episode_end = only_update_on_episode_end
        if self._max_steps is None and len(self.task_schedule) > 1:
            # TODO: DO we want to prevent going past the 'task step' in the task schedule?
            pass

        if isinstance(self.env.unwrapped, gym.vector.VectorEnv):
            raise NotImplementedError(
                "This isn't really supposed to be applied on top of a "
                "vectorized environment, rather, it should be used within each"
                " individual env."
            )

        if self.add_task_id_to_obs:
            nb_tasks = nb_tasks if nb_tasks is not None else len(self.task_schedule)
            self.observation_space = add_task_labels(
                self.env.observation_space,
                Sparse(spaces.Discrete(n=nb_tasks), sparsity=1.0),
            )

    def step(self, *args, **kwargs):
        if not self.only_update_on_episode_end:
            self.smooth_update()
        results = super().step(*args, **kwargs)
        return results

    def reset(self, **kwargs):
        # TODO: test this out.
        if self.only_update_on_episode_end:
            self.smooth_update()
        return super().reset(**kwargs)

    @property
    def current_task_id(self) -> Optional[int]:
        """Returns the 'index' of the current task within the task schedule.

        In this case, we return None, since there aren't clear task boundaries.
        """
        return None

    def task_array(self, task: Dict[str, float]) -> np.ndarray:
        return np.array([task.get(k, self.default_task[k]) for k in self.task_params])

    def smooth_update(self) -> None:
        """Update the curren_task at every step, based on a smooth mix of the
        previous and the next task. Every time we reach a _step that is in the
        task schedule, we update the 'prev_task_step' and 'next_task_step'
        attributes.
        """

        current_task: Dict[str, float] = {}
        for attr in self.task_params:
            steps: List[int] = []
            # list of the
            fixed_points: List[float] = []
            for step, task in sorted(self.task_schedule.items()):
                steps.append(step)
                fixed_points.append(task.get(attr, self.default_task[attr]))
            # logger.debug(f"{attr}: steps={steps}, fp={fixed_points}")
            interpolated_value: float = np.interp(
                x=self.steps,
                xp=steps,
                fp=fixed_points,
            )
            current_task[attr] = interpolated_value
            # logger.debug(f"interpolated value of {attr} at step {self.step}: {interpolated_value}")
        # logger.debug(f"Updating task at step {self.step}: {current_task}")
        self.current_task = current_task


================================================
FILE: sequoia/common/gym_wrappers/smooth_environment_test.py
================================================
from typing import Dict

import gym
import matplotlib.pyplot as plt
import numpy as np

from .smooth_environment import SmoothTransitions


def test_task_schedule():
    environment_name = "CartPole-v0"
    # wandb.init(name="SSCL/RL_testing/smooth", monitor_gym=True)
    original = gym.make(environment_name)
    starting_length = original.length
    starting_gravity = original.gravity

    end_length = 5 * starting_length
    end_gravity = 5 * starting_gravity
    total_steps = 100
    # Increase the length linearly up to 3 times the starting value.
    # Increase the gravity linearly up to 5 times the starting value.
    task_schedule: Dict[int, Dict[str, float]] = {
        # 0: dict(length=starting_length, gravity=starting_gravity),
        total_steps: dict(length=end_length, gravity=end_gravity),
    }
    env = SmoothTransitions(
        original,
        task_schedule=task_schedule,
    )
    # env = gym.wrappers.Monitor(env, f"recordings/smooth_{environment_name}", force=True)
    env.seed(123)
    env.reset()

    assert env.gravity == starting_gravity
    assert env.length == starting_length
    # plt.ion()

    params: Dict[int, Dict[str, float]] = {}

    for step in range(total_steps):
        expected_steps = starting_length + (step / total_steps) * (end_length - starting_length)
        expected_gravity = starting_gravity + (step / total_steps) * (
            end_gravity - starting_gravity
        )

        _, reward, done, _ = env.step(env.action_space.sample())
        assert np.isclose(env.length, expected_steps)
        assert np.isclose(env.gravity, expected_gravity)

        # env.render()
        # if done:
        #     env.reset()

        params[step] = env.current_task.copy()

        # print(f"New task: {env.current_task_dict()}")

    # assert False, params[step]
    env.close()
    # plt.ioff()
    plt.close()


def test_update_only_on_reset():
    """Test that when using the 'only_update_on_episode_end' argument with a
    value of True, the smooth updates don't occur during the episodes, but only
    once after an episode has ended (when `reset()` is called).
    """
    total_steps = 100
    original = gym.make("CartPole-v0")
    start_length = original.length
    end_length = 10.0
    task_schedule = {total_steps: dict(length=end_length)}
    env = SmoothTransitions(
        original,
        task_schedule=task_schedule,
        only_update_on_episode_end=True,
    )
    env.reset()
    env.seed(123)
    expected_length = start_length
    for i in range(total_steps):
        assert env.steps == i
        _, _, done, _ = env.step(env.action_space.sample())
        assert env.steps == i + 1
        if done:
            _ = env.reset()
            expected_length = start_length + ((i + 1) / total_steps) * (end_length - start_length)
        assert np.isclose(env.length, expected_length)


def test_task_id_is_always_None():
    total_steps = 100
    original = gym.make("CartPole-v0")
    start_length = original.length
    end_length = 10.0
    task_schedule = {total_steps: dict(length=end_length)}
    env = SmoothTransitions(
        original,
        task_schedule=task_schedule,
        only_update_on_episode_end=True,
        add_task_id_to_obs=True,
        add_task_dict_to_info=True,
    )

    for observation in (env.observation_space.sample() for i in range(100)):
        x, task_id = observation["x"], observation["task_labels"]
        assert task_id is None

    env.reset()
    env.seed(123)
    expected_length = start_length
    for i in range(total_steps):
        assert env.steps == i
        obs, _, done, _ = env.step(env.action_space.sample())

        x, task_id = obs["x"], obs["task_labels"]
        assert task_id is None

        assert env.steps == i + 1
        if done:
            obs = env.reset()
            x, task_id = obs["x"], obs["task_labels"]
            assert task_id is None

            expected_length = start_length + ((i + 1) / total_steps) * (end_length - start_length)
        assert np.isclose(env.length, expected_length)


================================================
FILE: sequoia/common/gym_wrappers/step_callback_wrapper.py
================================================
"""TODO: Make a wrapper that calls a given function/callback when a given step is reached.
"""
from abc import ABC, abstractmethod
from typing import Callable, List, Tuple, Union

import gym

from .utils import IterableWrapper


class Callback(Callable[[int, gym.Env], None], ABC):
    @abstractmethod
    def __call__(self, step: int, env: gym.Env, step_results: Tuple) -> None:
        raise NotImplementedError()


class StepCallback(Callback, ABC):
    def __init__(self, step: int, func: Callable[[int, gym.Env, Tuple], None] = None):
        self.step = step
        self.func = func

    def __call__(self, step: int, env: gym.Env, step_results: Tuple) -> None:
        if self.func:
            return self.func(step, env, step_results)
        raise NotImplementedError("Create your own callback or pass a func to use.")


class PeriodicCallback(Callback):
    def __init__(self, period: int, offset: int = 0, func: Callable[[int, gym.Env], None] = None):
        self.period = period
        self.offset = offset
        self.func = func

    def __call__(self, step: int, env: gym.Env, step_results: Tuple) -> None:
        if self.func:
            return self.func(step, env, step_results)
        raise NotImplementedError("Create your own callback or pass a func to use.")


class StepCallbackWrapper(IterableWrapper):
    """Wrapper that will execute some callbacks when certain steps are reached."""

    def __init__(
        self,
        env: gym.Env,
        callbacks: List[Callback] = None,
    ):
        super().__init__(env)
        self._steps = 0
        self.callbacks = callbacks or []

    def add_callback(self, callback: Union[Callback]) -> None:
        self.callbacks.append(callback)

    def add_step_callback(self, step: int, callback: Callable[[int, gym.Env], None]):
        if isinstance(callback, StepCallback):
            assert step == callback.step
        else:
            callback = StepCallback(step=step, func=callback)
        self.add_callback(callback)

    def add_periodic_callback(self, period: int, callback: StepCallback, offset: int = 0):
        if isinstance(callback, PeriodicCallback):
            assert period == callback.period
            assert offset == callback.offset
        else:
            callback = PeriodicCallback(period=period, offset=offset, func=callback)
        self.add_callback(callback)

    def step(self, action):
        step_results = super().step(action)
        for callback in self.callbacks:
            if isinstance(callback, StepCallback):
                if callback.step == self._steps:
                    callback(self._steps, self, step_results)
            elif isinstance(callback, PeriodicCallback):
                if (
                    self._steps >= callback.offset
                    and (self._steps - callback.offset) % callback.period == 0
                ):
                    callback(self._steps, self, step_results)
            else:
                # if it's a callable, just call it all the time, assuming that
                # it will use some condition in it's __call__ to check wether
                # it should be executed or not.
                callback(self._steps, self, step_results)
        self._steps += 1
        return step_results


================================================
FILE: sequoia/common/gym_wrappers/step_callback_wrapper_test.py
================================================
from typing import Tuple

import gym

from .step_callback_wrapper import PeriodicCallback, StepCallback, StepCallbackWrapper

i: int = 0


def increment_i(step: int, env: gym.Env, step_results: Tuple):
    global i
    print(f"Incrementing i at step {step}: ({i} -> {i+1})")
    i += 1


def decrement_i(step: int, env: gym.Env, step_results: Tuple):
    global i
    print(f"Decrementing i at step {step}: ({i} -> {i-1})")
    i -= 1


def test_step_callback():
    callback = StepCallback(step=7, func=increment_i)
    env = StepCallbackWrapper(gym.make("CartPole-v0"), callbacks=[callback])
    env.reset()
    global i
    i = 0
    for step in range(10):
        obs, reward, done, info = env.step(env.action_space.sample())

        if step < 7:
            assert i == 0
        else:
            assert i == 1
        if done:
            env.reset()
    env.close()


def test_periodic_callback():
    global i
    i = 0
    inc_callback = PeriodicCallback(period=5, func=increment_i)
    dec_callback = PeriodicCallback(period=5, func=decrement_i, offset=2)
    env = StepCallbackWrapper(gym.make("CartPole-v0"), callbacks=[inc_callback, dec_callback])
    env.reset()

    def _next(env) -> int:
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            env.reset()
        return i

    assert _next(env) == 1
    assert _next(env) == 1
    assert _next(env) == 0
    assert _next(env) == 0
    assert _next(env) == 0

    assert _next(env) == 1
    assert _next(env) == 1
    assert _next(env) == 0
    assert _next(env) == 0
    assert _next(env) == 0

    env.close()


================================================
FILE: sequoia/common/gym_wrappers/transform_wrappers.py
================================================
from typing import Callable, Union
import typing

import gym
from gym import Space, spaces
from gym.wrappers import TransformObservation as TransformObservation_
from gym.wrappers import TransformReward as TransformReward_

from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support, has_tensor_support
from sequoia.common.transforms.compose import Compose
from sequoia.common.transforms.transform import Transform

# if typing.TYPE_CHECKING:
#     from sequoia.common.transforms.transform import Transform
from sequoia.utils.logging_utils import get_logger

from .utils import IterableWrapper

logger = get_logger(__name__)


class TransformObservation(TransformObservation_, IterableWrapper):
    def __init__(self, env: gym.Env, f: Union[Callable, Compose]):
        if isinstance(f, list) and not callable(f):
            f = Compose(f)
        super().__init__(env, f=f)
        self.f: "Transform"
        # try:
        self.observation_space = self(self.env.observation_space)
        if has_tensor_support(self.env.observation_space):
            self.observation_space = add_tensor_support(self.observation_space)

        # except Exception as e:
        # logger.warning(UserWarning(
        #     f"Don't know how the transform {self.f} will impact the "
        #     f"observation space! (Exception: {e})"
        # ))

    def __call__(self, *args, **kwargs):
        return self.f(*args, **kwargs)

    def __iter__(self):
        if self.wrapping_passive_env:
            # TODO: For now, we assume that the passive environment has already
            # split stuff correctly for us to use.
            for obs, rewards in self.env:
                yield self(obs), rewards
        else:
            return super().__iter__()


class TransformReward(TransformReward_, IterableWrapper):
    def __init__(self, env: gym.Env, f: Union[Callable, Compose]):
        if isinstance(f, list) and not callable(f):
            f = Compose(f)
        super().__init__(env, f=f)
        self.f: Compose
        # Modify the reward space, if it exists.
        if hasattr(self.env, "reward_space"):
            self.reward_space = self.env.reward_space
        else:
            self.reward_space = spaces.Box(
                low=self.env.reward_range[0],
                high=self.env.reward_range[1],
                shape=(),
            )

        try:
            self.reward_space = self.f(self.reward_space)
            logger.debug(f"New reward space after transform: {self.reward_space}")
        except Exception as e:
            logger.warning(
                UserWarning(
                    f"Don't know how the transform {self.f} will impact the "
                    f"reward space! (Exception: {e})"
                )
            )


class TransformAction(IterableWrapper):
    def __init__(self, env: gym.Env, f: Callable[[Union[gym.Env, Space]], Union[gym.Env, Space]]):
        if isinstance(f, list) and not callable(f):
            f = Compose(f)
        super().__init__(env)
        self.f: Compose = f
        # Modify the action space by applying the transform onto it.
        self.action_space = self.env.action_space

        if isinstance(self.f, Transform):
            self.action_space = self.f(self.env.action_space)
            # logger.debug(f"New action space after transform: {self.observation_space}")

    def step(self, action):
        return self.env.step(self.action(action))

    def action(self, action):
        return self.f(action)


================================================
FILE: sequoia/common/gym_wrappers/transform_wrappers_test.py
================================================
import gym
import numpy as np

from sequoia.common.spaces import Image
from sequoia.common.transforms import Compose, Transforms
from sequoia.conftest import monsterkong_required

from .transform_wrappers import TransformObservation


@monsterkong_required
def test_compose_on_image_space():
    in_space = Image(0, 255, shape=(64, 64, 3), dtype=np.uint8)
    transform = Compose([Transforms.to_tensor, Transforms.three_channels])
    expected = Image(0, 1.0, shape=(3, 64, 64), dtype=np.float32)
    actual = transform(in_space)

    assert actual == expected
    env = gym.make("MetaMonsterKong-v0")
    assert env.observation_space == gym.spaces.Box(0, 255, (64, 64, 3), np.uint8)
    assert env.observation_space == in_space
    wrapped_env = TransformObservation(env, transform)
    assert wrapped_env.observation_space == expected


import pytest
import torch
from torchvision.datasets import MNIST

from sequoia.common.transforms import Compose


@pytest.mark.skipif(not torch.cuda.is_available(), reason="Need cuda for this test.")
def test_move_wrapper_and_iteration():
    batch_size = 1
    transforms = Compose([Transforms.to_tensor])
    dataset = MNIST("data", transform=transforms)
    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(obs_space)
    from sequoia.settings.sl.environment import PassiveEnvironment

    env = PassiveEnvironment(
        dataset,
        batch_size=batch_size,
        n_classes=10,
        observation_space=obs_space,
    )

    from functools import partial

    from sequoia.utils.generic_functions import move

    from .transform_wrappers import TransformReward

    env = TransformObservation(env, partial(move, device="cuda"))
    env = TransformReward(env, partial(move, device="cuda"))

    obs, rewards_next = next(iter(env))
    rewards_send = env.send(env.action_space.sample())
    assert obs.device.type == "cuda"
    assert rewards_next.device.type == "cuda"
    assert rewards_send.device.type == "cuda"


================================================
FILE: sequoia/common/gym_wrappers/utils.py
================================================
import inspect
from abc import ABC
from functools import partial
from typing import (
    Any,
    Callable,
    Dict,
    Generic,
    Iterator,
    NamedTuple,
    Optional,
    Sequence,
    Tuple,
    Type,
    TypeVar,
    Union,
)
import warnings

import gym
import numpy as np
from gym.envs import registry
from gym.envs.classic_control import (
    AcrobotEnv,
    CartPoleEnv,
    Continuous_MountainCarEnv,
    MountainCarEnv,
    PendulumEnv,
)
from gym.envs.registration import load
from gym.vector import VectorEnv
from torch.utils.data import IterableDataset

from sequoia.utils.logging_utils import get_logger

classic_control_envs = (
    AcrobotEnv,
    CartPoleEnv,
    PendulumEnv,
    MountainCarEnv,
    Continuous_MountainCarEnv,
)

classic_control_env_prefixes: Tuple[str, ...] = (
    "CartPole",
    "Pendulum",
    "Acrobot",
    "MountainCar",
    "MountainCarContinuous",
)
logger = get_logger(__name__)


def is_classic_control_env(env: Union[str, gym.Env, Type[gym.Env]]) -> bool:
    """Returns `True` if the given env id, env class, or env instance is a
    classic-control env.

    Parameters
    ----------
    env : Union[str, gym.Env]
        Env id, or env class, or env instance.

    Returns
    -------
    bool
        Wether the given env is a classic-control env from Gym.

    Examples:

    >>> import gym
    >>> is_classic_control_env("CartPole-v0")
    True
    >>> is_classic_control_env("Breakout-v1")
    False
    >>> is_classic_control_env("bob")
    False
    >>> from gym.envs.classic_control import CartPoleEnv
    >>> is_classic_control_env(CartPoleEnv)
    True
    """
    if isinstance(env, partial):
        if env.func is gym.make and isinstance(env.args[0], str):
            logger.warning(
                RuntimeWarning(
                    "Don't pass partial(gym.make, 'some_env'), just use the env string instead."
                )
            )
            env = env.args[0]
    if isinstance(env, str):
        try:
            spec = registry.spec(env)
            if isinstance(spec.entry_point, str):
                return "gym.envs.classic_control" in spec.entry_point
            if inspect.isclass(spec.entry_point):
                env = spec.entry_point
        except gym.error.Error as e:
            # malformed env id, for instance.
            logger.debug(f"can't tell if env id {env} is a classic-control env! ({e})")
            return False

    if inspect.isclass(env):
        return issubclass(env, classic_control_envs)
    if isinstance(env, gym.Env):
        return isinstance(env.unwrapped, classic_control_envs)
    return False


def is_proxy_to(env, env_type_or_types: Union[Type[gym.Env], Tuple[Type[gym.Env], ...]]) -> bool:
    """Returns wether `env` is a proxy to an env of the given type or types."""
    from sequoia.client.env_proxy import EnvironmentProxy

    return isinstance(env.unwrapped, EnvironmentProxy) and issubclass(
        env.unwrapped._environment_type, env_type_or_types
    )


def is_atari_env(env: Union[str, gym.Env]) -> bool:
    """Returns `True` if the given env id, env class, or env instance is a
    Atari environment.

    Parameters
    ----------
    env : Union[str, gym.Env]
        Env id, or env class, or env instance.

    Returns
    -------
    bool
        Wether the given env is an Atari env from Gym.

    Examples:
    >>> import gym
    >>> is_atari_env("CartPole-v0")
    False
    >>> is_atari_env("bob")
    False
    >>> # is_atari_env("ALE/Breakout-v5")
    # True
    >>> # is_atari_env("Breakout-v0")
    # True

    NOTE: Removing this doctest, since recent changes to gym have changed this a bit.
    >>> #from gym.envs import atari
    >>> #is_atari_env(atari.AtariEnv) # requires atari_py to be installed
    # True
    """
    from sequoia.settings.rl.envs import ATARI_PY_INSTALLED

    if not isinstance(env, (str, gym.Env)):
        raise RuntimeError(f"`env` needs to be either a str or gym env, not {env}")
    if isinstance(env, str):
        try:
            spec = registry.spec(env)
        except gym.error.NameNotFound:
            return False
        except gym.error.NamespaceNotFound:
            return False
        if spec.namespace is None:
            return False
        return spec.namespace is "ALE"
    if not ATARI_PY_INSTALLED:
        return False
    raise NotImplementedError(f"TODO: Check if isinstance(env.unwrapped, AtariEnv)")

    if isinstance(env, partial):
        if env.func is gym.make and isinstance(env.args[0], str):
            logger.warning(
                RuntimeWarning(
                    "Don't pass partial(gym.make, 'some_env'), just use the env string instead."
                )
            )
            env = env.args[0]
    # assert False, [env_spec for env_spec in registry.all()]
    if isinstance(env, str):  # and env.startswith("Breakout"):
        try:
            spec = registry.spec(env)
            if isinstance(spec.entry_point, str):
                return "gym.envs.atari" in spec.entry_point or "ale_py" in spec.entry_point
            if inspect.isclass(spec.entry_point):
                env = spec.entry_point
        except gym.error.Error as e:
            # malformed env id, for instance.
            logger.debug(f"can't tell if env id {env} is an atari env! ({e})")
            return False

    try:
        from gym.envs import atari

        AtariEnv = atari.AtariEnv
        if inspect.isclass(env) and issubclass(env, AtariEnv):
            return True
        return isinstance(env, gym.Env) and isinstance(env.unwrapped, AtariEnv)
    except (ImportError, gym.error.DependencyNotInstalled):
        return False
    return False


def get_env_class(env: Union[str, gym.Env, Type[gym.Env], Callable[[], gym.Env]]) -> Type[gym.Env]:
    if isinstance(env, partial):
        if env.func is gym.make and isinstance(env.args[0], str):
            return get_env_class(env.args[0])
        return get_env_class(env.func)
    if isinstance(env, str):
        return load(env)
    if isinstance(env, gym.Wrapper):
        return type(env.unwrapped)
    if isinstance(env, gym.Env):
        return type(env)
    if inspect.isclass(env) and issubclass(env, gym.Env):
        return env
    raise NotImplementedError(f"Don't know how to get the class of env being used by {env}!")


def is_monsterkong_env(env: Union[str, gym.Env, Callable[[], gym.Env]]) -> bool:
    if isinstance(env, str):
        return env.lower().startswith(("metamonsterkong", "monsterkong"))
    try:
        from meta_monsterkong.make_env import MetaMonsterKongEnv

        if inspect.isclass(env):
            return issubclass(env, MetaMonsterKongEnv)
        if isinstance(env, gym.Env):
            return isinstance(env, MetaMonsterKongEnv)
        return False
    except ImportError:
        return False


logger = get_logger(__name__)

EnvType = TypeVar("EnvType", bound=gym.Env)
ObservationType = TypeVar("ObservationType")
ActionType = TypeVar("ActionType")
RewardType = TypeVar("RewardType")


class StepResult(NamedTuple):
    observation: ObservationType
    reward: RewardType
    done: Union[bool, Sequence[bool]]
    info: Union[Dict, Sequence[Dict]]


def has_wrapper(
    env: gym.Wrapper,
    wrapper_type_or_types: Union[Type[gym.Wrapper], Tuple[Type[gym.Wrapper], ...]],
) -> bool:
    """Returns wether the given `env` has a wrapper of type `wrapper_type`.

    Args:
        env (gym.Wrapper): a gym.Wrapper or a gym environment.
        wrapper_type (Type[gym.Wrapper]): A type of Wrapper to check for.

    Returns:
        bool: Wether there is a wrapper of that type wrapping `env`.
    """
    # avoid cycles, although that would be very weird to encounter.
    while hasattr(env, "env") and env.env is not env:
        if isinstance(env, wrapper_type_or_types):
            return True
        env = env.env
    return isinstance(env, wrapper_type_or_types)


class MayCloseEarly(gym.Wrapper, ABC):
    """ABC for Wrappers that may close an environment early depending on some
    conditions.

    WIP: Also prevents calling `step` and `reset` on a closed env.
    """

    def __init__(self, env: gym.Env):
        super().__init__(env)
        self._is_closed: bool = False

    def is_closed(self) -> bool:
        # First, make sure that we're not 'overriding' the 'is_closed' of the
        # wrapped environment.
        if hasattr(self.env, "is_closed"):
            assert callable(self.env.is_closed)
            self._is_closed = self.env.is_closed()
        return self._is_closed

    def closed_error_message(self) -> str:
        """Return the error message to use when attempting to use the closed env.

        This can be useful for wrappers that close when a given condition is reached,
        e.g. a number of episodes has been performed, which could return a more relevant
        message here.
        """
        return "Env is closed"

    def reset(self, **kwargs):
        if self.is_closed():
            raise gym.error.ClosedEnvironmentError(
                f"Can't call `reset()`: {self.closed_error_message()}"
            )
        return super().reset(**kwargs)

    def step(self, action):
        if self.is_closed():
            raise gym.error.ClosedEnvironmentError(
                f"Can't call `step()`: {self.closed_error_message()}"
            )
        return super().step(action)

    def close(self) -> None:
        if self.is_closed():
            # TODO: Prevent closing an environment twice?
            return
            # raise gym.error.ClosedEnvironmentError(self.closed_error_message())
        self.env.close()
        self._is_closed = True


from .env_dataset import EnvDataset


class IterableWrapper(MayCloseEarly, IterableDataset, Generic[EnvType], ABC):
    """ABC for a gym Wrapper that supports iterating over the environment.

    This allows us to wrap dataloader-based Environments and still use the gym
    wrapper conventions, as well as iterate over a gym environment as in the
    Active-dataloader case.

    NOTE: We have IterableDataset as a base class here so that we can pass a wrapped env
    to the DataLoader function. This wrapper however doesn't perform the actual
    iteration, and instead depends on the wrapped environment already supporting
    iteration.
    """

    def __init__(self, env: gym.Env):
        super().__init__(env)
        from sequoia.settings.sl import PassiveEnvironment

        self.wrapping_passive_env = isinstance(self.unwrapped, PassiveEnvironment)

    @property
    def is_vectorized(self) -> bool:
        """Returns wether this wrapper is wrapping a vectorized environment."""
        return isinstance(self.unwrapped, VectorEnv)

    def __next__(self):
        # TODO: This is tricky. We want the wrapped env to use *our* step,
        # reset(), action(), observation(), reward() methods, instead of its own!
        # Otherwise if we are transforming observations for example, those won't
        # be affected.
        # logger.debug(f"Wrapped env {self.env} isnt a PolicyEnv or an EnvDataset")
        # return type(self.env).__next__(self)
        from sequoia.settings.rl.environment import ActiveDataLoader

        # from sequoia.settings.sl.environment import PassiveEnvironment

        if has_wrapper(self.env, EnvDataset) or is_proxy_to(
            self.env, (EnvDataset, ActiveDataLoader)
        ):
            obs, reward, done, info = self.step(self.unwrapped.action_)
            return obs
            # raise RuntimeError(f"WIP: Dropping this '__next__' API in RL.")
            # logger.debug(f"Wrapped env is an EnvDataset, using EnvDataset.__iter__.")
            # return EnvDataset.__next__(self)
            # return EnvDataset.__next__(self)
        return self.env.__next__()
        # return self.observation(obs)

    def observation(self, observation):
        # logger.debug(f"Observation won't be transformed.")
        return observation

    def action(self, action):
        return action

    def reward(self, reward):
        return reward

    # def __len__(self):
    #     return self.env.__len__()

    def get_length(self) -> Optional[int]:
        """Attempts to return the "length" (in number of steps/batches) of this env.

        When not possible, returns None.

        NOTE: This is a bit ugly, but the idea seems alright.
        """
        try:
            # Try to call self.__len__() without recursing into the wrapped env:
            return len(self)
        except TypeError:
            pass
        try:
            # Try to call self.env.__len__() without recursing into the wrapped^2 env:
            return len(self.env)
        except TypeError:
            pass
        try:
            # Try to call self.env.__len__(), allowing recursing down the chain:
            return self.env.__len__()
        except TypeError:
            pass
        try:
            # If all else fails, delegate to the wrapped env's length() method, if any:
            return self.env.get_length()
        except AttributeError:
            pass
        # In the worst case, return None, meaning that we don't have a length.
        return None

    def send(self, action):
        # TODO: Make `send` use `self.step`, that way wrappers can apply the same way to
        # RL and SL environments.
        if self.wrapping_passive_env:
            action = self.action(action)
            reward = self.env.send(action)
            reward = self.reward(reward)
            return reward

        self.unwrapped.action_ = action
        (
            self.unwrapped.observation_,
            self.unwrapped.reward_,
            self.unwrapped.done_,
            self.unwrapped.info_,
        ) = self.step(action)
        return self.unwrapped.reward_

        # (Option 1 below)
        # return self.env.send(action)
        # (Option 2 below)
        # return self.env.send(self.action(action))

        # (Option 3 below)
        # return type(self.env).send(self, action)

        # (Following option 4 below)
        # if has_wrapper(self.env, EnvDataset):
        #     # logger.debug(f"Wrapped env is an EnvDataset, using EnvDataset.send.")
        #     return EnvDataset.send(self, action)

        # if hasattr(self.env, "send"):
        #     action = self.action(action)
        #     reward = self.env.send(action)
        #     reward = self.reward(reward)
        #     return reward

    def __iter__(self) -> Iterator:
        # TODO: Pretty sure this could be greatly simplified by just always using the loop from EnvDataset.
        if self.wrapping_passive_env:
            # NOTE: Also applies the `self.observation` `self.reward` methods while
            # iterating.
            for obs, rewards in self.env:
                obs = self.observation(obs)
                if rewards is not None:
                    rewards = self.reward(rewards)
                yield obs, rewards
        else:
            self.unwrapped.observation_ = self.reset()
            self.unwrapped.done_ = False
            self.unwrapped.action_ = None
            self.unwrapped.reward_ = None

            # Yield the first observation_.
            yield self.unwrapped.observation_

            if self.unwrapped.action_ is None:
                raise RuntimeError(
                    f"You have to send an action using send() between every "
                    f"observation. (env = {self})"
                )

            def done_is_true(done: Union[bool, np.ndarray, Sequence[bool]]) -> bool:
                return done if isinstance(done, bool) or not done.shape else all(done)

            while not any([done_is_true(self.unwrapped.done_), self.is_closed()]):
                # logger.debug(f"step {self.n_steps_}/{self.max_steps},  (episode {self.n_episodes_})")

                # Set those to None to force the user to call .send()
                self.unwrapped.action_ = None
                self.unwrapped.reward_ = None
                yield self.unwrapped.observation_

                if self.unwrapped.action_ is None:
                    raise RuntimeError(
                        f"You have to send an action using send() between every "
                        f"observation. (env = {self})"
                    )

        # assert False, "WIP"

        # Option 1: Return the iterator from the wrapped env. This ignores
        # everything in the wrapper.
        # return self.env.__iter__()

        # Option 2: apply the transformations on the items yielded by the
        # iterator of the wrapped env (this doesn't use the self.observaion(), self.action())
        # from .transform_wrappers import TransformObservation, TransformAction, TransformReward
        # return map(self.observation, self.env.__iter__())

        # Option 3: Calling the method on the wrapped env, but with `self` being
        # the wrapper, rather than the wrapped env:
        # return type(self.env).__iter__(self)

        # Option 4: Slight variation on option 3: We cut straight to the
        # EnvDataset iterator.

        # from sequoia.settings.rl.environment import ActiveDataLoader
        # from sequoia.settings.sl.environment import PassiveEnvironment

        # if has_wrapper(self.env, EnvDataset) or is_proxy_to(
        #     self.env, (EnvDataset, ActiveDataLoader)
        # ):
        #     # logger.debug(f"Wrapped env is an EnvDataset, using EnvDataset.__iter__ with the wrapper as `self`.")
        #     return EnvDataset.__iter__(self)

        # # TODO: Should probably remove this since we don't actually use this 'PolicyEnv'.
        # if has_wrapper(self.env, PolicyEnv) or is_proxy_to(self.env, PolicyEnv):
        #     # logger.debug(f"Wrapped env is a PolicyEnv, will use PolicyEnv.__iter__ with the wrapper as `self`.")
        #     return PolicyEnv.__iter__(self)

        # # NOTE: This works even though IterableDataset isn't a gym.Wrapper.
        # if not has_wrapper(self.env, IterableDataset) and not isinstance(
        #     self.env, DataLoader
        # ):
        #     logger.warning(
        #         UserWarning(
        #             f"Will try to iterate on a wrapper for env {self.env} which "
        #             f"doesn't have the EnvDataset or PolicyEnv wrappers and isn't "
        #             f"an IterableDataset."
        #         )
        #     )
        # # if isinstance(self.env, DataLoader):
        # #     return self.env.__iter__()
        # # raise NotImplementedError(f"Wrapper {self} doesn't know how to iterate on {self.env}.")
        # return self.env.__iter__()

    # @property
    # def wrapping_passive_env(self) -> bool:
    #     """ Returns wether this wrapper is applied over a 'passive' env, in which case
    #     iterating over the env will yield (up to) 2 items, rather than just 1.
    #     """
    #     from sequoia.settings.sl.environment import PassiveEnvironment

    #     return isinstance(self.unwrapped, PassiveEnvironment) or is_proxy_to(
    #         self, PassiveEnvironment
    #     )

    # def __setattr__(self, attr, value):
    #     """
    #     TODO: Remove/replace this:

    #     Redirect the __setattr__ of attributes 'owned' by the EnvDataset to
    #     the EnvDataset.

    #     We need to do this because we change the value of `self` and call
    #     EnvDataset.__iter__(self), which might get and set attributes to/from
    #     `self`, which is what you'd expect normally. However when `self` is a
    #     wrapper over the env, rather than the env itself, then when attributes
    #     are set on `self` inside __iter__ or __next__ or send, etc, they are
    #     actually set on the wrapper, rather than on the env.

    #     We solve this by detecting when an attribute with a name ending with "_"
    #     and part of a given list of attributes is set.
    #     """
    #     if attr.endswith("_") and has_wrapper(self.env, EnvDataset):
    #         if attr in {
    #             "observation_",
    #             "action_",
    #             "reward_",
    #             "done_",
    #             "info_",
    #             "n_sends_",
    #         }:
    #             # logger.debug(f"Attribute {attr} will be set on the wrapped env rather than on the wrapper itself.")
    #             env = self.env
    #             while not isinstance(env, EnvDataset) and env.env is not env:
    #                 env = env.env
    #             assert isinstance(env, EnvDataset)
    #             setattr(env, attr, value)
    #     else:
    #         object.__setattr__(self, attr, value)


class RenderEnvWrapper(IterableWrapper):
    """Simple Wrapper that renders the env at each step."""

    def __init__(self, env: gym.Env, display: Any = None):
        super().__init__(env)
        # TODO: Maybe use the given display?

    def step(self, action):
        self.env.render("human")
        return self.env.step(action)


def tile_images(img_nhwc):
    """
    TAKEN FROM https://github.com/openai/gym/pull/1624/files

    Tile N images into one big PxQ image
    (P,Q) are chosen to be as close as possible, and if N
    is square, then P=Q.
    input: img_nhwc, list or array of images, ndim=4 once turned into array
        n = batch index, h = height, w = width, c = channel
    returns:
        bigim_HWc, ndarray with ndim=3
    """
    img_nhwc = np.asarray(img_nhwc)

    N, h, w, c = img_nhwc.shape
    if c not in {1, 3}:
        img_nhwc = img_nhwc.transpose([0, 2, 3, 1])
        N, h, w, c = img_nhwc.shape
    assert c in {1, 3}

    H = int(np.ceil(np.sqrt(N)))
    W = int(np.ceil(float(N) / H))
    img_nhwc = np.array(list(img_nhwc) + [img_nhwc[0] * 0 for _ in range(N, H * W)])
    img_HWhwc = img_nhwc.reshape(H, W, h, w, c)
    img_HhWwc = img_HWhwc.transpose(0, 2, 1, 3, 4)
    img_Hh_Ww_c = img_HhWwc.reshape(H * h, W * w, c)
    return img_Hh_Ww_c


if __name__ == "__main__":
    import doctest

    doctest.testmod()


================================================
FILE: sequoia/common/gym_wrappers/utils_test.py
================================================
import gym
import pytest
from gym.wrappers import ClipAction
from gym.wrappers.pixel_observation import PixelObservationWrapper

from sequoia.conftest import param_requires_pyglet

from .pixel_observation import PixelObservationWrapper
from .utils import has_wrapper


@pytest.mark.parametrize(
    "env,wrapper_type,result",
    [
        param_requires_pyglet(
            lambda: PixelObservationWrapper(gym.make("CartPole-v0")), ClipAction, False
        ),
        param_requires_pyglet(
            lambda: PixelObservationWrapper(gym.make("CartPole-v0")), PixelObservationWrapper, True
        ),
        param_requires_pyglet(
            lambda: PixelObservationWrapper(gym.make("CartPole-v0")), PixelObservationWrapper, True
        ),
        # param_requires_atari_py(AtariPreprocessing(gym.make("ALE/Breakout-v5")), ClipAction, True),
    ],
)
def test_has_wrapper(env, wrapper_type, result):
    assert has_wrapper(env(), wrapper_type) == result


================================================
FILE: sequoia/common/hparams/__init__.py
================================================
""" Utilities for creating hyper-parameter dataclasses and their fields. """
from simple_parsing.helpers.hparams import categorical, log_uniform, loguniform, uniform
from simple_parsing.helpers.hparams.hyperparameters import HyperParameters, Point


================================================
FILE: sequoia/common/layers.py
================================================
import math
from typing import Callable, List, Optional, Tuple, Union

import numpy as np
import torch
from gym import spaces
from torch import Tensor, nn

from sequoia.common.spaces.image import Image
from sequoia.utils.generic_functions import singledispatchmethod
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


class Lambda(nn.Module):
    def __init__(self, func: Callable):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)


class Reshape(nn.Module):
    def __init__(self, target_shape: Union[List[int], Tuple[int, ...]]):
        self.target_shape = target_shape
        super().__init__()

    def forward(self, inputs):
        return inputs.reshape([inputs.shape[0], *self.target_shape])


class ConvBlock(nn.Module):
    def __init__(
        self, in_channels: int, out_channels: int, kernel_size: int = 3, padding: int = 1, **kwargs
    ):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            padding=padding,
            **kwargs,
        )
        self.norm = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        x = self.conv(x)
        x = self.norm(x)
        x = self.relu(x)
        return self.pool(x)


class DeConvBlock(nn.Module):
    """Block that performs:
    Upsample (2x)
    Conv
    BatchNorm2D
    Relu
    Conv
    BatchNorm2D
    Relu (optional)
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        hidden_channels: Optional[int] = None,
        kernel_size: int = 3,
        padding: int = 1,
        last_relu: bool = True,
        **kwargs,
    ):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.hidden_channels = hidden_channels or out_channels
        self.kernel_size = kernel_size
        self.last_relu = last_relu
        super().__init__()
        self.upsample = nn.Upsample(scale_factor=2)
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=self.hidden_channels,
            kernel_size=kernel_size,
            padding=padding,
            **kwargs,
        )
        self.norm1 = nn.BatchNorm2d(self.hidden_channels)
        self.conv2 = nn.Conv2d(
            in_channels=self.hidden_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            padding=padding,
            **kwargs,
        )
        self.norm2 = nn.BatchNorm2d(self.hidden_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.upsample(x)
        x = self.conv1(x)
        x = self.norm1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.norm2(x)
        if self.last_relu:
            x = self.relu(x)
        return x


def n_output_features(
    in_features: int, padding: int = 1, kernel_size: int = 3, stride: int = 1
) -> int:
    """Calculates the number of output features of a conv2d layer given its parameters."""
    return math.floor((in_features + 2 * padding - kernel_size) / stride) + 1


class Conv2d(nn.Conv2d):
    @singledispatchmethod
    def forward(self, input: Union[Image, Tensor]) -> Union[Tensor, Image]:
        return super().forward(input)

    @forward.register(Image)
    def _(self, input: Image) -> Image:
        assert input.channels_first, f"Need channels first inputs for conv2d: {input}"
        # NOTE: Not strictly necessary for computing the output space, but it would be
        # better for the input space to already have a batch size, since conv2d only
        # accepts 4-dimensional inputs.
        # assert input.batch_size, (
        #     f"Image space should be batched, since conv2d only accepts 4-dimensional "
        #     f"inputs. (input={input})"
        # )
        assert input.channels == self.in_channels, (
            f"Input space doesn't have the right number of channels: "
            f"input.channels: {input.channels} != self.in_channels: {self.in_channels}"
        )
        new_height = n_output_features(
            input.height,
            padding=self.padding[0],
            kernel_size=self.kernel_size[0],
            stride=self.stride[0],
        )
        new_width = n_output_features(
            input.width,
            padding=self.padding[1],
            kernel_size=self.kernel_size[1],
            stride=self.stride[1],
        )
        new_channels = self.out_channels

        new_shape = [new_channels, new_height, new_width]
        if input.batch_size:
            new_shape.insert(0, input.batch_size)

        output_space: Image = type(input)(low=-np.inf, high=np.inf, shape=new_shape)
        output_space.channels_first = True
        return output_space


class MaxPool2d(nn.MaxPool2d):
    @singledispatchmethod
    def forward(self, input: Union[Image, Tensor]) -> Union[Tensor, Image]:
        return super().forward(input)

    @forward.register(Image)
    def _(self, input: Image) -> Image:
        assert input.channels_first, f"Need channels first inputs: {input}"
        # assert not self.padding, "assuming no padding for now."
        padding = [self.padding] * 2 if isinstance(self.padding, int) else self.padding
        kernel_size = (
            [self.kernel_size] * 2 if isinstance(self.kernel_size, int) else self.kernel_size
        )
        stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride

        new_height = n_output_features(
            input.height,
            padding=padding[0],
            kernel_size=kernel_size[0],
            stride=stride[0],
        )
        new_width = n_output_features(
            input.width,
            padding=padding[1],
            kernel_size=kernel_size[1],
            stride=stride[1],
        )

        new_channels = input.channels
        new_shape = [new_channels, new_height, new_width]
        if input.batch_size:
            new_shape.insert(0, input.batch_size)
        output_space: Image = type(input)(low=-np.inf, high=np.inf, shape=new_shape)
        output_space.channels_first = True
        # assert False, (self.forward(torch.as_tensor([input.sample()])).shape, output_space)
        return output_space


class Sequential(nn.Sequential):

    # NB: We can't really type check this function as the type of input
    # may change dynamically (as is tested in
    # TestScript.test_sequential_intermediary_types).  Cannot annotate
    # with Any as TorchScript expects a more precise type
    def forward(self, input):
        if isinstance(input, spaces.Space):
            space = input
            for module in self:
                try:
                    space = module(space)
                except:
                    if isinstance(space, (spaces.Box, Image)):
                        # Apply the module to a sample from the space, and create an
                        # output space of the same shape.
                        space = Image.from_box(space)
                        in_sample: Tensor = torch.as_tensor(space.sample())
                        if not space.batch_size:
                            in_sample = in_sample.unsqueeze(0)
                        out_sample = module(in_sample)
                        out_space = type(space)(low=-np.inf, high=np.inf, shape=out_sample.shape)
                        space = out_space
                    else:
                        logger.debug(
                            f"Unable to apply module {module} on space {space}: assuming that it doesn't change the space."
                        )
            return space
        return super().forward(input)


================================================
FILE: sequoia/common/loss.py
================================================
""" Module that defines a `Loss` class that holds losses and associated metrics.

This Loss object is used to bundle together the Loss and the Metrics.

Loss objects are used to simplify training with multiple "loss signals"
(e.g. in Self-Supervised Learning) by keeping track of the contribution of each
individual 'task' to the total loss, as well as their corresponding metrics.

For example:
>>> from pprint import pprint
>>> loss = Loss("total")
>>> loss += Loss("task_a", loss=1.23, metrics={"accuracy": 0.95})
>>> loss += Loss("task_b", loss=torch.Tensor([2.10]))
>>> loss += Loss("task_c", loss=3.00)
>>> log_dict = loss.to_log_dict()
>>> pprint(log_dict)
{'total/loss': tensor([6.3300]),
 'total/task_a/accuracy': 0.95,
 'total/task_a/loss': 1.23,
 'total/task_b/loss': tensor([2.1000]),
 'total/task_c/loss': 3.0}

Another feature of Loss objects is that they can automatically generate
relevant metrics when the associated tensors are passed.

For example, consider a classification problem:

>>> # some fake classification logits.
>>> y_pred = torch.Tensor([
...     [.8, .1, .1],
...     [.0, .9, .1],
...     [.0, .1, .9],
... ])
>>> y = [0, 1, 1]
>>> loss = Loss("test", y_pred=y_pred, y=y)
>>> loss.metric
ClassificationMetrics(n_samples=3, accuracy=0.666667)

Or, consider a regression problem:
>>> y_true = [0.0, 1.0, 2.0, 3.0]
>>> y_pred = [0.0, 1.0, 2.0, 5.0] # mse = 1/4 * (5-3)**2 == 1.0
>>> reg_loss = Loss("test", y_pred=y_pred, y=y_true)
>>> reg_loss.metric
RegressionMetrics(n_samples=4, mse=tensor(1.), l1_error=tensor(0.5000))

See the `Loss` constructor for more info on which tensors are accepted.
"""
from collections.abc import Mapping as MappingABC
from dataclasses import InitVar, dataclass, fields
from typing import Any, ClassVar, Dict, Iterable, List, Optional, Tuple, Union

import torch
from simple_parsing import field
from simple_parsing.helpers import dict_field
from torch import Tensor

from sequoia.utils.logging_utils import cleanup, get_logger
from sequoia.utils.serialization import Serializable
from sequoia.utils.utils import add_dicts, add_prefix

from .metrics import ClassificationMetrics, Metrics, RegressionMetrics, get_metrics

logger = get_logger(__name__)


@dataclass
class Loss(Serializable, MappingABC):
    """Object used to store the losses and metrics.

    Used to simplify the return type of the different `get_loss` functions and
    also to help in debugging models that use a combination of different loss
    signals.

    TODO: Add some kind of histogram plot to show the relative contribution of
    each loss signal?
    TODO: Maybe create a `make_plots()` method to create wandb plots?
    """

    name: str
    loss: Tensor = 0.0  # type: ignore
    losses: Dict[str, "Loss"] = dict_field()
    # NOTE: By setting to_dict=False below, we don't include the tensors when
    # serializing the attributes.
    # TODO: Does that also mean that the tensors can't be pickled (moved) by
    # pytorch-lightning during training? Is there a case where that would be
    # useful?
    tensors: Dict[str, Tensor] = dict_field(repr=False, to_dict=False)
    # Dictionary of metrics related to this loss. For example, could be the Accuracy.
    # TODO: Test out using this with metrics from `torchmetrics`.
    metrics: Dict[str, Union[Metrics, Tensor]] = dict_field()
    # When multiplying the Loss by a value, this keep track of the coefficients
    # used, so that if we wanted to we could recover the 'unscaled' loss.
    _coefficient: Union[float, Tensor] = field(1.0, repr=False)

    x: InitVar[Optional[Tensor]] = None
    h_x: InitVar[Optional[Tensor]] = None
    y_pred: InitVar[Optional[Tensor]] = None
    y: InitVar[Optional[Tensor]] = None

    _field_names: ClassVar[Tuple[str, ...]]

    def __post_init__(
        self, x: Tensor = None, h_x: Tensor = None, y_pred: Tensor = None, y: Tensor = None
    ):
        if isinstance(self.name, dict):
            # TODO: ugly-ish 'hack', we need to do this because of the infamous
            # 'apply_to_collection' function, which does a Loss({k: v for k, v in loss.items()})
            # Check that all other fields are empty, so we're not overwriting anything.
            assert (isinstance(self.loss, float) or not self.loss.shape) and self.loss == 0.0
            assert not self.metrics
            assert not self.losses
            assert not self.tensors
            assert self._coefficient == 1.0

            field_values = self.name
            self.name = field_values.pop("name")
            for k, v in field_values.items():
                setattr(self, k, v)

        assert self.name, "Loss objects should be given a name!"
        if self.name not in self.metrics:
            # Create a Metrics object if given the necessary tensors.
            metrics = get_metrics(x=x, h_x=h_x, y_pred=y_pred, y=y)
            if metrics:
                self.metrics[self.name] = metrics
        self._device: torch.device = None
        for name in list(self.tensors.keys()):
            tensor = self.tensors[name]
            if not isinstance(tensor, Tensor):
                self.tensors[name] = torch.as_tensor(tensor)
            elif self._device is None:
                self._device = tensor.device

        if "_field_names" not in type(self).__dict__:
            type(self)._field_names = tuple(f.name for f in fields(self))

    def __contains__(self, key: str) -> bool:
        if isinstance(key, str):
            return key in type(self)._field_names
        return NotImplemented

    def __getitem__(self, key: str) -> Any:
        if key not in self:
            raise KeyError(key)
        return getattr(self, key)

    def __iter__(self) -> Iterable[str]:
        return type(self)._field_names

    def __len__(self) -> int:
        return len(type(self)._field_names)

    @property
    def total_loss(self) -> Tensor:
        return self.loss

    @property
    def requires_grad(self) -> bool:
        """Returns wether the loss tensor in this object requires grad."""
        return isinstance(self.loss, Tensor) and self.loss.requires_grad

    def backward(self, *args, **kwargs):
        """Calls `self.loss.backward(*args, **kwargs)`."""
        return self.loss.backward(*args, **kwargs)

    @property
    def metric(self) -> Optional[Metrics]:
        """Shortcut for `self.metrics[self.name]`.

        Returns:
            Optional[Metrics]: The main metrics associated with this Loss.
        """
        return self.metrics.get(self.name)

    @metric.setter
    def metric(self, value: Metrics) -> None:
        """Shortcut for `self.metrics[self.name] = value`.

        Parameters
        ----------
        value : Metrics
            The main metrics associated with this Loss.
        """
        assert self.name not in self.metrics, "There's already be a metric?"
        self.metrics[self.name] = value

    @property
    def accuracy(self) -> float:
        if isinstance(self.metric, ClassificationMetrics):
            return self.metric.accuracy

    @property
    def mse(self) -> Tensor:
        assert isinstance(self.metric, RegressionMetrics), self
        return self.metric.mse

    def __add__(self, other: Union["Loss", Any]) -> "Loss":
        """Adds two Loss instances together.

        Adds the losses, total loss and metrics. Overwrites the tensors.
        Keeps the name of the first one. This is useful when doing something
        like:

        ```
        loss = Loss("Test")
        for x, y in dataloader:
            loss += model.get_loss(x=x, y=y)
        ```

        Returns
        -------
        Loss
            The merged/summed up Loss.
        """
        if other == 0:
            return self
        if not isinstance(other, Loss):
            return NotImplemented
        name = self.name
        loss = self.loss + other.loss

        if self.name == other.name:
            losses = add_dicts(self.losses, other.losses)
            metrics = add_dicts(self.metrics, other.metrics)
        else:
            # IDEA: when the names don't match, store the entire Loss
            # object into the 'losses' dict, rather than a single loss tensor.
            losses = add_dicts(self.losses, {other.name: other})
            # TODO: setting in the 'metrics' dict, we are duplicating the
            # metrics, since they now reside in the `self.metrics[other.name]`
            # and `self.losses[other.name].metrics` attributes.
            metrics = self.metrics
            # metrics = add_dicts(self.metrics, {other.name: other.metrics})

        tensors = add_dicts(self.tensors, other.tensors, add_values=False)
        return Loss(
            name=name,
            loss=loss,
            losses=losses,
            tensors=tensors,
            metrics=metrics,
            _coefficient=self._coefficient,
        )

    def __iadd__(self, other: Union["Loss", Any]) -> "Loss":
        """Adds Loss to `self` in-place.

        Adds the losses, total loss and metrics. Overwrites the tensors.
        Keeps the name of the first one. This is useful when doing something
        like:

        ```
        loss = Loss("Test")
        for x, y in dataloader:
            loss += model.get_loss(x=x, y=y)
        ```

        Returns
        -------
        Loss
            `self`: The merged/summed up Loss.
        """
        self.loss = self.loss + other.loss

        if self.name == other.name:
            self.losses = add_dicts(self.losses, other.losses)
            self.metrics = add_dicts(self.metrics, other.metrics)
        else:
            # IDEA: when the names don't match, store the entire Loss
            # object into the 'losses' dict, rather than a single loss tensor.
            self.losses = add_dicts(self.losses, {other.name: other})

        self.tensors = add_dicts(self.tensors, other.tensors, add_values=False)
        return self

    def __radd__(self, other: Any):
        """Addition operator for when forward addition returned `NotImplemented`.

        For example, doing something like `None + Loss()` will use __radd__,
        whereas doing `Loss() + None` will use __add__.
        """
        if other is None:
            return self
        elif other == 0:
            return self
        if isinstance(other, Tensor):
            # TODO: Other could be a loss tensor, maybe create a Loss object for it?
            pass
        return NotImplemented

    def __mul__(self, factor: Union[float, Tensor]) -> "Loss":
        """Scale each loss tensor by `coefficient`.

        Returns
        -------
        Loss
            returns a scaled Loss instance.
        """
        result = Loss(
            name=self.name,
            loss=self.loss * factor,
            losses={k: value * factor for k, value in self.losses.items()},
            metrics=self.metrics,
            tensors=self.tensors,
            _coefficient=self._coefficient * factor,
        )
        return result

    def __rmul__(self, factor: Union[float, Tensor]) -> "Loss":
        # assert False, f"rmul: {factor}"
        return self.__mul__(factor)

    def __truediv__(self, coefficient: Union[float, Tensor]) -> "Loss":
        return self * (1 / coefficient)

    @property
    def unscaled_losses(self):
        """Recovers the 'unscaled' version of this loss.

        TODO: This isn't used anywhere. We could probably remove it.
        """
        return {k: value / self._coefficient for k, value in self.losses.items()}

    def to_log_dict(self, verbose: bool = False) -> Dict[str, Union[str, float, Dict]]:
        """Creates a dictionary to be logged (e.g. by `wandb.log`).

        Args:
            verbose (bool, optional): Wether to include a lot of information, or
            to only log the 'essential' stuff. See the `cleanup` function for
            more info. Defaults to False.

        Returns:
            Dict: A dict containing the things to be logged.
        """
        # TODO: Could also produce some wandb plots and stuff here when verbose?
        log_dict: Dict[str, Union[str, float, Dict, Tensor]] = {}
        # log_dict["loss"] = round(float(self.loss), 6)
        # Preserving the Torch Dtype, if present.
        log_dict["loss"] = self.loss

        for name, metric in self.metrics.items():
            if isinstance(metric, Serializable):
                log_dict[name] = metric.to_log_dict(verbose=verbose)
            else:
                log_dict[name] = metric

        for name, loss in self.losses.items():
            if isinstance(loss, Serializable):
                log_dict[name] = loss.to_log_dict(verbose=verbose)
            else:
                log_dict[name] = loss

        log_dict = add_prefix(log_dict, prefix=self.name, sep="/")
        keys_to_remove: List[str] = []
        if not verbose:
            # when NOT verbose, remove any entries with this matching key.
            # TODO: add/remove keys here if you want to customize what doesn't get logged to wandb.
            # TODO: Could maybe make this a class variable so that it could be
            # extended/overwritten, but that sounds like a bit too much rn.
            keys_to_remove = [
                "n_samples",
                "name",
                "confusion_matrix",
                "class_accuracy",
                "_coefficient",
            ]
        result = cleanup(log_dict, keys_to_remove=keys_to_remove, sep="/")
        return result

    def to_pbar_message(self) -> Dict[str, float]:
        """Smaller, less-detailed version of `to_log_dict()` for progress bars."""
        # NOTE: PL actually doesn't seem to accept strings as values
        message: Dict[str, Union[str, float]] = {}
        message["Loss"] = float(self.loss)

        for name, metric in self.metrics.items():
            if isinstance(metric, Metrics):
                message[name] = metric.to_pbar_message()
            else:
                message[name] = metric

        for name, loss_info in self.losses.items():
            message[name] = loss_info.to_pbar_message()

        message = add_prefix(message, prefix=self.name, sep=" ")

        return cleanup(message, sep=" ")

    def clear_tensors(self) -> None:
        """Clears the `tensors` attribute of `self` and of sublosses.

        NOTE: This could be useful if you want to save some space/compute, but
        it isn't being used atm, and there's no issue. You might want to call
        this if you are storing big tensors (or passing them to the constructor)
        """
        self.tensors.clear()
        for _, loss in self.losses.items():
            loss.clear_tensors()
        return self

    def absorb(self, other: "Loss") -> None:
        """Absorbs `other` into `self`, merging the losses and metrics.

        Args:
            other (Loss): Another loss to 'merge' into this one.
        """
        new_name = self.name
        old_name = other.name
        # Here we create a new 'other' and use __iadd__ to merge the attributes.
        new_other = Loss(name=new_name)
        new_other.loss = other.loss
        # We also replace the name in the keys, if present.
        new_other.metrics = {k.replace(old_name, new_name): v for k, v in other.metrics.items()}
        new_other.losses = {k.replace(old_name, new_name): v for k, v in other.losses.items()}
        self += new_other

    def all_metrics(self) -> Dict[str, Metrics]:
        """Returns a 'cleaned up' dictionary of all the Metrics objects."""
        assert self.name
        result: Dict[str, Metrics] = {}
        result.update(self.metrics)

        for name, loss in self.losses.items():
            # TODO: Aren't we potentially colliding with 'self.metrics' here?
            subloss_metrics = loss.all_metrics()
            for key, metric in subloss_metrics.items():
                assert key not in result, (
                    f"Collision in metric keys of subloss {name}: key={key}, " f"result={result}"
                )
                result[key] = metric
        result = add_prefix(result, prefix=self.name, sep="/")
        return result


if __name__ == "__main__":
    import doctest

    doctest.testmod()


================================================
FILE: sequoia/common/loss_test.py
================================================
"""
TODO: Write some tests that also help illustrate how the Loss class works.
"""
from .loss import Loss


def test_demo():
    """Simple test to demonstrate addition of Loss objects."""
    loss = Loss("total")
    loss += Loss("task_a", loss=1.23, metrics={"accuracy": 0.95})
    loss += Loss("task_b", loss=2.10)
    loss += Loss("task_c", loss=3.00)
    # Get a dict to be logged, for example with wandb.
    loss_dict = loss.to_log_dict()
    assert loss_dict == {
        "total/loss": 6.33,
        "total/task_a/loss": 1.23,
        "total/task_a/accuracy": 0.95,
        "total/task_b/loss": 2.1,
        "total/task_c/loss": 3.0,
    }


def test_all_metrics():
    """Using `all_metrics()` gives a dict of all the metrics in the Loss."""
    loss = Loss("total")
    loss += Loss("task_a", loss=1.23, metrics={"accuracy": 0.95})
    loss += Loss("task_b", loss=2.10)
    loss += Loss("task_c", loss=3.00)
    assert loss.all_metrics() == {
        "total/task_a/accuracy": 0.95,
    }


def test_to_log_dict_order():
    """Simple test to demonstrate addition of Loss objects."""
    task_a_loss = Loss("task_a", loss=1.23, metrics={"accuracy": 0.95})
    task_b_loss = Loss("task_b", loss=2.10)
    task_c_loss = Loss("task_c", loss=3.00)
    total_loss = Loss("total") + task_a_loss + task_b_loss + task_c_loss
    loss_dict = total_loss.to_log_dict()
    assert loss_dict == {
        "total/loss": 6.33,
        "total/task_a/loss": 1.23,
        "total/task_a/accuracy": 0.95,
        "total/task_b/loss": 2.1,
        "total/task_c/loss": 3.0,
    }


================================================
FILE: sequoia/common/metrics/__init__.py
================================================
from .classification import ClassificationMetrics
from .get_metrics import get_metrics
from .metrics import Metrics, MetricsType
from .metrics_utils import accuracy, class_accuracy, get_class_accuracy, get_confusion_matrix
from .regression import RegressionMetrics
from .rl_metrics import EpisodeMetrics, GradientUsageMetric


================================================
FILE: sequoia/common/metrics/classification.py
================================================
""" Metrics class for classification.

Gives the accuracy, the class accuracy, and the confusion matrix for a given set
of (raw/pre-activation) logits Tensor `y_pred` and the class labels `y`. 
"""
from dataclasses import InitVar, dataclass
from typing import Dict, Optional, Union

import numpy as np
import torch
from simple_parsing import field
from torch import Tensor

from sequoia.utils.serialization import detach, move

from .metrics import Metrics
from .metrics_utils import get_accuracy, get_class_accuracy, get_confusion_matrix

# TODO: Might be a good idea to add a `task` attribute to Metrics or
# Loss objects, in order to check that we aren't adding the class
# accuracies or confusion matrices from different tasks by accident.
# We could also maybe add them but fuse them properly, for instance by
# merging the class accuracies and confusion matrices?
#
# For example, if a first metric has class accuracy [0.1, 0.5]
# (n_samples=100) and from a task with classes [0, 1] is added to a
# second Metrics with class accuracy [0.9, 0.8] (n_samples=100) for task
# with classes [0,3], the resulting Metrics object would have a
# class_accuracy of [0.5 (from (0.1+0.9)/2 = 0.5), 0.5, 0 (no data), 0.8]
# n_samples would then also have to be split on a per-class basis.
# n_samples could maybe be just the sum of the confusion matrix entries?
#
# As for the confusion matrices, they could be first expanded to fit the
# range of both by adding empty columns/rows to each and then be added
# together.


@dataclass
class ClassificationMetrics(Metrics):
    # fields we generate from the confusion matrix (if provided) or from the
    # forward pass tensors.
    accuracy: float = 0.0
    confusion_matrix: Optional[Union[Tensor, np.ndarray]] = field(
        default=None, repr=False, compare=False
    )
    class_accuracy: Optional[Union[Tensor, np.ndarray]] = field(
        default=None, repr=False, compare=False
    )

    # Optional arguments used to create the attributes of the metrics above.
    # NOTE: These wont become attributes on the object, just args to postinit.
    x: InitVar[Optional[Tensor]] = None
    h_x: InitVar[Optional[Tensor]] = None
    logits: InitVar[Optional[Tensor]] = None
    y_pred: InitVar[Optional[Tensor]] = None
    y: InitVar[Optional[Tensor]] = None
    num_classes: InitVar[Optional[int]] = None

    def __post_init__(
        self,
        x: Tensor = None,
        h_x: Tensor = None,
        logits: Tensor = None,
        y_pred: Tensor = None,
        y: Tensor = None,
        num_classes: int = None,
    ):

        super().__post_init__(x=x, h_x=h_x, logits=logits, y_pred=y_pred, y=y)

        if (
            self.confusion_matrix is None
            and (y_pred is not None or logits is not None)
            and y is not None
        ):
            self.confusion_matrix = get_confusion_matrix(
                y_pred=logits if logits is not None else y_pred, y=y, num_classes=num_classes
            )

        # TODO: add other useful metrics (potentially ones using x or h_x?)
        if self.confusion_matrix is not None:
            self.accuracy = get_accuracy(self.confusion_matrix)
            self.accuracy = round(self.accuracy, 6)
            self.class_accuracy = get_class_accuracy(self.confusion_matrix)

    @property
    def objective_name(self) -> str:
        return "Accuracy"

    def __add__(self, other: "ClassificationMetrics") -> "ClassificationMetrics":
        confusion_matrix: Optional[Tensor] = None
        if self.n_samples == 0:
            return other
        if not isinstance(other, ClassificationMetrics):
            return NotImplemented

        # Create the 'sum' confusion matrix:
        confusion_matrix: Optional[np.ndarray] = None
        if self.confusion_matrix is None and other.confusion_matrix is not None:
            confusion_matrix = other.confusion_matrix.clone()
        elif other.confusion_matrix is None:
            confusion_matrix = self.confusion_matrix.clone()
        else:
            confusion_matrix = self.confusion_matrix + other.confusion_matrix

        result = ClassificationMetrics(
            n_samples=self.n_samples + other.n_samples,
            confusion_matrix=confusion_matrix,
            num_classes=self.num_classes,
        )
        return result

    def to_log_dict(self, verbose=False):
        log_dict = super().to_log_dict(verbose=verbose)
        log_dict["accuracy"] = self.accuracy
        if verbose:
            # Maybe add those as plots, rather than tensors?
            log_dict["class_accuracy"] = self.class_accuracy
            log_dict["confusion_matrix"] = self.confusion_matrix
        return log_dict

    # def __str__(self):
    #     s = super().__str__()
    #     s = s.replace(f"accuracy={self.accuracy}", f"accuracy={self.accuracy:.3%}")
    #     return s

    def to_pbar_message(self) -> Dict[str, Union[str, float]]:
        message = super().to_pbar_message()
        message["acc"] = float(self.accuracy)
        return message

    def detach(self) -> "ClassificationMetrics":
        return ClassificationMetrics(
            n_samples=detach(self.n_samples),
            accuracy=float(self.accuracy),
            class_accuracy=detach(self.class_accuracy),
            confusion_matrix=detach(self.confusion_matrix),
        )

    def to(self, device: Union[str, torch.device]) -> "ClassificationMetrics":
        """Returns a new Metrics with all the attributes 'moved' to `device`."""
        return ClassificationMetrics(
            n_samples=move(self.n_samples, device),
            accuracy=move(self.accuracy, device),
            class_accuracy=move(self.class_accuracy, device),
            confusion_matrix=move(self.confusion_matrix, device),
        )

    @property
    def objective(self) -> float:
        return float(self.accuracy)

    # def __lt__(self, other: Union["ClassificationMetrics", Any]) -> bool:
    #     if isinstance(other, ClassificationMetrics):
    #         return self.accuracy < other.accuracy
    #     return NotImplemented

    # def __ge__(self, other: Union["ClassificationMetrics", Any]) -> bool:
    #     if isinstance(other, ClassificationMetrics):
    #         return self.accuracy >= other.accuracy
    #     return NotImplemented

    # def __eq__(self, other: Union["ClassificationMetrics", Any]) -> bool:
    #     if isinstance(other, ClassificationMetrics):
    #         return self.accuracy == other.accuracy and self.n_samples == other.n_samples
    #     return NotImplemented


================================================
FILE: sequoia/common/metrics/classification_test.py
================================================
import numpy as np
import torch

from .classification import ClassificationMetrics
from .get_metrics import get_metrics


def test_classification_metrics_add_properly():
    y_pred = torch.as_tensor(
        [
            [0.01, 0.90, 0.09],
            [0.01, 0, 0.99],
            [0.01, 0, 0.99],
        ]
    )
    y = torch.as_tensor(
        [
            1,
            2,
            0,
        ]
    )
    m1 = ClassificationMetrics(y_pred=y_pred, y=y)
    assert m1.n_samples == 3
    assert np.isclose(m1.accuracy, 2 / 3)

    y_pred = torch.as_tensor(
        [
            [0.01, 0.90, 0.09],
            [0.01, 0, 0.99],
            [0.01, 0, 0.99],
            [0.01, 0, 0.99],
            [0.01, 0, 0.99],
        ]
    )
    y = torch.as_tensor(
        [
            1,
            2,
            2,
            0,
            0,
        ]
    )
    m2 = ClassificationMetrics(y_pred=y_pred, y=y)
    assert m2.n_samples == 5
    assert np.isclose(m2.accuracy, 3 / 5)
    assert all(np.isclose(m2.class_accuracy, [0, 1, 1]))

    m3 = m1 + m2
    assert m3.n_samples == 8
    assert np.isclose(m3.accuracy, 5 / 8)


def test_metrics_from_tensors():
    y_pred = torch.as_tensor(
        [
            [0.01, 0.90, 0.09],
            [0.01, 0, 0.99],
            [0.01, 0, 0.99],
        ]
    )
    y = torch.as_tensor(
        [
            1,
            2,
            0,
        ]
    )
    m = get_metrics(y_pred=y_pred, y=y)
    assert m.n_samples == 3
    assert np.isclose(m.accuracy, 2 / 3)


================================================
FILE: sequoia/common/metrics/get_metrics.py
================================================
""" Defines the get_metrics function with gives back appropriate metrics
for the given tensors.

TODO: Add more metrics! Maybe even fancy things that are based on the
hidden vectors like wasserstein distance, etc?
"""
from typing import List, Optional, Union

import numpy as np
import torch
from torch import Tensor

from sequoia.utils.logging_utils import get_logger

from .classification import ClassificationMetrics
from .metrics import Metrics
from .regression import RegressionMetrics

logger = get_logger(__name__)


def to_optional_tensor(x: Optional[Union[Tensor, np.ndarray, List]]) -> Optional[Tensor]:
    """Converts `x` into a Tensor if `x` is not None, else None."""
    return x if x is None else torch.as_tensor(x)


@torch.no_grad()
def get_metrics(
    y_pred: Union[Tensor, np.ndarray],
    y: Union[Tensor, np.ndarray],
    x: Union[Tensor, np.ndarray] = None,
    h_x: Union[Tensor, np.ndarray] = None,
) -> Optional[Metrics]:
    y = to_optional_tensor(y)
    y_pred = to_optional_tensor(y_pred)
    x = to_optional_tensor(x)
    h_x = to_optional_tensor(h_x)
    if y is not None and y_pred is not None:
        if y.shape != y_pred.shape or not torch.is_floating_point(y):
            # TODO: I think this condition also works for binary classification,
            # at least when the logits have a shape[-1] == 2, but I don't know if it
            # would cause some trouble if there is a single logit, rather than 2.
            return ClassificationMetrics(x=x, h_x=h_x, y_pred=y_pred, y=y)
        return RegressionMetrics(x=x, h_x=h_x, y_pred=y_pred, y=y)
    return None


================================================
FILE: sequoia/common/metrics/metrics.py
================================================
""" Cute little dataclass that is used to describe a given type of Metrics.

This is a bit like the Metrics from pytorch-lightning, but seems easier to use,
as far as I know. Also totally transferable between gpus etc. (Haven't used
the metrics from PL much yet, to be honest).
"""
from dataclasses import dataclass, field, fields
from typing import Any, Dict, TypeVar, Union

import numpy as np
from torch import Tensor

from sequoia.utils.serialization import Serializable

MetricsType = TypeVar("MetricsType", bound="Metrics")


@dataclass
class Metrics(Serializable):
    # This field isn't used in comparisons between Metrics.
    n_samples: int = field(default=0, compare=False)

    # TODO: Refactor this to take any kwargs, and then let each metric type
    # specify its own InitVars.

    def __post_init__(self, **tensors):
        """Creates metrics given `y_pred` and `y`.

        NOTE: Doesn't use `x` and `h_x` for now.

        Args:
            x (Tensor, optional): The input Tensor. Defaults to None.
            h_x (Tensor, optional): The hidden representation for x. Defaults to None.
            y_pred (Tensor, optional): The predicted label. Defaults to None.
            y (Tensor, optional): The true label. Defaults to None.
        """
        # get the batch size:
        for tensor in tensors.values():
            if isinstance(tensor, (np.ndarray, Tensor)) and tensor.shape:
                self.n_samples = tensor.shape[0]
                break

    def __add__(self, other):
        # Instances of the Metrics base class shouldn't be added together, as
        # the subclasses should implement the method. We just return the other.
        return other

    def __radd__(self, other):
        # Instances of the Metrics base class shouldn't be added together, as
        # the subclasses should implement the method. We just return the other.
        if isinstance(other, (int, float)) and other == 0.0:
            return self
        if isinstance(other, Metrics) and type(self) is Metrics:
            assert self.n_samples == 0
            return other
        return NotImplemented

    def __mul__(self, factor: Union[float, Tensor]) -> "Loss":
        # By default, multiplying or dividing a Metrics object doesn't change
        # anything about it.
        return self

    def __rmul__(self, factor: Union[float, Tensor]) -> "Loss":
        # Reverse-order multiply, used to do b * a when a * b returns
        # NotImplemented.
        return self.__mul__(factor)

    def __truediv__(self, coefficient: Union[float, Tensor]) -> "Metrics":
        # By default, multiplying or dividing a Metrics object doesn't change
        # anything about it.
        return self

    def to_log_dict(self, verbose: bool = False) -> Dict:
        """Creates a dictionary to be logged (e.g. by `wandb.log`).

        Args:
            verbose (bool, optional): Wether to include a lot of information, or
            to only log the 'essential' metrics. See the `cleanup` function for
            more info. Defaults to False.

        Returns:
            Dict: A dict containing the things to be logged.

        TODO: Maybe create a `make_plots()` method to get wandb plots from the
        metric?
        """
        log_dict = {}
        for field in fields(self):
            if not (field.repr or verbose):
                continue  # skip field.
            value = getattr(self, field.name)
            if isinstance(value, Metrics):
                log_dict[field.name] = value.to_log_dict(verbose=verbose)
            else:
                log_dict[field.name] = value
        return log_dict

        return {f.name: getattr(self, f.name) for f in fields(self) if f.repr or verbose}

        if verbose:
            return {"n_samples": self.n_samples}
        return {}

    def to_pbar_message(self) -> Dict[str, Union[str, float]]:
        return {}

    def numpy(self):
        """Returns a new object with all the tensor fields converted to numpy arrays."""

        def to_numpy(val: Any):
            if isinstance(val, Tensor):
                return val.detach().cpu().numpy()
            if isinstance(val, (list, tuple)):
                return np.array(val)
            return val

        return type(self)(**{name: to_numpy(val) for name, val in self.items()})

    @property
    def objective(self) -> float:
        """Returns the 'main' metric from this object, as a float.

        Returns
        -------
        float
            The most important metric from this object, as a float.
        """
        return 0
        # raise NotImplementedError(f"TODO: Add the 'objective' property to class {type(self)}")

    @property
    def objective_name(self) -> str:
        """Returns the name to be associated with the objective of this class.

        Returns
        -------
        float
            The name associated with the objective.
        """
        raise NotImplementedError(f"TODO: Add the 'objective_name' property to class {type(self)}")


================================================
FILE: sequoia/common/metrics/metrics_utils.py
================================================
""" Utility functions for calculating metrics. """
from typing import Union

import numpy as np
import torch
from torch import Tensor


@torch.no_grad()
def get_confusion_matrix(
    y_pred: Union[np.ndarray, Tensor], y: Union[np.ndarray, Tensor], num_classes: int = None
) -> Union[Tensor, np.ndarray]:
    """Taken from https://discuss.pytorch.org/t/how-to-find-individual-class-accuracy/6348

    NOTE: `y_pred` is assumed to be the logits with shape [B, C], while the
    labels `y` is assumed to have shape either `[B]` or `[B, 1]`, unless `num_classes`
    is given, in which case y_pred can be the predicted labels.
    """
    if isinstance(y_pred, Tensor):
        y_pred = y_pred.detach().cpu().numpy()
    if isinstance(y, Tensor):
        y = y.detach().cpu().numpy()

    # FIXME: How do we properly check if something is an integer type in np?
    if len(y_pred.shape) == 1 and y_pred.dtype not in {np.float32, np.float64}:
        # y_pred is already the predicted labels.
        y_preds = y_pred
        if num_classes is None:
            raise NotImplementedError(
                f"Can't determine the number of classes. Pass logits rather than predicted labels."
            )
        n_classes = num_classes
    elif y_pred.shape[-1] == 1:
        n_classes = 2  # y_pred is the logit for binary classification.
        y_preds = y_pred.round()
    else:
        # y_pred is assumed to be the logits.
        n_classes = y_pred.shape[-1]
        y_preds = y_pred.argmax(-1)

    y = y.flatten().astype(int)
    y_preds = y_preds.flatten().astype(int)

    # BUG: This is failing on the last batch.
    assert y.shape == y_preds.shape, (y.shape, y_preds.shape)
    # assert y.dtype == y_preds.dtype == np.int, (y.dtype, y_preds.dtype)

    confusion_matrix = np.zeros([n_classes, n_classes])

    assert 0 <= y.min() and y.max() < n_classes, (y, n_classes)
    assert 0 <= y_preds.min() and y_preds.max() < n_classes, (y_preds, n_classes)

    for y_t, y_p in zip(y, y_preds):
        confusion_matrix[y_t, y_p] += 1
    return confusion_matrix


@torch.no_grad()
def accuracy(y_pred: Union[Tensor, np.ndarray], y: Union[Tensor, np.ndarray]) -> float:
    confusion_mat = get_confusion_matrix(y_pred=y_pred, y=y)
    batch_size = y_pred.shape[0]
    _, predicted = y_pred.max(-1)
    acc = (predicted == y).sum(dtype=float) / batch_size
    return acc.item()


@torch.no_grad()
def get_accuracy(confusion_matrix: Union[Tensor, np.ndarray]) -> float:
    if isinstance(confusion_matrix, Tensor):
        diagonal = confusion_matrix.diag()
    else:
        diagonal = np.diag(confusion_matrix)
    return (diagonal.sum() / confusion_matrix.sum()).item()


@torch.no_grad()
def class_accuracy(y_pred: Tensor, y: Tensor) -> Tensor:
    confusion_mat = get_confusion_matrix(y_pred=y_pred, y=y)
    return get_class_accuracy(confusion_mat)


@torch.no_grad()
def get_class_accuracy(confusion_matrix: Tensor) -> Tensor:
    if isinstance(confusion_matrix, Tensor):
        diagonal = confusion_matrix.diag()
    else:
        diagonal = np.diag(confusion_matrix)
    sum_of_columns = confusion_matrix.sum(1)
    if isinstance(confusion_matrix, Tensor):
        sum_of_columns.clamp_(min=1e-10)
    else:
        sum_of_columns = sum_of_columns.clip(min=1e-10)
    return diagonal / sum_of_columns


================================================
FILE: sequoia/common/metrics/metrics_utils_test.py
================================================
import numpy as np
import torch

from .metrics_utils import accuracy, class_accuracy, get_confusion_matrix


def test_accuracy():
    y_pred = torch.as_tensor(
        [
            [0.01, 0.90, 0.09],
            [0.01, 0, 0.99],
            [0.01, 0, 0.99],
        ]
    )
    y = torch.as_tensor(
        [
            1,
            2,
            0,
        ]
    )
    assert np.isclose(accuracy(y_pred, y), 2 / 3)


def test_per_class_accuracy_perfect():
    y_pred = torch.as_tensor(
        [
            [0.1, 0.9, 0.0],
            [0.1, 0.0, 0.9],
            [0.1, 0.4, 0.5],
            [0.9, 0.1, 0.0],
        ]
    )
    y = torch.as_tensor(
        [
            1,
            2,
            2,
            0,
        ]
    )
    expected = [1, 1, 1]
    class_acc = class_accuracy(y_pred, y).tolist()
    assert class_acc == expected


def test_per_class_accuracy_zero():
    y_pred = torch.as_tensor(
        [
            [0.1, 0.9, 0.0],
            [0.1, 0.9, 0.0],
            [0.1, 0.9, 0.0],
            [0.1, 0.9, 0.0],
        ]
    )
    y = torch.as_tensor(
        [
            0,
            0,
            0,
            0,
        ]
    )
    expected = [0, 0, 0]
    class_acc = class_accuracy(y_pred, y).tolist()
    assert class_acc == expected


def test_confusion_matrix():
    y_pred = torch.as_tensor(
        [
            [0.1, 0.9, 0.0],
            [0.1, 0.4, 0.5],
            [0.1, 0.9, 0.0],
            [0.9, 0.0, 0.1],
        ]
    )
    y = torch.as_tensor(
        [
            0,
            0,
            1,
            0,
        ]
    )
    expected = [
        [1, 1, 1],
        [0, 1, 0],
        [0, 0, 0],
    ]
    confusion_mat = get_confusion_matrix(y_pred=y_pred, y=y).tolist()
    assert confusion_mat == expected


def test_per_class_accuracy_realistic():
    y_pred = torch.as_tensor(
        [
            [0.9, 0.0, 0.0],  # correct for class 0
            [0.1, 0.5, 0.4],  # correct for class 1
            [0.1, 0.0, 0.9],  # correct for class 2
            [0.1, 0.8, 0.1],  # wrong, should be 1
            [0.1, 0.0, 0.9],  # wrong, should be 0
            [0.9, 0.0, 0.0],  # wrong, should be 1
            [0.1, 0.5, 0.4],  # wrong, should be 2
            [0.1, 0.4, 0.5],  # correct for class 2
        ]
    )
    y = torch.as_tensor(
        [
            0,
            1,
            2,
            0,
            0,
            1,
            2,
            2,
        ]
    )
    expected = [1 / 3, 1 / 2, 2 / 3]
    class_acc = class_accuracy(y_pred, y).tolist()
    assert all(np.isclose(class_acc, expected))


================================================
FILE: sequoia/common/metrics/regression.py
================================================
""" Metrics class for regression.

Gives the mean squared error between a prediction Tensor `y_pred` and the
target tensor `y`. 
"""

from dataclasses import InitVar, dataclass
from functools import total_ordering
from typing import Any, Dict, Optional, Union

import torch
import torch.nn.functional as functional
from torch import Tensor

from sequoia.utils.logging_utils import get_logger

from .metrics import Metrics

logger = get_logger(__name__)


@total_ordering
@dataclass
class RegressionMetrics(Metrics):
    """TODO: Use this in the RL settings!"""

    mse: Tensor = 0.0  # type: ignore
    l1_error: Tensor = 0.0  # type: ignore

    x: InitVar[Optional[Tensor]] = None
    h_x: InitVar[Optional[Tensor]] = None
    y_pred: InitVar[Optional[Tensor]] = None
    y: InitVar[Optional[Tensor]] = None

    def __post_init__(
        self, x: Tensor = None, h_x: Tensor = None, y_pred: Tensor = None, y: Tensor = None
    ):
        super().__post_init__(x=x, h_x=h_x, y_pred=y_pred, y=y)
        if y_pred is not None and y is not None:
            if y.shape != y_pred.shape:
                logger.warning(
                    UserWarning(
                        f"Shapes aren't the same! (y_pred.shape={y_pred.shape}, "
                        f"y.shape={y.shape}"
                    )
                )
            else:
                self.mse = functional.mse_loss(y_pred, y)
                self.l1_error = functional.l1_loss(y_pred, y)

        self.mse = torch.as_tensor(self.mse)
        self.l1_error = torch.as_tensor(self.l1_error)

    @property
    def objective(self) -> float:
        return float(self.mse)

    def __add__(self, other: "RegressionMetrics") -> "RegressionMetrics":
        # NOTE: Creates new tensors, and links them to the previous ones by
        # addition so the grads are linked.
        if self.mse is not None:
            mse = self.mse.clone()
        if other.mse is not None:
            mse = other.mse.clone()
        else:
            mse = torch.zeros(1)

        if self.l1_error is not None:
            l1_error = self.l1_error.clone()
        if other.l1_error is not None:
            l1_error = other.l1_error.clone()
        else:
            l1_error = torch.zeros(1)

        return RegressionMetrics(
            n_samples=self.n_samples + other.n_samples,
            mse=mse,
            l1_error=l1_error,
        )

    def to_pbar_message(self) -> Dict[str, Union[str, float]]:
        message = super().to_pbar_message()
        message["mse"] = float(self.mse.item())
        message["l1_error"] = float(self.l1_error.item())
        return message

    def to_log_dict(self, verbose=False):
        log_dict = super().to_log_dict(verbose=verbose)
        log_dict["mse"] = self.mse
        log_dict["l1_error"] = self.l1_error
        return log_dict

    def __mul__(self, factor: Union[float, Tensor]) -> "Loss":
        # Multiplying a 'RegressionMetrics' object multiplies its 'mse'.
        return RegressionMetrics(
            n_samples=self.n_samples,
            mse=self.mse * factor,
            l1_error=self.l1_error * factor,
        )

    def __rmul__(self, factor: Union[float, Tensor]) -> "Loss":
        # Reverse-order multiply, used to do b * a when a * b returns
        # NotImplemented.
        return self.__mul__(factor)

    def __truediv__(self, coefficient: Union[float, Tensor]) -> "RegressionMetrics":
        # Dividing a RegressionMetrics object divides its mean squared error.
        return RegressionMetrics(
            n_samples=self.n_samples,
            mse=self.mse / coefficient,
            l1_error=self.l1_error / coefficient,
        )

    def __lt__(self, other: Union["RegressionMetrics", Any]) -> bool:
        if isinstance(other, RegressionMetrics):
            return self.mse < other.mse
        return NotImplemented

    def __ge__(self, other: Union["RegressionMetrics", Any]) -> bool:
        if isinstance(other, RegressionMetrics):
            return self.mse >= other.mse
        return NotImplemented


================================================
FILE: sequoia/common/metrics/rl_metrics.py
================================================
from dataclasses import dataclass, field
from typing import Any, Dict, Union

from .metrics import Metrics


@dataclass
class EpisodeMetrics(Metrics):
    """Metrics for Episodes in RL.

    n_samples is the number of stored episodes.
    """

    n_samples: int = field(default=1, compare=False)
    # The average reward per episode.
    mean_episode_reward: float = 0.0
    # The average length of each episode.
    mean_episode_length: float = 0

    @property
    def n_episodes(self) -> int:
        return self.n_samples

    @property
    def objective_name(self) -> str:
        """Returns the name to be associated with the objective of this class.

        Returns
        -------
        str
            The name associated with the objective.
        """
        return "Mean Reward per Episode"

    @property
    def mean_reward_per_step(self) -> float:
        return self.mean_episode_reward / self.mean_episode_length

    def __add__(self, other: Union["EpisodeMetrics", Any]):
        if isinstance(other, (int, float)) and other == 0:
            # This makes `sum(list_of_metrics)` work!.
            return self
        if isinstance(other, Metrics) and other == Metrics():
            return self
        if not isinstance(other, EpisodeMetrics):
            return NotImplemented

        other: EpisodeMetrics
        other_total_reward = other.mean_episode_reward * other.n_samples
        other_total_length = other.mean_episode_length * other.n_samples
        self_total_reward = self.mean_episode_reward * self.n_samples
        self_total_length = self.mean_episode_length * self.n_samples

        new_n_samples = self.n_samples + other.n_samples
        new_mean_reward = (self_total_reward + other_total_reward) / new_n_samples
        new_mean_length = (self_total_length + other_total_length) / new_n_samples

        return EpisodeMetrics(
            n_samples=new_n_samples,
            mean_episode_reward=new_mean_reward,
            mean_episode_length=new_mean_length,
        )

    @property
    def total_reward(self) -> float:
        return self.n_episodes * self.mean_episode_reward

    @property
    def total_steps(self) -> int:
        return round(self.n_episodes * self.mean_episode_length)

    def to_pbar_message(self) -> Dict[str, Union[str, float]]:
        return self.to_log_dict()

    @property
    def objective(self) -> float:
        return self.mean_episode_reward

    def to_log_dict(self, verbose: bool = False):
        log_dict = {
            "Episodes": self.n_episodes,
            "Mean reward per episode": self.mean_episode_reward,
            "Mean reward per step": self.mean_reward_per_step,
        }
        if verbose:
            log_dict.update(
                {
                    "Total steps": int(self.total_steps),
                    "Total reward": int(self.total_reward),
                    "Mean episode length": float(self.mean_episode_length),
                }
            )
        return log_dict

    @property
    def episodes(self) -> int:
        return self.n_samples

    @property
    def mean_reward_per_episode(self) -> float:
        return self.mean_episode_reward


# @dataclass
# class RLMetrics(Metrics):
#     episodes: List[EpisodeMetrics] = field(default_factory=list, repr=False)

#     average_episode_length: int = field(default=0)
#     average_episode_reward: float = field(default=0.)

#     def __post_init__(self):
#         if self.episodes:
#             self.n_samples = len(self.episodes)
#             self.average_episode_length = sum(ep.episode_length for ep in self.episodes) / self.n_samples
#             self.average_episode_reward = sum(ep.total_reward for ep in self.episodes) / self.n_samples

#     def __add__(self, other: Union["RLMetrics", EpisodeMetrics, Any]) -> "RLMetrics":
#         if isinstance(other, RLMetrics):
#             return RLMetrics(
#                 episodes = self.episodes + other.episodes,
#             )
#         if isinstance(other, EpisodeMetrics):
#             self.episodes.append(other)
#             return self
#         return NotImplemented

#     def to_pbar_message(self) -> Dict[str, Union[str, float]]:
#         log_dict = self.to_log_dict()
#         # Rename "n_samples" to "episodes":
#         log_dict["episodes"] = log_dict.pop("n_samples")
#         return log_dict


@dataclass
class GradientUsageMetric(Metrics):
    """Small Metrics to report the fraction of gradients that were used vs
    'wasted', when using batch_size > 1.
    """

    used_gradients: int = 0
    wasted_gradients: int = 0
    used_gradients_fraction: float = 0.0

    def __post_init__(self):
        self.n_samples = self.used_gradients + self.wasted_gradients
        if self.n_samples:
            self.used_gradients_fraction = self.used_gradients / self.n_samples

    def __add__(self, other: Union["GradientUsageMetric", Any]) -> "GradientUsageMetric":
        if not isinstance(other, GradientUsageMetric):
            return NotImplemented
        return GradientUsageMetric(
            used_gradients=self.used_gradients + other.used_gradients,
            wasted_gradients=self.wasted_gradients + other.wasted_gradients,
        )

    def to_pbar_message(self) -> Dict[str, Union[str, float]]:
        return {"used_fraction": self.used_gradients_fraction}


================================================
FILE: sequoia/common/replay.py
================================================
""" Labeled, Unlabeled and Semi-supervised Replay buffer objects.

TODO: Unused for now, but could be used in a LightningModule.
"""
import random
from collections import Counter, deque
from dataclasses import dataclass
from typing import *

import torch
from simple_parsing import field
from torch import Tensor
from torch.utils.data import TensorDataset

from sequoia.utils.logging_utils import get_logger
from sequoia.utils.serialization import Pickleable, Serializable

logger = get_logger(__name__)
T = TypeVar("T")


class ReplayBuffer(deque, Deque[T], Pickleable):
    """Simple implementation of a replay buffer.

    Uses a doubly-ended Queue, which unfortunately isn't registered as a buffer
    for pytorch.
    """

    def __init__(self, capacity: int):
        super().__init__(maxlen=capacity)
        # self.extend("ABC")
        self.capacity: int = capacity
        # TODO: figure out how to persist the buffer with state_dict maybe?
        # self.register_buffer("memory", torch.zeros(1))
        self.labeled: Optional[bool] = None
        self.current_size: int = 0

    def as_dataset(self) -> TensorDataset:
        contents = zip(*self)
        return TensorDataset(*map(torch.stack, contents))

    def _push_and_sample(self, *values: T, size: int) -> List[T]:
        """Pushes `values` into the buffer and samples `size` samples from it.

        NOTE: In contrast to `push`, allows sampling more than `len(self)`
        samples from the buffer (up to `len(self) + len(values)`)

        Args:
            *values (T): An iterable of items to push.
            size (int): Number of samples to take.
        """
        extended = list(self)
        extended.extend(values)
        # NOTE: Type hints indicate that random.shuffle expects a list, not
        # a deque. Seems to work just fine though.
        random.shuffle(extended)  # type: ignore
        assert size <= len(
            extended
        ), f"Asked to sample {size} values, while there are only {len(extended)} in the batch + buffer!"

        self.extend(extended)
        return extended[:size]

    def _sample(self, size: int) -> List[T]:
        assert size <= len(
            self
        ), f"Asked to sample {size} values while there are only {len(self)} in the buffer!"
        return random.sample(self, size)

    @property
    def full(self) -> bool:
        return len(self) == self.capacity


class UnlabeledReplayBuffer(ReplayBuffer[Tensor]):
    def sample_batch(self, size: int) -> Tensor:
        batch = super()._sample(size)
        return torch.stack(batch)

    def push(self, x_batch: Tensor, y_batch: Tensor = None) -> None:
        super().extend(x_batch)

    def push_and_sample(self, x_batch: Tensor, y_batch: Tensor = None, size: int = None) -> Tensor:
        size = x_batch.shape[0] if size is None else size
        return torch.stack(super()._push_and_sample(x_batch, size=size))


class LabeledReplayBuffer(ReplayBuffer[Tuple[Tensor, Tensor]]):
    def sample(self, size: int) -> Tuple[Tensor, Tensor]:
        list_of_pairs = super()._sample(size)
        data_list, target_list = zip(*list_of_pairs)
        return torch.stack(data_list), torch.stack(target_list)

    def push(self, x_batch: Tensor, y_batch: Tensor) -> None:
        super().extend(zip(x_batch, y_batch))

    def push_and_sample(
        self, x_batch: Tensor, y_batch: Tensor, size: int = None
    ) -> Tuple[Tensor, Tensor]:
        size = x_batch.shape[0] if size is None else size
        list_of_pairs = super()._push_and_sample(*zip(x_batch, y_batch), size=size)
        data_list, target_list = zip(*list_of_pairs)
        return torch.stack(data_list), torch.stack(target_list)

    def samples_per_class(self) -> Dict[int, int]:
        """Returns a Counter showing how many samples there are per class."""
        # TODO: Idea, could use the None key for unlabeled replay buffer.
        return Counter(int(y) for x, y in self)


class SemiSupervisedReplayBuffer(object):
    def __init__(self, labeled_capacity: int, unlabeled_capacity: int = 0):
        """Semi-Supervised (ish) version of a replay buffer.
        With the default parameters, acts just like a regular replay buffer.

        When passed `unlabeled_capacity`, allows for storing unlabeled samples
        as well as labeled samples. Unlabeled samples are stored in a different
        buffer than labeled samples.

        Allows sampling both labeled and unlabeled samples.

        Args:
            labeled_capacity (int): [description]
            unlabeled_capacity (int, optional): [description]. Defaults to 0.
        """
        super().__init__()
        self.labeled_capacity = labeled_capacity
        self.unlabeled_capacity = unlabeled_capacity

        self.labeled = LabeledReplayBuffer(labeled_capacity)
        self.unlabeled = UnlabeledReplayBuffer(unlabeled_capacity)

    def sample(self, size: int) -> Tuple[Tensor, Tensor]:
        """Takes `size` (labeled) samples from the buffer.

        Args:
            size (int): Number of samples to return.

        Returns:
            Tuple[Tensor, Tensor]: batched data and label tensors.
        """
        assert size <= len(self.labeled), (
            f"Asked to sample {size} values while there are only "
            f"{len(self.labeled)} labeled samples in the buffer! "
        )
        return self.labeled.sample(size)

    def sample_unlabeled(self, size: int, take_from_labeled_buffer_first: bool = None) -> Tensor:
        """Samples `size` unlabeled samples.

        Can also use samples from the labeled replay buffer (while discarding
        the labels) if there is no unlabeled replay buffer.

        Args:
            size (int): Number of x's to sample
            take_from_labeled_buffer_first (bool, optional):
                When `None` (default), doesn't take any samples from the labeled
                buffer.
                When `True`, prioritizes taking samples from the labeled replay
                buffer.
                When `False`, prioritizes taking samples from the unlabeled replay
                buffer, but take the remaining samples from the labeled buffer.

        Returns:
            Tensor: A batch of X's.
        """

        total = len(self.unlabeled)
        if take_from_labeled_buffer_first is not None:
            total += len(self.labeled)

        assert size <= total, (
            f"Asked to sample {size} values while there are only "
            f"{total} unlabeled samples in total in the buffer! "
        )
        # Number of x's we still have to sample.
        samples_left = size
        tensors: List[Tensor] = []

        if take_from_labeled_buffer_first:
            # Take labeled samples and drop the label.
            n_samples_from_labeled = min(len(self.labeled), samples_left)
            if n_samples_from_labeled > 0:
                data, _ = self.labeled.sample(size)
                samples_left -= data.shape[0]
                tensors.append(data)

        # Take the rest of the samples from the unlabeled buffer.
        n_samples_from_labeled = min(len(self.labeled), samples_left)
        data = self.unlabeled.sample_batch(samples_left)
        tensors.append(data)
        samples_left -= data.shape[0]

        if take_from_labeled_buffer_first is False:
            # Take the rest of the labeled samples and drop the label.
            n_samples_from_labeled = min(len(self.labeled), samples_left)
            if n_samples_from_labeled > 0:
                data, _ = self.labeled.sample(size)
                samples_left -= data.shape[0]
                tensors.append(data)

        data = torch.cat(tensors)
        return data

    def push_and_sample(self, x: Tensor, y: Tensor, size: int = None) -> Tuple[Tensor, Tensor]:
        size = x.shape[0] if size is None else size
        self.unlabeled.push(x)
        return self.labeled.push_and_sample(x, y, size=size)

    def push_and_sample_unlabeled(self, x: Tensor, y: Tensor = None, size: int = None) -> Tensor:
        size = x.shape[0] if size is None else size
        if y is not None:
            self.labeled.push(x, y)
        return self.unlabeled.push_and_sample(x, size=size)

    def clear(self):
        self.labeled.clear()
        self.unlabeled.clear()


@dataclass
class ReplayOptions(Serializable):
    """Options related to Replay."""

    # Size of the labeled replay buffer.
    labeled_buffer_size: int = field(0, alias="replay_buffer_size")
    # Size of the unlabeled replay buffer.
    unlabeled_buffer_size: int = 0

    # Always use the replay buffer to help "smooth" out the data stream.
    always_use_replay: bool = False
    # Sampling size, when used as described above to smooth out the data stream.
    # If not given, will use the same value as the batch size.
    sampled_batch_size: Optional[int] = None

    @property
    def enabled(self) -> bool:
        return self.labeled_buffer_size > 0 or self.unlabeled_buffer_size > 0


================================================
FILE: sequoia/common/spaces/__init__.py
================================================
""" Custom `gym.spaces.Space` subclasses used by Sequoia. """
from .image import Image, ImageTensorSpace
from .named_tuple import NamedTuple, NamedTupleSpace
from .space import Space
from .sparse import Sparse
from .tensor_spaces import TensorBox, TensorDiscrete, TensorMultiDiscrete, TensorSpace
from .typed_dict import TypedDictSpace


================================================
FILE: sequoia/common/spaces/image.py
================================================
""" IDEA: Create a subclass of spaces.Box for images.
"""
from typing import Optional, Tuple, Union

import numpy as np
import torch
from gym import spaces
from gym.vector.utils import batch_space

from .space import Space, T
from .tensor_spaces import TensorBox


def could_become_image(space: spaces.Space) -> bool:
    if not isinstance(space, spaces.Box):
        return False
    shape = space.shape
    return len(shape) == 3 and (
        shape[0] == shape[1] and shape[2] in {1, 3} or shape[1] == shape[2] and shape[0] in {1, 3}
    )


class Image(spaces.Box, Space[T]):
    """Subclass of `gym.spaces.Box` for images.

    Comes with a few useful attributes, like `h`, `w`, `c`, `channels_first`,
    `channels_last`, etc.
    """

    def __init__(
        self,
        low: Union[float, np.ndarray],
        high: Union[float, np.ndarray],
        shape: Tuple[int, ...] = None,
        dtype: np.dtype = None,
        **kwargs,
    ):
        if dtype is None:
            if isinstance(low, int) and isinstance(high, int) and low == 0 and high == 255:
                dtype = np.uint8
            else:
                dtype = np.float32
        super().__init__(low=low, high=high, shape=shape, dtype=dtype, **kwargs)
        self.channels_first: bool = False

        # Optional batch dimension
        self.b: Optional[int] = None
        self.h: int
        self.w: int
        self.c: int
        assert len(self.shape) in {3, 4}, "Need three or four dimensions."
        if len(self.shape) == 3:
            self.b = None
            if self.shape[0] in {1, 3}:
                self.c, self.h, self.w = self.shape
                self.channels_first = True
            elif self.shape[-1] in {1, 3}:
                self.h, self.w, self.c = self.shape
            else:
                # NOTE: will assume that in channels_first for now, but won't set
                # `channels_first` property.
                self.c, self.h, self.w = self.shape
        elif len(self.shape) == 4:
            if self.shape[1] in {1, 3}:
                self.b, self.c, self.h, self.w = self.shape
                self.channels_first = True
            elif self.shape[-1] in {1, 3}:
                self.b, self.h, self.w, self.c = self.shape
            else:
                # NOTE: will assume that in channels_first for now:
                self.b, self.c, self.h, self.w = self.shape

        if any(v is None for v in [self.h, self.w, self.c]):
            raise RuntimeError(
                f"Shouldn't be using an Image space, since the shape "
                f"doesn't appear to be an image: {self.shape}"
            )

    @property
    def channels(self) -> int:
        return self.c

    @property
    def height(self) -> int:
        return self.h

    @property
    def width(self) -> int:
        return self.w

    @property
    def batch_size(self) -> Optional[int]:
        return self.b

    @classmethod
    def from_box(cls, box_space: spaces.Box):
        return cls(box_space.low, box_space.high, dtype=box_space.dtype)

    @classmethod
    def wrap(cls, space: Union["Image", spaces.Box]):
        if isinstance(space, Image):
            return space
        if isinstance(space, spaces.Box):
            return cls.from_box(space)
        raise NotImplementedError(space)

    @property
    def channels_last(self) -> bool:
        return not self.channels_first

    def __repr__(self):
        return f"{type(self).__name__}({self.low.min()}, {self.high.max()}, {self.shape}, {self.dtype})"

    def sample(self) -> T:
        return super().sample()


class ImageTensorSpace(Image, TensorBox):
    @classmethod
    def from_box(cls, box_space: TensorBox, device: torch.device = None):
        device = device or box_space.device
        return cls(box_space.low, box_space.high, dtype=box_space.dtype, device=device)

    def __repr__(self):
        return f"{type(self).__name__}({self.low.min()}, {self.high.max()}, {self.shape}, {self.dtype}, device={self.device})"

    def sample(self):
        self.dtype = self._numpy_dtype
        s = super().sample()
        self.dtype = self._torch_dtype
        return torch.as_tensor(s, dtype=self._torch_dtype, device=self.device)


# @to_tensor.register
# def _(space: Image,
#       sample: Union[np.ndarray, Tensor],
#       device: torch.device = None) -> Union[Tensor]:
#     """ Converts a sample from the given space into a Tensor. """
#     return torch.as_tensor(sample, device=device)


@batch_space.register
def _batch_image_space(space: Image, n: int = 1) -> Union[Image, spaces.Box]:
    if space.b is not None:
        # This might happen in BatchedVectorEnv, when creating env_a and env_b,
        # which have an extra batch/chunk dimension.
        if space.b == 1:
            if n == 1:
                return space
            repeats = [n, 1, 1, 1]
        else:
            # instead maybe we should just fall back to a Box Space?
            repeats = [n] + [1] * space.low.ndim
            low, high = np.tile(space.low, repeats), np.tile(space.high, repeats)
            return spaces.Box(low=low, high=high, dtype=space.dtype)

            raise RuntimeError(f"can't batch an already batched image space {space}, n={n}")
    else:
        repeats = [n, 1, 1, 1]
    low, high = np.tile(space.low, repeats), np.tile(space.high, repeats)
    img = type(space)(low=low, high=high, dtype=space.dtype)
    return img


================================================
FILE: sequoia/common/spaces/named_tuple.py
================================================
""" IDEA: Subclass of `gym.spaces.Tuple` that yields namedtuples,
as a bit of a hybrid between `gym.spaces.Dict` and `gym.spaces.Tuple`.
"""
from collections import namedtuple
from collections.abc import Mapping as MappingABC
from typing import Any, Dict, Iterable, List, Mapping, Sequence, Tuple, Type, Union

import numpy as np
from gym import Space, spaces

from sequoia.utils.generic_functions._namedtuple import NamedTuple


class NamedTupleSpace(spaces.Tuple):
    """
    A tuple (i.e., product) of simpler (named) spaces. Samples are namedtuples.

    Example usage:

    ```python
    self.observation_space = NamedTupleSpace(x=spaces.Discrete(2), t=spaces.Discrete(3))
    ```

    Note: here the dtype is actually the type of namedtuple to use, not a
    numpy dtype.
    """

    def __init__(
        self,
        spaces: Union[Mapping[str, Space], Sequence[Space]] = None,
        names: Sequence[str] = None,
        dtype: Type[NamedTuple] = None,
        **kwargs,
    ):
        self._spaces: Dict[str, Space] = {}
        if isinstance(spaces, MappingABC):
            assert names is None
            self._spaces = dict(spaces.items())
        elif kwargs:
            assert all(isinstance(k, str) and isinstance(v, Space) for k, v in kwargs.items())
            self._spaces = kwargs
        else:
            # if not names:
            #     try:
            #         names = [getattr(space, "__name") for space in spaces]
            #     except AttributeError:
            #         pass
            assert names is not None, "need to pass names when spaces isn't a mapping."
            assert spaces and len(names) == len(spaces), "need to pass a name for each space"
            self._spaces = dict(zip(names, spaces))

        # NOTE: dict.values() is ordered since python 3.7.
        spaces = tuple(self._spaces.values())
        super().__init__(spaces)
        self.names: Sequence[str] = tuple(self._spaces.keys())
        self.dtype: Type[Tuple] = dtype or namedtuple("NamedTuple", self.names)
        # idea: could use this _name attribute to change the __repr__ first part
        self._name = self.dtype.__name__
        assert all(name == key for name, key in zip(self.names, self._spaces.keys()))

    def __getitem__(self, index: Union[int, str]) -> Space:
        if isinstance(index, str):
            return self._spaces[index]
        return super().__getitem__(index)

    def __getattr__(self, attr: str) -> Space:
        if attr == "_spaces":
            raise AttributeError(attr)
        if attr in self._spaces:
            return self._spaces[attr]
        raise AttributeError(attr)

    def __repr__(self):
        # TODO: Tricky: decide what name to show for the space class:
        cls_name = type(self).__name__
        # cls_name = self._name or type(self).__name__
        return (
            f"{cls_name}("
            + ", ".join([str(k) + "=" + str(s) for k, s in self._spaces.items()])
            + ")"
        )

    def _replace(self, **kwargs):
        """replaces the given subspaces with newer ones, maintaining the
        current ordering.
        """
        spaces = self._spaces.copy()
        assert all(k in spaces for k in kwargs), "no new keys allowed"
        spaces.update(kwargs)
        return type(self)(**spaces)

    def __eq__(self, other: Union["NamedTupleSpace", Any]) -> bool:
        return isinstance(other, spaces.Tuple) and tuple(self.spaces) == tuple(other.spaces)

    def sample(self):
        return self.dtype(*super().sample())

    def contains(self, x) -> bool:
        if isinstance(x, MappingABC):
            # TODO: If a namedtuple/dataclass has more items than those required
            # by this space, should we consider it valid if all its items are
            # contained in their respective spaces in `self`?
            x = tuple(x[k] for k in self.names)
            # x = tuple(x.values())
        return super().contains(x)

    def keys(self) -> List[str]:
        return self._spaces.keys()

    def values(self) -> List[Space]:
        return self._spaces.values()

    def items(self) -> Iterable[Tuple[str, Space]]:
        yield from self._spaces.items()


# See https://github.com/openai/gym/issues/2140 : Fix __eq__ of gym.spaces.Tuple
def __eq__(self, other: Union["NamedTupleSpace", Any]) -> bool:
    # BUG in openai gym: spaces passed to the spaces.Tuple constructor could
    # be a list of spaces, rather than a tuple, and so this might return
    # False when it shouldn't.
    return isinstance(other, spaces.Tuple) and tuple(self.spaces) == tuple(other.spaces)


spaces.Tuple.__eq__ = __eq__


from gym.spaces.utils import flatten
from gym.vector.utils import batch_space


@batch_space.register(NamedTupleSpace)
def batch_namedtuple_space(space: NamedTupleSpace, n: int = 1):
    return NamedTupleSpace(
        **{key: batch_space(space[key], n) for key in space.names}, dtype=space.dtype
    )


@flatten.register
def flatten_namedtuple_space_sample(space: NamedTupleSpace, x: NamedTuple):
    assert not isinstance(x, Batch), f"NamedTupleSpace, shouldn't have Batch samples: {space} {x}"
    return np.concatenate([flatten(s, x_part) for x_part, s in zip(x, space.spaces)])


================================================
FILE: sequoia/common/spaces/named_tuple_test.py
================================================
import numpy as np
import pytest
from gym import spaces
from gym.spaces import Box, Discrete
from gym.vector.utils import batch_space

from .named_tuple import NamedTuple, NamedTupleSpace

pytestmark = pytest.mark.skip(
    reason="Removing the NamedTuple space and NamedTuple class in favour of TypedDict.",
)


def test_basic():
    named_tuple_space = NamedTupleSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
    )
    v = named_tuple_space.sample()
    print(v)
    assert v in named_tuple_space
    # TODO: Maybe re-use all the tests for gym.spaces.Tuple in the gym repo
    # somehow?

    normal_tuple_space = spaces.Tuple(
        [
            Box(0, 1, (2, 2)),
            Discrete(2),
            Box(0, 1, (2, 2)),
        ]
    )
    assert normal_tuple_space.sample() in named_tuple_space
    assert named_tuple_space.sample() in normal_tuple_space


class StateTransition(NamedTuple):
    current_state: np.ndarray
    action: int
    next_state: np.ndarray


def test_basic_with_dtype():
    named_tuple_space = NamedTupleSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    v = named_tuple_space.sample()
    assert v in named_tuple_space
    assert isinstance(v, StateTransition)

    normal_tuple_space = spaces.Tuple(
        [
            Box(0, 1, (2, 2)),
            Discrete(2),
            Box(0, 1, (2, 2)),
        ]
    )
    assert normal_tuple_space.sample() in named_tuple_space
    assert named_tuple_space.sample() in normal_tuple_space


@pytest.mark.xfail()
def test_isinstance_namedtuple():
    named_tuple_space = NamedTupleSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    assert isinstance(named_tuple_space, NamedTupleSpace)
    assert isinstance(named_tuple_space.sample(), NamedTuple)


def test_equals_tuple_space_with_same_items():
    """Test that a NamedTupleSpace is considered equal to a Tuple space if
    the spaces are in the same order and all equal (regardless of the names).
    """
    named_tuple_space = NamedTupleSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    tuple_space = spaces.Tuple(
        [
            Box(0, 1, (2, 2)),
            Discrete(2),
            Box(0, 1, (2, 2)),
        ]
    )
    assert named_tuple_space == tuple_space
    assert tuple_space == named_tuple_space


def test_batch_objets_considered_valid_samples():
    from dataclasses import dataclass

    import numpy as np

    from sequoia.common.batch import Batch

    @dataclass(frozen=True)
    class StateTransitionDataclass(Batch):
        current_state: np.ndarray
        action: int
        next_state: np.ndarray

    named_tuple_space = NamedTupleSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransitionDataclass,
    )
    obs = StateTransitionDataclass(
        current_state=np.ones([2, 2]) / 2,
        action=1,
        next_state=np.zeros([2, 2]),
    )
    assert obs in named_tuple_space
    assert named_tuple_space.sample() in named_tuple_space
    assert isinstance(named_tuple_space.sample(), StateTransitionDataclass)


def test_batch_space():
    named_tuple_space = NamedTupleSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    assert batch_space(named_tuple_space, n=5) == NamedTupleSpace(
        current_state=Box(0, 1, (5, 2, 2)),
        action=spaces.MultiDiscrete([2, 2, 2, 2, 2]),
        next_state=Box(0, 1, (5, 2, 2)),
        dtype=StateTransition,
    )


## IDEA: Creating a space like this, using the same syntax as with NamedTuple
# class StateTransitionSpace(NamedTupleSpace):
#     current_state: Box = Box(0, 1, (2,2))
#     action: Discrete = Discrete(2)
#     current_state: Box = Box(0, 1, (2,2))

# space = StateTransitionSpace()
# space.sample()


================================================
FILE: sequoia/common/spaces/space.py
================================================
""" Small typing improvements to the `gym.spaces.Space` class. """
from typing import Any, Generic, TypeVar, Union

from gym.spaces import Space as _Space

T = TypeVar("T")


class Space(_Space, Generic[T]):
    def sample(self) -> T:
        return super().sample()

    def __contains__(self, x: Union[T, Any]) -> bool:
        return super().__contains__(x)

    def contains(self, v: Union[T, Any]) -> bool:
        return super().contains(v)


================================================
FILE: sequoia/common/spaces/sparse.py
================================================
""" 'wrapper' around a gym.Space that adds has a probability of sampling `None`
instead of a sample from the 'base' space.

As a result, `None` is always a valid sample from any Sparse space.
"""
import multiprocessing as mp
from ctypes import c_bool

# from gym.spaces.utils import flatdim, flatten
from functools import singledispatch
from multiprocessing.context import BaseContext
from typing import Any, Dict, Optional, Sequence, Tuple, Union

import gym
import gym.spaces.utils
import gym.vector.utils.numpy_utils
import gym.vector.utils.shared_memory
import numpy as np
import torch
from gym import spaces
from gym.vector.utils import batch_space, concatenate
from gym.vector.utils.numpy_utils import concatenate
from torch import Tensor

from .space import Space, T


class Sparse(Space[Optional[T]]):
    """Space which returns a value of `None` `sparsity`% of the time when sampled.

    `None` is also a valid sample of this space in addition to those of the wrapped space.

    TODO: Maybe refactor this into a mixin class, a bit like `TensorSpace`? If so,
    then make sure that we don't suddenly need to create SparseTensorBox and the like.
    """

    def __init__(self, base: Space[T], sparsity: float = 0.0):
        self.base = base
        assert 0 <= sparsity <= 1, "invalid spasity, needs to be in [0, 1]"
        self._sparsity = sparsity
        # Would it ever cause a problem to have different dtypes for different
        # instances of the same space?
        # dtype = self.base.dtype if sparsity == 0. else np.object_
        super().__init__(shape=self.base.shape, dtype=np.object_)

    @property
    def sparsity(self) -> float:
        return self._sparsity

    # def __getattr__(self, attr: str):
    #     return getattr(self.base, attr)

    def seed(self, seed=None):
        super().seed(seed)
        return self.base.seed(seed=seed)

    def sample(self) -> Optional[T]:
        if self.sparsity == 0:
            return self.base.sample()
        if self.sparsity == 1.0:
            return None
        p = self.np_random.random()
        if p <= self.sparsity:
            return None
        else:
            return self.base.sample()

    def contains(self, x: Union[Optional[T], Any]) -> bool:
        """
        Return boolean specifying if x is a valid
        member of this space
        """
        return x is None or self.base.contains(x)

    def __repr__(self):
        return f"Sparse({self.base}, sparsity={self.sparsity})"

    def __eq__(self, other: Any):
        if not isinstance(other, Sparse):
            return NotImplemented
        return other.base == self.base and other.sparsity == self.sparsity

    def to_jsonable(self, sample_n):
        assert False, "TODO: This isn't really ever used anywhere, even in Gym, is it?"
        super().to_jsonable
        # serialize as dict-repr of vectors
        return {
            key: space.to_jsonable([sample[key] for sample in sample_n])
            for key, space in self.spaces.items()
        }

    def from_jsonable(self, sample_n):
        assert False, "TODO: This isn't really ever used anywhere, even in Gym, is it?"
        dict_of_list = {}
        for key, space in self.spaces.items():
            dict_of_list[key] = space.from_jsonable(sample_n[key])
        ret = []
        for i, _ in enumerate(dict_of_list[key]):
            entry = {}
            for key, value in dict_of_list.items():
                entry[key] = value[i]
            ret.append(entry)
        return ret


# Customize how these functions handle `Sparse` spaces by making them
# singledispatch callables and registering a new callable.


def _is_singledispatch(module_function):
    return hasattr(module_function, "registry")


def register_sparse_variant(module, module_fn_name: str):
    """Converts a function from the given module to a singledispatch callable,
    and registers the wrapped function as the callable to use for Sparse spaces.

    The module function must have the space as the first argument for this to
    work.
    """
    module_function = getattr(module, module_fn_name)

    # Convert the function to a singledispatch callable.
    if not _is_singledispatch(module_function):
        module_function = singledispatch(module_function)
        setattr(module, module_fn_name, module_function)
    # Register the function as the callable to use when the first arg is a
    # Sparse object.
    def wrapper(function):
        module_function.register(Sparse, function)
        return function

    return wrapper


@register_sparse_variant(gym.spaces.utils, "flatdim")
def flatdim_sparse(space: Sparse) -> int:
    return gym.spaces.utils.flatdim(space.base)


@register_sparse_variant(gym.spaces.utils, "flatten")
def flatten_sparse(space: Sparse[T], x: Optional[T]) -> Optional[np.ndarray]:
    return np.array([None]) if x is None else gym.spaces.utils.flatten(space.base, x)


@register_sparse_variant(gym.spaces.utils, "flatten_space")
def flatten_sparse_space(space: Sparse[T]) -> Optional[np.ndarray]:
    space = gym.spaces.utils.flatten_space(space.base)
    space.dtype = np.object_
    return space


@register_sparse_variant(gym.spaces.utils, "unflatten")
def unflatten_sparse(space: Sparse[T], x: np.ndarray) -> Optional[T]:
    if len(x) == 1 and x[0] is None:
        return None
    else:
        return gym.spaces.utils.unflatten(space.base, x)


@register_sparse_variant(gym.vector.utils, "create_empty_array")
def create_empty_array_sparse(space: Sparse, n=1, fn=np.zeros) -> np.ndarray:
    return fn([n], dtype=np.object_)


@register_sparse_variant(gym.vector.utils.shared_memory, "create_shared_memory")
def create_shared_memory_for_sparse_space(space: Sparse, n: int = 1, ctx: BaseContext = mp):
    # The shared memory should be something that can accomodate either 'None'
    # or a sample from the space. Therefore we should probably just create the
    # array for the base space, but then how would store a 'None' value in that
    # space?
    # What if we return a tuple or something, in which we actually add an 'is-none'
    print(f"Creating shared memory for {n} entries from space {space}")

    return {
        "is_none": ctx.Array(c_bool, np.zeros(n, dtype=np.bool)),
        "value": gym.vector.utils.shared_memory.create_shared_memory(space.base, n, ctx),
    }


@register_sparse_variant(gym.vector.utils.shared_memory, "write_to_shared_memory")
def write_to_shared_memory(
    index: int,
    value: Optional[T],
    shared_memory: Union[Dict, Tuple, BaseContext.Array],
    space: Union[Sparse[T], gym.Space],
):
    print(f"Writing entry from space {space} at index {index} in shared memory")
    if isinstance(space, Sparse):
        assert isinstance(shared_memory, dict)
        is_none_array = shared_memory["is_none"]
        value_array = shared_memory["value"]
        raise NotImplementedError(f"Still debugging this")
        # assert False, index
        # assert False, is_none_array

        is_none_array[index] = value is None

        if value is not None:
            return write_to_shared_memory(index, value, value_array, space.base)
    else:
        # TODO: Would this cause a problem, say in the case where we have a
        # regular space like Tuple that contains some Sparse spaces, then would
        # calling this "old" function here prevent this "new" function from
        # being used on the children?
        return gym.vector.utils.shared_memory(index, value, shared_memory, space)


from gym.vector.utils.shared_memory import read_from_shared_memory as read_from_shared_memory_


@register_sparse_variant(gym.vector.utils.shared_memory, "read_from_shared_memory")
def read_from_shared_memory(
    shared_memory: Union[Dict, Tuple, BaseContext.Array], space: Sparse, n: int = 1
):
    print(f"Reading {n} entries from space {space} from shared memory")
    if isinstance(space, Sparse):
        assert isinstance(shared_memory, dict)
        is_none_array = list(shared_memory["is_none"])
        value_array = shared_memory["value"]
        assert len(is_none_array) == len(value_array) == n

        # This might include some garbage (or default) values, which weren't
        # set.
        read_values = read_from_shared_memory(value_array, space.base, n)
        print(f"Read values from space: {read_values}")
        print(f"is_none array: {list(is_none_array)}")
        # assert False, (list(is_none_array), read_values, space)
        values = [None if is_none_array[index] else read_values[index] for index in range(n)]
        print(f"resulting values: {values}")
        return values
        return read_from_shared_memory_(shared_memory, space.base, n)
    return read_from_shared_memory_(shared_memory, space, n)


@register_sparse_variant(gym.vector.utils, "batch_space")
def batch_sparse_space(space: Sparse, n: int = 1) -> gym.Space:
    """Batch this sparse space.

    NOTE: The sparsity of `space` currently has an important impact on the kind of space returned!

    Taking a base space of type `Discrete` as an example:
    - If `space.sparsity == 0 or space.sparsity == 1`, then the result is a Sparse[MultiDiscrete],
    - *However*, if `0 < sparsity < 1`, then the result is a `Tuple[Sparse[Discrete], ...]`.
    """
    # NOTE: This means we do something different depending on the sparsity.
    # Could that become an issue?
    # assert _is_singledispatch(batch_space)

    sparsity = space.sparsity

    # NOTE: It is tempting to just make this more consistent by always returning the same kind of
    # result, because it's nice to avoid dealing with arrays like `np.array([None, 1, ])`
    # or, even worse, `np.array([None, None])` which are not fun.
    # *HOWEVER*, it's not a good idea! As an example, when using VectorEnvs, the spaces are just to
    # represent what the observations of the VectorEnv will look like. Since each env has 'its own'
    # Sparse[Discrete] space, and they are "sampled" independantly, then if 0 < sparsity < 1 we WILL
    # have some entries be None and other not. Therefore, it's better in that case to just return
    # the tuple of sparse spaces.
    # return Sparse(batch_space(space.base, n), sparsity=sparsity)

    # TODO: Use something like this eventually. There are still problem with to_tensor.
    # return SparseMultiDiscrete(
    #     np.full((n,), space.n, dtype=space.base.dtype), sparsity=space.sparsity
    # )
    if sparsity in {0, 1}:
        # If the space has 0 sparsity, then batch it just like you would its
        # base space.
        # TODO: This is convenient, but not very consistent, as the length of
        # the batches changes depending on the sparsity of the space..
        return Sparse(batch_space(space.base, n), sparsity=sparsity)

    # Sticking to the default behaviour from gym for now, which is to just
    # return a tuple of length n with n copies of the space.
    return spaces.Tuple(tuple(space for _ in range(n)))

    # We could also do this, where we make the sub-spaces sparse:
    # batch_space(Sparse<Tuple<A, B>>) -> Tuple<batch_space(Sparse<A>), batch_space(Sparse<B>)>

    if isinstance(space.base, spaces.Tuple):
        return spaces.Tuple(
            [
                spaces.Tuple([Sparse(sub_space, sparsity) for _ in range(n)])
                for sub_space in space.base.spaces
            ]
        )
    if isinstance(space.base, spaces.Dict):
        return spaces.Dict(
            {
                name: Sparse(batch_space(sub_space, n), sparsity)
                for name, sub_space in space.base.spaces.items()
            }
        )

    return batch_space(space.base, n)


@register_sparse_variant(gym.vector.utils.numpy_utils, "concatenate")
def concatenate_sparse_items(
    space: Sparse, items: Sequence[Optional[T]], out: Union[tuple, dict, np.ndarray]
) -> Optional[Sequence[T]]:
    if space.sparsity == 0:
        if not all(item is not None for item in items):
            raise ValueError("Space has sparsity of 0, there shouldn't be any `None` items!")
        # Assume that the items are samples of the individual spaces.
        # In most cases this means they shouldn't be None, but there's the special case where the
        # individual spaces are also Sparse, and then it's fine for them to be None.
        return concatenate(space.base, items=items, out=out)
    if space.sparsity == 1:
        if not all(item is None for item in items):
            raise ValueError("Space has sparsity of 1, all items should be None!")
        # Assume that the items are samples of the individual spaces.
        # In most cases this means they shouldn't be None, but there's the special case where the
        # individual spaces are also Sparse, and then it's fine for them to be None.
        return None
    return tuple(items)
    # NOTE: Avoiding returning this np.array of type `object`, simply because `np.array([None])` is
    # not fun to have to deal with.
    # return np.array([None if v == None else v for v in items], dtype=object)
    return np.array(items)
    # for i, item in enumerate(items):
    #     out[i] = items
    # return out


from sequoia.utils.generic_functions.to_from_tensor import to_tensor


@to_tensor.register(Sparse)
def sparse_sample_to_tensor(
    space: Sparse, sample: Union[Optional[Any], np.ndarray], device: torch.device = None
) -> Optional[Union[Tensor, np.ndarray]]:
    if space.sparsity == 1.0:
        if isinstance(space.base, spaces.MultiDiscrete):
            assert all(v == None for v in sample)
            return np.array([None if v == None else v for v in sample])
        if sample is not None:
            assert isinstance(sample, np.ndarray) and sample.dtype == np.object
            assert not sample.shape
        return None
    if space.sparsity == 0.0:
        # Do we need to convert dtypes here though?
        return to_tensor(space.base, sample, device)
    # 0 < sparsity < 1
    if isinstance(sample, np.ndarray) and sample.dtype == np.object:
        return np.array([None if v == None else v for v in sample])

    assert False, (space, sample)


================================================
FILE: sequoia/common/spaces/sparse_test.py
================================================
from typing import Iterable

import gym
import numpy as np
import pytest
from gym import spaces

from .sparse import Sparse

base_spaces = [
    spaces.Discrete(n=10),
    spaces.Box(0, 1, [3, 32, 32], dtype=np.float32),
    spaces.Tuple(
        [
            spaces.Discrete(n=10),
            spaces.Box(0, 1, [3, 32, 32], dtype=np.float32),
        ]
    ),
    spaces.Dict(
        {
            "x": spaces.Tuple(
                [
                    spaces.Discrete(n=10),
                    spaces.Box(0, 1, [3, 32, 32], dtype=np.float32),
                ]
            ),
            "t": spaces.Discrete(1),
        }
    ),
]


def equals(value, expected) -> bool:
    assert type(value) == type(expected)
    if isinstance(value, (int, float, bool)):
        return value == expected
    if isinstance(value, np.ndarray):
        return value.tolist() == expected.tolist()
    if isinstance(value, (tuple, list)):
        assert len(value) == len(expected)
        return all(equals(a_v, e_v) for a_v, e_v in zip(value, expected))
    if isinstance(value, dict):
        assert len(value) == len(expected)
        for k in expected.keys():
            if k not in value:
                return False
            if not equals(value[k], expected[k]):
                return False
        return True
    return value == expected


def is_sparse(iterable: Iterable[bool]) -> bool:
    """Returns wether some (but not all) values in the iterable are None."""
    none_values: int = 0
    non_none_values: int = 0
    for value in iterable:
        if value is None:
            none_values += 1
            if non_none_values:
                return True
        else:
            non_none_values += 1
            if none_values:
                return True
    return False
    # Equivalent, but with a copy:
    values = list(values)
    return any(v is None for v in values) and not all(v is None for v in values)


@pytest.mark.parametrize("base_space", base_spaces)
def test_sample(base_space: gym.Space):
    space = Sparse(base_space, sparsity=0.0)
    samples = [space.sample() for i in range(100)]
    assert all(sample is not None for sample in samples)
    assert all(sample in base_space for sample in samples)

    space = Sparse(base_space, sparsity=0.5)
    samples = [space.sample() for i in range(100)]
    assert is_sparse(samples)
    assert all([sample in base_space for sample in samples if sample is not None])

    space = Sparse(base_space, sparsity=1.0)
    samples = [space.sample() for i in range(100)]
    assert all(sample is None for sample in samples)


@pytest.mark.parametrize("sparsity", [0.0, 0.5, 1.0])
@pytest.mark.parametrize("base_space", base_spaces)
def test_contains(base_space: gym.Space, sparsity: float):
    space = Sparse(base_space, sparsity=sparsity)
    samples = [space.sample() for i in range(100)]
    assert all(sample in space for sample in samples)


from gym.vector.utils import batch_space


@pytest.mark.parametrize("base_space", base_spaces)
def test_batching_works(base_space: gym.Space, n: int = 3):
    batched_base_space = batch_space(base_space, n)
    sparse_space = Sparse(base_space)

    batched_sparse_space = batch_space(sparse_space, n)

    base_batch = batched_base_space.sample()
    sparse_batch = batched_sparse_space.sample()
    assert len(base_batch) == len(sparse_batch)


# @pytest.mark.xfail(reason="TODO: Need to decide how we want the sparsity to "
#                           "affect the batching of Tuple or Dict spaces.")
@pytest.mark.parametrize("base_space", base_spaces)
@pytest.mark.parametrize("sparsity", [0.0, 0.5, 1.0])
def test_batching_works(base_space: gym.Space, sparsity: float, n: int = 10):
    batched_base_space = batch_space(base_space, n)

    sparse_space = Sparse(base_space, sparsity=sparsity)
    batched_sparse_space = batch_space(sparse_space, n)

    batched_base_space.seed(123)
    base_batch = batched_base_space.sample()

    batched_sparse_space.seed(123)
    sparse_batch = batched_sparse_space.sample()

    if sparsity == 0:
        # When there is no sparsity, the batching is the same as batching the
        # same space.
        assert equals(base_batch, sparse_batch)
    elif sparsity == 1:
        assert sparse_batch is None
        # assert len(sparse_batch) == n
        # assert sparse_batch == tuple([None] * n)
    else:
        assert len(sparse_batch) == n
        assert isinstance(sparse_batch, tuple)

        for i, value in enumerate(sparse_batch):
            if value is not None:
                assert value in base_space

        # There should be some sparsity.
        assert any(v is None for v in sparse_batch) and not all(
            v is None for v in sparse_batch
        ), sparse_batch


from gym.spaces.utils import flatdim, flatten


@pytest.mark.xfail(
    reason="When using the normal gym repo rather than the "
    "fork, the change doesn't persist through an import."
)
def test_change_doesnt_persist_after_import():
    """When re-importing the `concatenate` function from `gym.vector.utils`,
    the changes aren't preserved.
    """
    assert hasattr(gym.vector.utils.numpy_utils.concatenate, "registry")
    assert hasattr(gym.vector.utils.batch_space, "registry")


def test_change_persists_after_full_import():
    """When re-importing the `concatenate` function from
    `gym.vector.utils.numpy_utils`, the changes are preserved.
    """
    assert hasattr(gym.vector.utils.numpy_utils.concatenate, "registry")
    assert hasattr(gym.vector.utils.batch_space, "registry")


@pytest.mark.parametrize("base_space", base_spaces)
def test_flatdim(base_space: gym.Space):
    sparse_space = Sparse(base_space, sparsity=0.0)

    base_flat_dims = flatdim(base_space)
    sparse_flat_dims = flatdim(sparse_space)

    assert base_flat_dims == sparse_flat_dims


@pytest.mark.parametrize("base_space", base_spaces)
def test_flatdim(base_space: gym.Space):
    sparse_space = Sparse(base_space, sparsity=0.0)

    base_flat_dims = flatdim(base_space)
    sparse_flat_dims = flatdim(sparse_space)
    assert base_flat_dims == sparse_flat_dims

    # The flattened dimensions shouldn't depend on the sparsity.
    sparse_space = Sparse(base_space, sparsity=1.0)
    sparse_flat_dims = flatdim(sparse_space)
    assert base_flat_dims == sparse_flat_dims


@pytest.mark.parametrize("base_space", base_spaces)
def test_seeding_works(base_space: gym.Space):
    sparse_space = Sparse(base_space, sparsity=0.0)

    base_space.seed(123)
    base_sample = base_space.sample()

    sparse_space.seed(123)
    sparse_sample = sparse_space.sample()

    assert equals(base_sample, sparse_sample)


@pytest.mark.parametrize("base_space", base_spaces)
def test_flatten(base_space: gym.Space):
    sparse_space = Sparse(base_space, sparsity=0.0)
    base_space.seed(123)
    base_sample = base_space.sample()
    flattened_base_sample = flatten(base_space, base_sample)

    sparse_space.seed(123)
    sparse_sample = sparse_space.sample()
    flattened_sparse_sample = flatten(sparse_space, sparse_sample)

    assert equals(flattened_base_sample, flattened_sparse_sample)


@pytest.mark.parametrize("base_space", base_spaces)
def test_equality(base_space: gym.Space):
    sparse_space = Sparse(base_space, sparsity=0.0)
    other_space = Sparse(base_space, sparsity=0.0)
    assert sparse_space == other_space

    sparse_space = Sparse(base_space, sparsity=0.2)
    assert sparse_space != other_space

    sparse_space = Sparse(spaces.Tuple([base_space, base_space]), sparsity=0.0)
    assert sparse_space != other_space


================================================
FILE: sequoia/common/spaces/tensor_spaces.py
================================================
""" TODO: Maybe create a typed version of 'add_tensor_support' of gym_wrappers.convert_tensors
"""
from typing import Optional, Union

import gym
import numpy as np
import torch
from gym import spaces
from torch import Tensor

# Dict of NumPy dtype -> torch dtype (when the correspondence exists)
numpy_to_torch_dtypes = {
    bool: torch.bool,
    np.uint8: torch.uint8,
    np.int8: torch.int8,
    np.int16: torch.int16,
    np.int32: torch.int32,
    np.int64: torch.int64,
    np.float16: torch.float16,
    np.float32: torch.float32,
    np.float64: torch.float64,
    np.complex64: torch.complex64,
    np.complex128: torch.complex128,
}
# Dict of torch dtype -> NumPy dtype
torch_to_numpy_dtypes = {value: key for (key, value) in numpy_to_torch_dtypes.items()}


def get_numpy_dtype_equivalent_to(torch_dtype: torch.dtype) -> np.dtype:
    """TODO: Gets the numpy dtype equivalent to the given torch dtype."""

    def dtypes_equal(a: torch.dtype, b: torch.dtype) -> bool:
        return a == b  # simple for now.

    matching_dtypes = [v for k, v in torch_to_numpy_dtypes.items() if dtypes_equal(k, torch_dtype)]
    if len(matching_dtypes) == 0:
        raise RuntimeError(f"Unable to find a numpy dtype equivalent to {torch_dtype}")
    if len(matching_dtypes) > 1:
        raise RuntimeError(f"Found more than one match for dtype {torch_dtype}: {matching_dtypes}")
    return np.dtype(matching_dtypes[0])


def get_torch_dtype_equivalent_to(numpy_dtype: np.dtype) -> torch.dtype:
    """TODO: Gets the torch dtype equivalent to the given np dtype."""

    def dtypes_equal(a: torch.dtype, b: torch.dtype) -> bool:
        return a == b  # simple for now.

    matching_dtypes = [v for k, v in numpy_to_torch_dtypes.items() if dtypes_equal(k, numpy_dtype)]
    if len(matching_dtypes) == 0:
        raise RuntimeError(f"Unable to find a torch dtype equivalent to {numpy_dtype}")
    if len(matching_dtypes) > 1:
        raise RuntimeError(f"Found more than one match for dtype {numpy_dtype}: {matching_dtypes}")
    return matching_dtypes[0]


from inspect import isclass
from typing import Any


def is_numpy_dtype(dtype: Any) -> bool:
    return isinstance(dtype, np.dtype) or isclass(dtype) and issubclass(dtype, np.generic)


def is_torch_dtype(dtype: Any) -> bool:
    return isinstance(dtype, torch.dtype)


from abc import ABC


def supports_tensors(space: gym.Space) -> bool:
    raise NotImplementedError(f"TODO: Create a generic function for this.")
    return isinstance(space, TensorSpace)


class TensorSpace(gym.Space, ABC):
    """Mixin class that makes a Space's `contains` and `sample` methods accept and
    produce tensors, respectively.
    """

    def __init__(self, *args, device: torch.device = None, **kwargs):
        # super().__init__(*args, **kwargs)
        self.device: Optional[torch.device] = torch.device(device) if device else None
        # Depending on the value passed to `dtype`
        dtype = kwargs.get("dtype")
        if dtype is None:
            if isinstance(self, (spaces.Discrete, spaces.MultiDiscrete)):
                # NOTE: They dont actually give a 'dtype' argument for these.
                self._numpy_dtype = np.dtype(np.int64)
                self._torch_dtype = torch.int64
            else:
                raise NotImplementedError(f"Space {self} doesn't have a `dtype`?")
        elif is_numpy_dtype(dtype):
            self._numpy_dtype = np.dtype(dtype)
            self._torch_dtype = get_torch_dtype_equivalent_to(dtype)
        elif is_torch_dtype(dtype):
            self._numpy_dtype = get_numpy_dtype_equivalent_to(dtype)
            self._torch_dtype = dtype
        elif str(dtype) == "float32":
            self._numpy_dtype = np.dtype(np.float32)
            self._torch_dtype = torch.float32
        else:
            assert not any(dtype == k for k in numpy_to_torch_dtypes)
            assert not any(dtype == k for k in torch_to_numpy_dtypes)
            raise NotImplementedError(f"Unsupported dtype {dtype} (of type {type(dtype)})")
        if "dtype" in kwargs:
            kwargs["dtype"] = self._numpy_dtype
        super().__init__(*args, **kwargs)
        self.dtype: torch.dtype = self._torch_dtype


class TensorBox(TensorSpace, spaces.Box):
    """Box space that accepts both Tensor and ndarrays."""

    def __init__(self, low, high, shape=None, dtype=np.float32, device: torch.device = None):
        super().__init__(low, high, shape=shape, dtype=dtype, device=device)
        self.low_tensor = torch.as_tensor(self.low, device=self.device)
        self.high_tensor = torch.as_tensor(self.high, device=self.device)
        self.dtype = self._torch_dtype

    def sample(self):
        self.dtype = self._numpy_dtype
        sample = super().sample()
        self.dtype = self._torch_dtype
        return torch.as_tensor(sample, dtype=self._torch_dtype, device=self.device)

    def contains(self, x: Union[list, np.ndarray, Tensor]) -> bool:
        if isinstance(x, list):
            x = np.array(x)  # Promote list to array for contains check
        if isinstance(x, Tensor):
            if not (x.device == self.low_tensor.device == self.high_tensor.device):
                raise RuntimeError(
                    f"Values aren't on the same device: {x.device}, {self.device}, {self.low_tensor.device}"
                )

            return (
                x.shape == self.shape
                and (x >= self.low_tensor).all()
                and (x <= self.high_tensor).all()
            )
        return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high)

    def __repr__(self):
        return (
            f"{type(self).__name__}({self.low.min()}, {self.high.max()}, "
            f"{self.shape}, {self.dtype}"
            + (f", device={self.device}" if self.device is not None else "")
            + ")"
        )

    @classmethod
    def from_box(cls, box: spaces.Box, device: torch.device = None):
        return cls(
            low=box.low.flat[0],
            high=box.high.flat[0],
            shape=box.shape,
            dtype=box.dtype,  # NOTE: Gets converted in TensorSpace constructor.
            device=device,
        )


class TensorDiscrete(TensorSpace, spaces.Discrete):
    def contains(self, v: Union[int, Tensor]) -> bool:
        if isinstance(v, Tensor):
            v = v.detach().cpu().numpy()
        return super().contains(v)

    def sample(self):
        self.dtype = self._numpy_dtype
        s = super().sample()
        self.dtype = self._torch_dtype
        return torch.as_tensor(s, dtype=self.dtype, device=self.device)


class TensorMultiDiscrete(TensorSpace, spaces.MultiDiscrete):
    def contains(self, v: Tensor) -> bool:
        try:
            return super().contains(v)
        except:
            v_numpy = v.detach().cpu().numpy()
            return super().contains(v_numpy)

    def sample(self):
        self.dtype = self._numpy_dtype
        s = super().sample()
        self.dtype = self._torch_dtype
        return torch.as_tensor(s, dtype=self.dtype, device=self.device)


from gym.vector.utils.spaces import batch_space


@batch_space.register(TensorDiscrete)
def _batch_discrete_space(space: TensorDiscrete, n: int = 1) -> TensorMultiDiscrete:
    return TensorMultiDiscrete(torch.full((n,), space.n, dtype=space.dtype))


================================================
FILE: sequoia/common/spaces/tensor_spaces_test.py
================================================
import numpy as np
import pytest
from gym import spaces
from torch import Tensor

from .tensor_spaces import TensorBox, numpy_to_torch_dtypes


@pytest.mark.parametrize("np_dtype", [np.uint8, np.float32])
def test_tensor_box(np_dtype: np.dtype):
    torch_dtype = numpy_to_torch_dtypes[np_dtype]

    space = spaces.Box(0, 1, (28, 28), dtype=np_dtype)
    new_space = TensorBox.from_box(space)
    sample = new_space.sample()

    assert isinstance(sample, Tensor)
    assert sample in new_space
    assert sample.cpu().numpy().astype(np_dtype) in space
    assert sample.dtype == torch_dtype


================================================
FILE: sequoia/common/spaces/typed_dict.py
================================================
""" Subclass of `spaces.Dict` that allows custom dtypes and uses type annotations.
"""
import dataclasses
from collections import OrderedDict
from collections.abc import Mapping as MappingABC
from copy import deepcopy
from dataclasses import fields, is_dataclass
from inspect import isclass
from typing import (
    Any,
    ClassVar,
    Dict,
    Iterable,
    List,
    Mapping,
    Sequence,
    Tuple,
    Type,
    TypeVar,
    Union,
    get_type_hints,
)

import gym
import numpy as np
from gym import Space, spaces
from gym.vector.utils import batch_space, concatenate

from .sparse import batch_space, concatenate

try:
    from typing import get_origin
except ImportError:
    # Python 3.7's typing module doesn't have this `get_origin` function, so get it from
    # `typing_inspect`.
    from typing_inspect import get_origin


M = TypeVar("M", bound=Mapping[str, Any])
S = TypeVar("S")
Dataclass = TypeVar("Dataclass")


class TypedDictSpace(spaces.Dict, Space[M]):
    """Subclass of `spaces.Dict` that allows custom dtypes and uses type annotations.

    ## Examples:

    - Using it just like a regular spaces.Dict:

    >>> from gym.spaces import Box
    >>> s = TypedDictSpace(x=Box(0, 1, (4,), dtype=np.float64))
    >>> s
    TypedDictSpace(x:Box(0.0, 1.0, (4,), float64))
    >>> _ = s.seed(123)
    >>> s.sample()
    {'x': array([0.06132501, 0.48141959, 0.41703335, 0.34899889])}

    - Using it like a TypedDict: (This equivalent to the above)

    >>> class VisionSpace(TypedDictSpace):
    ...     x: Box = Box(0, 1, (4,), dtype=np.float64)
    >>> s = VisionSpace()
    >>> s
    VisionSpace(x:Box(0.0, 1.0, (4,), float64))
    >>> _ = s.seed(123)
    >>> s.sample()
    {'x': array([0.06132501, 0.48141959, 0.41703335, 0.34899889])}

    - You can also overwrite the values from the type annotations by passing them to the
      constructor:

    >>> s = VisionSpace(x=spaces.Box(0, 2, (3,), dtype=np.int64))
    >>> s
    VisionSpace(x:Box(0, 2, (3,), int64))
    >>> _ = s.seed(123)
    >>> s.sample()
    {'x': array([0, 1, 1])}

    ### Using custom dtypes

    Can use any type here, as long as it can receive the samples from each space as
    keyword arguments.

    One good example of this is to use a `dataclass` as the custom dtype.
    You are strongly encouraged to use a dtype that inherits from the `Mapping` class
    from `collections.abc`, so that samples form your space can be handled similarly to
    regular dictionaries.

    >>> from collections import OrderedDict
    >>> s = TypedDictSpace(x=spaces.Box(0, 1, (4,), dtype=float), dtype=OrderedDict)
    >>> s
    TypedDictSpace(x:Box(0.0, 1.0, (4,), float64), dtype=<class 'collections.OrderedDict'>)
    >>> _ = s.seed(123)
    >>> s.sample()
    OrderedDict([('x', array([0.06132501, 0.48141959, 0.41703335, 0.34899889]))])

    ### Required items:

    If an annotation on the class doesn't have a default value, then it is treated as a
    required argument:

    >>> class FooSpace(TypedDictSpace):
    ...     a: spaces.Box = spaces.Box(0, 1, (4,), float)
    ...     b: spaces.Discrete
    >>> s = FooSpace()  # doesn't work!
    Traceback (most recent call last):
      ...
    TypeError: Space of type <class 'sequoia.common.spaces.typed_dict.FooSpace'> requires a 'b' item!
    >>> s = FooSpace(b=spaces.Discrete(5))
    >>> s
    FooSpace(a:Box(0.0, 1.0, (4,), float64), b:Discrete(5))

    NOTE: spaces can also inherit from each other!

    >>> class ImageSegmentationSpace(VisionSpace):
    ...     bounding_box: Box
    ...
    >>> s = ImageSegmentationSpace(
    ...     x=spaces.Box(0, 1, (2, 2), dtype=float),
    ...     bounding_box=spaces.Box(0, 4, (4, 2), dtype=int),
    ... )
    >>> s
    ImageSegmentationSpace(x:Box(0.0, 1.0, (2, 2), float64), bounding_box:Box(0, 4, (4, 2), int64))
    """

    def __init__(self, spaces: Mapping[str, Space] = None, dtype: Type[M] = dict, **spaces_kwargs):
        """Creates the TypedDict space.

        Can either pass a dict of spaces, or pass the spaces as keyword arguments.

        Parameters
        ----------
        spaces : Mapping[str, Space], optional
            Dictionary mapping from strings to spaces, by default None
        dtype : Type[M], optional
            Type of outputs to return. By default `dict`, but this can also use any
            other dtype which will accept the values from each space as a keyword
            argument.

            NOTE: This `dtype` is usually set to some dataclass type in Sequoia, such as
            `Observation`, `Rewards`, etc. (subclasses of `Batch`).

            By default, `dtype` is just `dict`, and `space.sample()` will return simple
            dictionaries.

        Raises
        ------
        RuntimeError
            If both `spaces` and **kwargs are used.
        TypeError
            If the class has a type annotation for a space, and the required space isn't
            passed as an argument (emulating a required argument, in a way).
        """

        if spaces and spaces_kwargs:
            raise RuntimeError("Can only use one of `spaces` or **kwargs, not both.")
        spaces_from_args = spaces or spaces_kwargs

        # have to use OrderedDict just in case python <= 3.6.x
        spaces_from_annotations: Dict[str, gym.Space] = OrderedDict()

        cls = type(self)
        class_typed_attributes: Dict[str, Type] = get_type_hints(cls)
        # NOTE: This is only needed when using `__future__ import annotations` in a
        # client file:
        # Get the `globals` of the caller when checking type annotations:
        # NOTE: Might actually need to get the globals of where that class is defined!
        # caller_globals = inspect.stack()[1][0].f_globals
        # class_typed_attributes: Dict[str, Type] = get_type_hints(cls, globalns=caller_globals)

        if class_typed_attributes:
            for attribute, type_annotation in class_typed_attributes.items():
                if getattr(type_annotation, "__origin__", "") is ClassVar:
                    continue

                is_space = False
                if isclass(type_annotation) and issubclass(type_annotation, gym.Space):
                    is_space = True
                else:
                    origin = get_origin(type_annotation)
                    is_space = (
                        origin is not None and isclass(origin) and issubclass(origin, gym.Space)
                    )

                # NOTE: emulate a 'required argument' when there is a type
                # annotation, but no value.
                # Note: How about a None value, is that ok?
                if is_space:
                    _missing = object()
                    value = getattr(cls, attribute, _missing)
                    if value is _missing and attribute not in spaces_from_args:
                        raise TypeError(
                            f"Space of type {type(self)} requires a '{attribute}' item!"
                        )
                    if isinstance(value, gym.Space):
                        # Shouldn't be able to have two annotations with the same name.
                        assert attribute not in spaces_from_annotations
                        # TODO: Should copy the space, so that modifying the class
                        # attribute doesn't affect the instances of that space.
                        spaces_from_annotations[attribute] = deepcopy(value)

        # Avoid the annoying sorting of keys that `spaces.Dict` does if we pass a
        # regular dict.
        spaces = OrderedDict()  # Need to use this for 3.6.x
        spaces.update(spaces_from_annotations)
        spaces.update(spaces_from_args)  # Arguments overwrite the spaces from the annotations.

        if not spaces:
            raise TypeError(
                "Need to either have type annotations on the class, or pass some "
                "arguments to the constructor!"
            )
        assert all(isinstance(s, gym.Space) for s in spaces.values()), spaces

        super().__init__(spaces=spaces)
        self.spaces = dict(self.spaces)  # Get rid of the OrderedDict.

        # Sequoia-specific check.
        if "x" in self.spaces:
            assert list(self.spaces.keys()).index("x") == 0, self.spaces

        self.dtype = dtype

        # Optional: But just to make sure this works:
        if dataclasses.is_dataclass(self.dtype):
            dtype_fields: List[str] = [f.name for f in dataclasses.fields(self.dtype)]
            # Check that the dtype can handle all the entries of `self.spaces`, so that
            # we won't get any issues when calling `self.dtype(**super().sample())`.
            for space_name, space in self.spaces.items():
                if space_name not in dtype_fields:
                    raise RuntimeError(
                        f"dtype {self.dtype} doesn't have a field for space "
                        f"'{space_name}' ({space})!"
                    )

    def keys(self) -> Sequence[str]:
        return self.spaces.keys()

    def items(self) -> Iterable[Tuple[str, Space]]:
        return self.spaces.items()

    def values(self) -> Sequence[Space]:
        return self.spaces.values()

    def sample(self) -> M:
        dict_sample: dict = super().sample()
        # Gets rid of OrderedDict.
        return self.dtype(**dict_sample)

    def __getattr__(self, attr: str) -> Space:
        if attr != "spaces":
            if attr in self.spaces:
                return self.spaces[attr]
        raise AttributeError(f"Space doesn't have attribute {attr}")

    def __getitem__(self, key: Union[str, int]) -> Space:
        if key not in self.spaces:
            if isinstance(key, int):
                # IDEA: Try to get the item at given index in the keys? a bit like a
                # tuple space?
                # return self[list(self.spaces.keys())[key]]
                pass
        return super().__getitem__(key)

    def __len__(self) -> int:
        return len(self.spaces)

    # def __setitem__(self, key, value):
    #     return super().__setitem__(key, value)

    def contains(self, x: Union[M, Mapping[str, Space]]) -> bool:
        if is_dataclass(x):
            if is_dataclass(self.dtype):
                if not isinstance(x, self.dtype):
                    # NOTE: This could be a bit controversial, since it departs a bit how Dict
                    # does things.
                    return False
            # NOTE: We don't use dataclasses.asdict as it doesn't work with Tensor
            # items with grad attributes.
            x = {f.name: getattr(x, f.name) for f in fields(x)}

        # NOTE: Modifying this so that we allow samples with more values, as long as it
        # has all the required keys.
        if not isinstance(x, (dict, MappingABC)) or not all(k in x for k in self.spaces):
            return False
        for k, space in self.spaces.items():
            if k not in x:
                return False
            if not space.contains(x[k]):
                return False
        return True
        # return super().contains(x)

    def __repr__(self) -> str:
        return (
            f"{str(type(self).__name__)}("
            + ", ".join([f"{k}:{s}" for k, s in self.spaces.items()])
            + (f", dtype={self.dtype}" if self.dtype is not dict else "")
            + ")"
        )

    def __eq__(self, other):
        if isinstance(other, TypedDictSpace) and self.dtype != other.dtype:
            return False
        return super().__eq__(other)


@batch_space.register(TypedDictSpace)
def _batch_typed_dict_space(space: TypedDictSpace, n: int = 1) -> spaces.Dict:
    return type(space)(
        {key: batch_space(subspace, n=n) for (key, subspace) in space.spaces.items()},
        dtype=space.dtype,
    )


@concatenate.register(TypedDictSpace)
def _concatenate_typed_dicts(
    space: TypedDictSpace,
    items: Union[list, tuple],
    out: Union[tuple, dict, np.ndarray],
) -> Dict:
    return space.dtype(
        **{
            key: concatenate(subspace, [item[key] for item in items], out=out[key])
            for (key, subspace) in space.spaces.items()
        }
    )


from sequoia.utils.generic_functions.to_from_tensor import from_tensor, to_tensor

T = TypeVar("T")


@from_tensor.register(TypedDictSpace)
def _(space: TypedDictSpace, sample: Union[T, Mapping]) -> T:
    return space.dtype(
        **{key: from_tensor(sub_space, sample[key]) for key, sub_space in space.spaces.items()}
    )


import torch


@to_tensor.register(TypedDictSpace)
def _(
    space: TypedDictSpace[T],
    sample: Dict[str, Union[np.ndarray, Any]],
    device: torch.device = None,
) -> T:
    return space.dtype(
        **{
            key: to_tensor(subspace, sample=sample[key], device=device)
            for key, subspace in space.items()
        }
    )


================================================
FILE: sequoia/common/spaces/typed_dict_test.py
================================================
from dataclasses import Field, dataclass, fields
from typing import Dict, Iterable, Mapping, Tuple, TypeVar

import gym
import numpy as np
from gym import spaces
from gym.spaces import Box, Discrete
from gym.vector.utils import batch_space

from .typed_dict import TypedDictSpace

T = TypeVar("T")


def test_basic():
    space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
    )
    v = space.sample()
    print(v)
    assert v in space
    # TODO: Maybe re-use all the tests for gym.spaces.Tuple in the gym repo
    # somehow?

    vanilla_space = spaces.Dict(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
    )
    assert vanilla_space.sample() in space
    assert space.sample() in vanilla_space


def test_supports_dataclasses():
    # IDEA: Wrapper that makes the 'default factory' of each field actually use
    # the 'sample' method from a space associated with each class.

    @dataclass
    class Sample:
        a: np.ndarray
        b: bool
        c: Tuple[int, int]

    space = spaces.Dict(
        a=spaces.Box(0, 1, [2, 2], dtype=np.float64),
        b=spaces.Box(False, True, (), np.bool),
        c=spaces.MultiDiscrete([2, 2]),
    )

    wrapped_space: TypedDictSpace = TypedDictSpace(spaces=space.spaces, dtype=Sample)
    assert isinstance(wrapped_space, spaces.Dict)
    s = Sample(
        a=np.ones([2, 2]),
        b=np.array(False),
        c=np.array([0, 1]),
    )
    assert s in wrapped_space
    assert isinstance(wrapped_space.sample(), Sample)


@dataclass
class StateTransition(Mapping[str, T]):
    current_state: T
    action: int
    next_state: T

    def __post_init__(self):
        self._fields: Dict[str, Field] = {f.name: f for f in fields(self)}

    def __len__(self) -> int:
        return len(self._fields)

    def __getitem__(self, attr: str) -> T:
        if attr not in self._fields:
            raise KeyError(attr)
        return getattr(self, attr)

    def __iter__(self) -> Iterable[str]:
        return iter(self._fields)


def test_basic_with_dtype():
    space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    v = space.sample()
    assert v in space
    assert isinstance(v, StateTransition)

    normal_space = spaces.Dict(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
    )
    assert normal_space.sample() in space
    # NOTE: this doesn't work when using a dtype that isn't a subclass of dict!
    if issubclass(space.dtype, dict):
        assert space.sample() in normal_space


def test_isinstance():
    space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    assert isinstance(space, spaces.Dict)
    assert isinstance(space.sample(), StateTransition)


def test_equals_dict_space_with_same_items():
    """Test that a TypedDictSpace is considered equal to aDict space if
    the spaces are in the same order and all equal.
    """
    space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    dict_space = spaces.Dict(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
    )
    assert space == dict_space
    assert dict_space == space


def test_batch_objets_considered_valid_samples():
    from dataclasses import dataclass

    import numpy as np

    from sequoia.common.batch import Batch

    @dataclass(frozen=True)
    class StateTransitionDataclass(Batch):
        current_state: np.ndarray
        action: int
        next_state: np.ndarray

    space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2), dtype=np.float64),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2), dtype=np.float64),
        dtype=StateTransitionDataclass,
    )
    obs = StateTransitionDataclass(
        current_state=np.ones([2, 2]) / 2,
        action=1,
        next_state=np.zeros([2, 2]),
    )
    assert obs in space
    assert space.sample() in space
    assert isinstance(space.sample(), StateTransitionDataclass)


def test_batch_space():
    space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    assert batch_space(space, n=5) == TypedDictSpace(
        current_state=Box(0, 1, (5, 2, 2)),
        action=spaces.MultiDiscrete([2, 2, 2, 2, 2]),
        next_state=Box(0, 1, (5, 2, 2)),
        dtype=StateTransition,
    )


def test_batch_space_preserves_dtype():
    space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    batched_space = batch_space(space, n=5)
    assert isinstance(batched_space, TypedDictSpace)
    assert list(batched_space.spaces.keys()) == list(batched_space.spaces.keys())
    assert list(batched_space.spaces.keys()) == [
        "current_state",
        "action",
        "next_state",
    ]
    assert batched_space.dtype is StateTransition

    space = TypedDictSpace(
        dict(
            current_state=Box(0, 1, (2, 2)),
            action=Discrete(2),
            next_state=Box(0, 1, (2, 2)),
        ),
        dtype=StateTransition,
    )
    batched_space = batch_space(space, n=5)
    assert isinstance(batched_space, TypedDictSpace)
    assert list(batched_space.spaces.keys()) == list(batched_space.spaces.keys())
    assert list(batched_space.spaces.keys()) == [
        "current_state",
        "action",
        "next_state",
    ]
    assert list(batched_space.sample().keys()) == [
        "current_state",
        "action",
        "next_state",
    ]
    assert list(v[0] for v in space.spaces.items()) == [
        "current_state",
        "action",
        "next_state",
    ]
    assert batched_space.dtype is StateTransition

    space = TypedDictSpace(
        dict(
            x=Box(0, 1, (2, 2)),
            action=Discrete(2),
            next_state=Box(0, 1, (2, 2)),
        ),
    )
    batched_space = batch_space(space, n=5)
    assert batched_space.x == Box(0, 1, (5, 2, 2))
    assert isinstance(batched_space, TypedDictSpace)
    assert list(batched_space.spaces.keys()) == list(batched_space.spaces.keys())
    assert list(batched_space.spaces.keys()) == ["x", "action", "next_state"]
    assert list(batched_space.sample().keys()) == ["x", "action", "next_state"]
    assert list(v[0] for v in space.spaces.items()) == ["x", "action", "next_state"]


class DummyDictEnv(gym.Env):
    def __init__(self):
        super().__init__()
        self.observation_space = TypedDictSpace(
            x=Box(0, 1, (2, 2)),
            t=Discrete(2),
            done=Box(False, True, (1,), bool),
        )
        self.action_space = spaces.Discrete(10)
        self.reward_space = spaces.Box(-10, 10, shape=(1,), dtype=np.float32)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        return self.observation_space.sample(), self.reward_space.sample(), False, {}

    def seed(self, seed=None):
        seeds = []
        seeds += self.observation_space.seed(seed)
        seeds += self.action_space.seed(seed)
        seeds += self.reward_space.seed(seed)
        return seeds


def test_vector_env():
    env = DummyDictEnv()
    from gym.envs.registration import register
    from gym.vector import make

    register("dummy_foo-v0", entry_point=DummyDictEnv)
    env = make("dummy_foo-v0", num_envs=10)


from typing import Optional

from numpy.typing import ArrayLike

from sequoia.common.batch import Batch


def test_object_with_extra_keys_fits():
    @dataclass(frozen=True)
    class Observation(Batch):
        x: np.ndarray
        t: ArrayLike
        done: Optional[ArrayLike] = None

    space = TypedDictSpace(
        x=spaces.Box(0, 10, (10,), dtype=np.float64), t=spaces.Box(0, 1, (1,), dtype=np.int32)
    )

    obs = Observation(
        x=np.arange(10, dtype=np.float64),
        t=np.array([1], dtype=np.int32),
        done=False,
    )
    assert obs.x in space.x
    assert obs.t in space.t
    assert obs in space


def test_order_of_keys_is_same_in_samples():
    space = TypedDictSpace(x=spaces.Box(0, 10, (10,), dtype=np.int32), t=spaces.Discrete(10))
    expected = ["x", "t"]
    assert list(space.keys()) == expected
    assert list(k for k, v in space.items()) == expected

    assert list(space.sample().keys()) == expected
    assert list(k for k, v in space.sample().items()) == expected
    space.seed(123)
    s = space.sample()
    assert str(s) == f"{{'x': {repr(s['x'])}, 't': {repr(s['t'])}}}"


def test_debugging():
    assert {
        "task_labels": 0,
        "x": np.array([-0.25162117, -0.43992427, 0.42706016, 1.47862901]),
    } in TypedDictSpace(
        x=spaces.Box(-3.4028234663852886e38, 3.4028234663852886e38, (4,), np.float64),
        task_labels=spaces.Discrete(5),
        dtype=dict,
    )


def test_equality():
    s1 = TypedDictSpace(
        x=spaces.Box(-np.inf, np.inf, (39,), np.float32),
        task_labels=spaces.Discrete(10),
        dtype=dict,
    )
    s2 = TypedDictSpace(
        x=spaces.Box(-np.inf, np.inf, (39,), np.float32),
        task_labels=spaces.Discrete(10),
        dtype=dict,
    )
    assert s1 == s2


## IDEA: Creating a space like this, using the same syntax as with TypedDict
# class StateTransitionSpace(TypedDict):
#     current_state: Box = Box(0, 1, (2,2))
#     action: Discrete = Discrete(2)
#     current_state: Box = Box(0, 1, (2,2))

# space = StateTransitionSpace()
# space.sample()


================================================
FILE: sequoia/common/task.py
================================================
""" NOTE: Unused at the moment.

This defines a `Task` object that is just used to represent the information
about a 'Task'.
"""
from dataclasses import dataclass, field
from typing import List

from simple_parsing import list_field

from sequoia.utils.serialization import Serializable


@dataclass
class Task(Serializable):
    """Dataclass that represents a task.

    TODO (@lebrice): This isn't being used anymore, but we could probably
    use it / add it to the Continuum package, if it doesn't already have something
    like it.
    TODO: Maybe the this could also specify from which dataset(s) it is sampled.
    """

    # The index of this task (the order in which it was encountered)
    index: int = field(default=-1, repr=False)
    # All the unique classes present within this task. (order matters)
    classes: List[int] = list_field()


================================================
FILE: sequoia/common/transforms/__init__.py
================================================
from .channels import (
    ChannelsFirst,
    ChannelsFirstIfNeeded,
    ChannelsLast,
    ChannelsLastIfNeeded,
    ThreeChannels,
)
from .compose import Compose
from .split_batch import SplitBatch, split_batch
from .to_tensor import ToTensor, image_to_tensor
from .transform import Transform
from .transform_enum import Transforms


================================================
FILE: sequoia/common/transforms/channels.py
================================================
# from torchvision.transforms import Lambda
from collections.abc import Mapping
from dataclasses import dataclass
from functools import singledispatch
from typing import Any, Iterable, Tuple, Union

import numpy as np
import torch
from gym import spaces
from torch import Tensor

from sequoia.common.spaces import NamedTupleSpace, TypedDictSpace
from sequoia.utils.logging_utils import get_logger

from .transform import Img, Transform
from .utils import is_image

logger = get_logger(__name__)


@singledispatch
def has_channels_last(img_or_shape: Union[Img, Tuple[int, ...], spaces.Box]) -> bool:
    """Returns wether the given image, or image batch, shape, or Space is in
    the channels last format.
    """
    shape = getattr(img_or_shape, "shape", img_or_shape)
    return len(shape) and shape[-1] in {1, 3}


def has_channels_first(img_or_shape: Union[Img, Tuple[int, ...], spaces.Box]) -> bool:
    """Returns wether the given image or image batch, shape, or Space is in
    the channels first format.
    """
    shape = getattr(img_or_shape, "shape", img_or_shape)
    if len(shape) == 3:
        return shape[0] in {1, 3}
    elif len(shape) == 4:
        return shape[1] in {1, 3}
    return False
    # return len(shape) and shape[0 if len(shape) == 3 else 1] in {1, 3}


def channels_last_if_needed(x: Any) -> Any:
    if has_channels_first(x):
        return channels_last(x)
    elif has_channels_last(x):
        return x
    raise RuntimeError(f"Input isn't channels_first or channels_last! {x.shape}")


def channels_first_if_needed(x: Any) -> Any:
    if has_channels_last(x):
        return channels_first(x)
    elif has_channels_first(x):
        return x
    raise RuntimeError(f"Input isn't channels_first or channels_last! {x.shape}")


class NamedDimensions(Transform[Tensor, Tensor]):
    """'Transform' that gives names to the dimensions of input tensors.
    Overwrites existing named dimensions, if any.
    """

    def __init__(self, names: Iterable[str]):
        self.names = tuple(names)

    def __call__(self, tensor: Tensor) -> Tensor:
        return tensor.refine_names(*self.names)


@singledispatch
def three_channels(x: Any) -> Any:
    """Transform that makes the input images have three channels if they don't.

    * New: Also adds names to each dimension, when possible. (edit: off for now)

    For instance, if the input shape is:
    [28, 28] -> [3, 28, 28] (copy the image three times)
    [1, 28, 28] -> [3, 28, 28] (same idea)
    [10, 1, 28, 28] -> [10, 3, 28, 28] (keep batch intact, do the same again.)

    """
    raise NotImplementedError(f"This doesn't currently support input {x} of type {type(x)}")


@three_channels.register(Tensor)
def _(x: Tensor) -> Tensor:
    names: Tuple[str, ...] = ()
    if x.ndim == 2:
        x = x.reshape([1, *x.shape])
        x = x.repeat(3, 1, 1)
        names = ("C", "H", "W")
    if x.ndim == 3:
        if x.shape[0] == 1:
            x = x.repeat(3, 1, 1)
            names = ("C", "H", "W")
        elif x.shape[-1] == 1:
            x = x.repeat(1, 1, 3)
            names = ("H", "W", "C")
    if x.ndim == 4:
        if x.shape[1] == 1:
            x = x.repeat(1, 3, 1, 1)
            names = ("N", "C", "H", "W")
        elif x.shape[-1] == 1:
            x = x.repeat(1, 1, 1, 3)
            names = ("N", "H", "W", "C")
    # FIXME: Turning this off for now, since using named dimensions
    # generates a whole lot of UserWarnings atm.
    # if isinstance(x, Tensor) and names:
    #     # Cool new pytorch feature!
    #     x.rename(*names)
    return x


@three_channels.register(np.ndarray)
def _(x: np.ndarray) -> np.ndarray:
    if x.ndim == 2:
        # names = ("C", "H", "W")
        x = x.reshape([1, *x.shape])
        x = np.tile(x, [3, 1, 1])
    if x.ndim == 3:
        if x.shape[0] == 1:
            # names = ("C", "H", "W")
            x = np.tile(x, [3, 1, 1])
        elif x.shape[-1] == 1:
            # names = ("H", "W", "C")
            x = np.tile(x, [1, 1, 3])
    if x.ndim == 4:
        if x.shape[1] == 1:
            # names = ("N", "C", "H", "W")
            x = np.tile(x, [1, 3, 1, 1])
        elif x.shape[-1] == 1:
            # names = ("N", "H", "W", "C")
            x = np.tile(x, [1, 1, 1, 3])
    return x


@three_channels.register(spaces.Box)
def _(x: spaces.Box) -> spaces.Box:
    return type(x)(low=three_channels(x.low), high=three_channels(x.high), dtype=x.dtype)


@three_channels.register(torch.Size)
@three_channels.register(tuple)
def _(x: Tuple[int, ...]) -> Tuple[int, ...]:
    dims = len(x)
    if dims == 2:
        return (3, *x)
    elif dims == 3:
        if x[0] == 1:
            return (3, *x[1:])
        elif x[-1] == 1:
            return (*x[:-1], 3)
    elif dims == 4:
        if x[1] == 1:
            return (x[0], 3, *x[2:])
        elif x[-1] == 1:
            return (*x[:-1], 3)
    return x


@three_channels.register(NamedTupleSpace)
def _three_channels(x: Any) -> Any:
    return type(x)(
        **{key: three_channels(value) if is_image(value) else value for key, value in x.items()},
        dtype=x.dtype,
    )


@three_channels.register(spaces.Dict)
@three_channels.register(Mapping)
def _three_channels(x: Any) -> Any:
    return type(x)(
        **{key: three_channels(value) if is_image(value) else value for key, value in x.items()}
    )


@three_channels.register(TypedDictSpace)
def _three_channels(x: TypedDictSpace) -> TypedDictSpace:
    return type(x)(
        {key: three_channels(value) if is_image(value) else value for key, value in x.items()},
        dtype=x.dtype,
    )


@dataclass
class ThreeChannels(Transform[Tensor, Tensor]):
    """Transform that makes the input images have three tensors.

    * New: Also adds names to each dimension, when possible.

    For instance, if the input shape is:
    [28, 28] -> [3, 28, 28] (copy the image three times)
    [1, 28, 28] -> [3, 28, 28] (same idea)
    [10, 1, 28, 28] -> [10, 3, 28, 28] (keep batch intact, do the same again.)

    """

    def __call__(self, x: Tensor) -> Tensor:
        return three_channels(x)


@singledispatch
def channels_first(x: Any) -> Any:
    """Re-orders the dimensions of the input from ((n), H, W, C) to ((n), C, H, W).
    If the tensor doesn't have named dimensions, this will ALWAYS re-order the
    dimensions, regarless of if the image or space already has channels first.

    Also converts non-Tensor inputs to tensors using `to_tensor`.
    """
    raise RuntimeError(f"Transform isn't applicable to input {x} of type {type(x)}.")


@channels_first.register(Tensor)
def _(x: Tensor) -> Tensor:
    if x.ndim == 3:
        if any(x.names):
            return x.align_to("C", "H", "W")
        return x.permute(2, 0, 1)  # .to(memory_format=torch.contiguous_format)
    if x.ndim == 4:
        if any(x.names):
            return x.align_to("N", "C", "H", "W")
        return x.permute(0, 3, 1, 2).contiguous()
    return x


@channels_first.register(tuple)
def _(x: Tuple[int, ...]) -> Tuple[int, ...]:
    if len(x) == 3:
        # TODO: Re-enable the naming of the dimensions at some point.
        return type(x)(x[i] for i in (2, 0, 1))
    if len(x.shape) == 4:
        return type(x)(x[i] for i in (0, 3, 1, 2))
    raise NotImplementedError(x)


@channels_first.register(np.ndarray)
def _(x: spaces.Box) -> spaces.Box:
    if x.ndim == 4:
        return np.moveaxis(x, 3, 1)
    elif x.ndim == 3:
        return np.moveaxis(x, 2, 0)
    else:
        raise NotImplementedError(f"Expected 3-d or 4-d input, got {x}")


@channels_first.register(tuple)
def _(x: Tuple[int, ...]) -> Tuple[int, ...]:
    if len(x) == 4:
        return type(x)(x[i] for i in (0, 3, 1, 2))
    if len(x) == 3:
        return type(x)(x[i] for i in (2, 0, 1))
    raise NotImplementedError(x)


@channels_first.register(spaces.Box)
def _(x: spaces.Box) -> spaces.Box:
    return type(x)(
        low=channels_first(x.low),
        high=channels_first(x.high),
        dtype=x.dtype,
    )


@dataclass
class ChannelsFirst(Transform[Union[np.ndarray, Tensor], Tensor]):
    """Re-orders the dimensions of the tensor from ((n), H, W, C) to ((n), C, H, W).
    If the tensor doesn't have named dimensions, this will ALWAYS re-order the
    dimensions, regarless of the length of the last dimension.

    Also converts non-Tensor inputs to tensors using `to_tensor`.
    """

    def __call__(self, x: Tensor) -> Tensor:
        return self.apply(x)

    @classmethod
    def apply(cls, x: Tensor) -> Tensor:
        return channels_first(x)

        # if not isinstance(x, Tensor):
        #     raise RuntimeError(f"Transform only applies to Tensors. (Not {x} of type {type(x)}).")

        # # if has_channels_first(x):
        # #     logger.warning(RuntimeWarning(f"Input already seems to have channels first, but this transform will be applied anyway.."))

        # if x.ndim == 3:
        #     if any(x.names):
        #         return x.align_to("C", "H", "W")
        #     return x.permute(2, 0, 1)#.to(memory_format=torch.contiguous_format)
        # if x.ndim == 4:
        #     if any(x.names):
        #         return x.align_to("N", "C", "H", "W")
        #     return x.permute(0, 3, 1, 2).contiguous()
        # return x

    # @staticmethod
    # def shape_change(input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]:
    #     ndim = len(input_shape)
    #     if ndim == 3:
    #         return tuple(input_shape[i] for i in (2, 0, 1))
    #     elif ndim == 4:
    #         return tuple(input_shape[i] for i in (0, 3, 1, 2))
    #     return input_shape


@dataclass
class ChannelsFirstIfNeeded(ChannelsFirst):
    """Only puts the channels first if the input has channels last."""

    @classmethod
    def apply(cls, x: Tensor) -> Tensor:
        if has_channels_last(x):
            return super().apply(x)
        return x

    # @classmethod
    # def shape_change(cls, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]:
    #     if has_channels_last(input_shape):
    #         return super().shape_change(input_shape)
    #     return input_shape


@singledispatch
def channels_last(x: Any) -> Any:
    raise NotImplementedError(f"This doesn't support input {x} of type {type(x)}")


@channels_last.register(Tensor)
def _(x: Tensor) -> Tensor:
    if len(x.shape) == 3:
        # TODO: Re-enable the naming of the dimensions at some point.
        # if not x.names:
        #     x.rename("C", "H", "W")
        #     return x.align_to("H", "W", "C")
        return x.permute(1, 2, 0)
    if len(x.shape) == 4:
        return x.permute(0, 2, 3, 1)


@channels_last.register(tuple)
def _(x: Tuple[int, ...]) -> Tuple[int, ...]:
    if len(x) == 3:
        # TODO: Re-enable the naming of the dimensions at some point.
        return type(x)(x[i] for i in (1, 2, 0))
    if len(x.shape) == 4:
        return type(x)(x[i] for i in (0, 2, 3, 1))
    raise NotImplementedError(x)


@channels_last.register(np.ndarray)
def _(x: np.ndarray) -> np.ndarray:
    if len(x.shape) == 4:
        return np.moveaxis(x, 1, 3)
    elif len(x.shape) == 3:
        return np.moveaxis(x, 0, 2)
    raise NotImplementedError(x.shape)


@channels_last.register(spaces.Box)
def _(x: spaces.Box) -> spaces.Box:
    return type(x)(
        low=channels_last(x.low),
        high=channels_last(x.high),
        dtype=x.dtype,
    )


@dataclass
class ChannelsLast(Transform[Tensor, Tensor]):
    def __call__(self, x: Tensor) -> Tensor:
        return self.apply(x)

    @classmethod
    def apply(cls, x: Tensor) -> Tensor:
        return channels_last(x)


@dataclass
class ChannelsLastIfNeeded(ChannelsLast):
    """Only puts the channels last if the input has channels first."""

    @classmethod
    def apply(cls, x: Tensor) -> Tensor:
        return channels_last_if_needed(x)


================================================
FILE: sequoia/common/transforms/compose.py
================================================
from typing import Callable, List, TypeVar

from gym import spaces
from torchvision.transforms import Compose as ComposeBase

from sequoia.utils.logging_utils import get_logger

from .transform import InputType, OutputType, Transform

logger = get_logger(__name__)

T = TypeVar("T", bound=Callable)


class Compose(List[T], ComposeBase, Transform[InputType, OutputType]):
    """Extend the Compose class of torchvision with methods of `list`.

    This can also be passed in members of the `Transforms` enum, which makes it
    possible to do something like this:
    >>> from .transform_enum import Compose, Transforms
    >>> transforms = Compose([Transforms.to_tensor, Transforms.three_channels,])
    >>> Transforms.three_channels in transforms
    True
    >>> transforms += [Transforms.random_grayscale]
    >>> transforms
    [<Transforms.to_tensor: ToTensor()>, <Transforms.three_channels: ThreeChannels()>, <Transforms.random_grayscale: RandomGrayscale(p=0.1)>]

    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        ComposeBase.__init__(self, transforms=self)

    def __call__(self, img):
        if isinstance(img, spaces.Space):
            for t in self:
                try:
                    img = t(img)
                except:
                    logger.debug(
                        f"Unable to apply transform {t} on space {img}: assuming that transform {t} doesn't change the space."
                    )
            return img
        else:
            for t in self:
                img = t(img)
            return img

    # def shape_change(self, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]:
    #     logger.debug(f"shape_change on Compose: input shape: {input_shape}")
    #     # TODO: Give the impact of this transform on a given input shape.
    #     for transform in self:
    #         logger.debug(f"Shape before transform {transform}: {input_shape}")
    #         shape_change_method: Optional[Callable] = getattr(transform, "shape_change", None)
    #         if shape_change_method and callable(shape_change_method):
    #             input_shape = transform(input_shape)  # type: ignore
    #         else:
    #             logger.debug(
    #                 f"Unable to detect the change of shape caused by "
    #                 f"transform {transform}, assuming its output has same "
    #                 f"shape as its input."
    #             )
    #     logger.debug(f"Final shape: {input_shape}")
    #     return input_shape

    # def space_change(self, input_space: gym.Space) -> gym.Space:
    #     from .transform_enum import Transforms
    #     for transform in self:
    #         if isinstance(transform, Transforms):
    #             transform = transform.value
    #         input_space = transform(input_space)
    #     return input_space


================================================
FILE: sequoia/common/transforms/resize.py
================================================
from collections.abc import Mapping
from functools import singledispatch
from typing import Dict, List, Tuple

import numpy as np
import torch
from gym import spaces
from PIL import Image
from torch import Tensor
from torch.nn.functional import interpolate
from torchvision.transforms import InterpolationMode
from torchvision.transforms import Resize as Resize_
from torchvision.transforms import functional as F

from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support, has_tensor_support
from sequoia.common.spaces import NamedTupleSpace, TypedDictSpace
from sequoia.common.spaces.image import Image as ImageSpace
from sequoia.utils.logging_utils import get_logger

from .channels import channels_first, channels_last, has_channels_first, has_channels_last
from .transform import Img, Transform
from .utils import is_image

logger = get_logger(__name__)


@singledispatch
def resize(x: Img, size: Tuple[int, ...], **kwargs) -> Img:
    """Resizes a PIL.Image, a Tensor, ndarray, or a Box space."""
    raise NotImplementedError(f"Transform doesn't support input {x} of type {type(x)}")


@resize.register
def _(x: Image.Image, size: Tuple[int, ...], **kwargs) -> Image.Image:
    return F.resize(x, size, **kwargs)


@resize.register(np.ndarray)
@resize.register(Tensor)
def _resize_array_or_tensor(x: np.ndarray, size: Tuple[int, ...], **kwargs) -> np.ndarray:
    """TODO: This resizes numpy arrays by converting them to tensors and then
    using the `interpolate` function. There is for sure a more efficient way to
    do this.
    """
    original = x
    if isinstance(original, np.ndarray):
        # Need to convert to tensor (for interpolate to work).
        x = torch.as_tensor(x)
    if len(original.shape) == 3:
        # Need to add a batch dimension (for interpolate to work).
        x = x.unsqueeze(0)
    if has_channels_last(original):
        # Need to make it channels first (for interpolate to work).
        x = channels_first(x)

    assert has_channels_first(x), f"Image needs to have channels first (shape is {x.shape})"

    x = interpolate(x, size, mode="area")
    if isinstance(original, np.ndarray):
        x = x.numpy()
    if len(original.shape) == 3:
        x = x[0]
    if has_channels_last(original):
        x = channels_last(x)
    return x


@resize.register
def _resize_namedtuple_space(
    x: NamedTupleSpace, size: Tuple[int, ...], **kwargs
) -> NamedTupleSpace:
    """When presented with a NamedTupleSpace input, this transform will be
    applied to all 'Image' spaces.
    """
    return type(x)(
        **{
            key: resize(v, size, **kwargs) if isinstance(v, ImageSpace) else v
            for key, v in x._spaces.items()
        }
    )


@resize.register(Mapping)
def _resize_namedtuple(x: Dict, size: Tuple[int, ...], **kwargs) -> Dict:
    """When presented with a Mapping-like input, this transform will be
    applied to all 'Image' spaces.
    """
    return type(x)(
        **{
            key: resize(value, size, **kwargs) if is_image(value) else value
            for key, value in x.items()
        }
    )


@resize.register(TypedDictSpace)
def _resize_typed_dict(x: TypedDictSpace, size: Tuple[int, ...], **kwargs) -> TypedDictSpace:
    """When presented with a Mapping-like input, this transform will be
    applied to all 'Image' spaces.
    """
    return type(x)(
        {
            key: resize(value, size, **kwargs) if is_image(value) else value
            for key, value in x.items()
        },
        dtype=x.dtype,
    )


@resize.register(tuple)
def _resize_image_shape(x: Tuple[int, ...], size: Tuple[int, ...], **kwargs) -> Tuple[int, ...]:
    """Give the resized image shape, given the input shape."""
    new_shape: List[int] = list(size)
    if len(size) == 2:
        # Preserve the number of channels.
        if len(x) == 4:
            if has_channels_first(x):
                new_shape = [*x[:2], *size]
            elif has_channels_last(x):
                new_shape = [x[0], *size, x[-1]]
            else:
                raise NotImplementedError(x)
        elif len(x) == 3:
            if has_channels_first(x):
                new_shape = [x[0], *size]
            elif has_channels_last(x):
                new_shape = [*size, x[-1]]
            else:
                raise NotImplementedError(x)
    else:
        NotImplementedError(size)
    return type(x)(new_shape)


@resize.register(spaces.Box)
def _resize_space(x: spaces.Box, size: Tuple[int, ...], **kwargs) -> spaces.Box:
    # Hmm, not sure if the bounds would actually also be respected though.
    new_space = type(x)(
        low=resize(x.low, size, **kwargs),
        high=resize(x.high, size, **kwargs),
        dtype=x.dtype,
    )
    # If the 'old' space supported tensors as samples, then so will the new space.
    if has_tensor_support(x):
        return add_tensor_support(new_space)
    return new_space


class Resize(Resize_, Transform[Img, Img]):
    def __init__(self, size: Tuple[int, ...], interpolation=InterpolationMode.BILINEAR):
        super().__init__(size, interpolation)
        # self.size = size
        # self.interpolation = interpolation

    def __call__(self, img):
        # TODO: (@lebrice) Weirdly enough, it seems that even though we
        # implement forward below, and __call__ is supposed to just use
        # `forward`, the base class somehow doesn't use our implementation, so
        # the test
        # env_dataset_test.py::test_iteration_with_more_than_one_wrapper would
        # fail if we don't have this __call__ explicitly implemented,
        return self.forward(img)

    def forward(self, img: Img) -> Img:
        return resize(img, size=self.size)


================================================
FILE: sequoia/common/transforms/split_batch.py
================================================
import dataclasses
from typing import Any, Callable, Optional, Tuple, Type, TypeVar

import numpy as np
from torch import Tensor

from ..batch import Batch
from .transform import Transform

# Type variables just for the below function.
ObservationType = TypeVar("ObservationType", bound=Batch)
RewardType = TypeVar("RewardType", bound=Batch)


class SplitBatch(Transform[Any, Tuple[ObservationType, RewardType]]):
    """
    Transform that will split batches into Observations and Rewards.

    The provided observation and reward types (which have to be subclasses of
    the `Batch` class) will be used to construct the observation and reward
    objects, respectively.

    To make this simpler, this callable will always return an Observation and a
    Reward object, even when the batch is unlabeled. In that case, the Reward
    object will have a 'None' passed for any of its required arguments.

    Parameters
    ----------
    observation_type : Type[ObservationType]
        [description]
    reward_type : Type[RewardType]
        [description]

    Returns
    -------
    Callable[[Any], Tuple[ObservationType, RewardType]]
        [description]

    Raises
    ------
    RuntimeError
        If the observation_type or reward_type don't both subclass Batch.
    NotImplementedError
        If the type of the batch isn't supported.
    RuntimeError
        [description]
    NotImplementedError
        [description]
    """

    def __init__(self, observation_type: Type[ObservationType], reward_type: Type[RewardType]):
        self.Observations = observation_type
        self.Rewards = reward_type
        self.func = split_batch(observation_type=observation_type, reward_type=reward_type)

    def __call__(self, batch: Any) -> Tuple[ObservationType, RewardType]:
        return self.func(batch)


def split_batch(
    observation_type: Type[ObservationType], reward_type: Type[RewardType]
) -> Callable[[Any], Tuple[ObservationType, Optional[RewardType]]]:
    """Makes a callable that will split batches into Observations and Rewards.

    The provided observation and reward types (which have to be subclasses of
    the `Batch` class) will be used to construct the observation and reward
    objects, respectively.

    To make this simpler, this callable will always return a tuple with an
    Observation and an optional Reward object, even when the batch is unlabeled.
    In that case, the Reward will be None.

    Parameters
    ----------
    observation_type : Type[ObservationType]
        [description]
    reward_type : Type[RewardType]
        [description]

    Returns
    -------
    Callable[[Any], Tuple[ObservationType, RewardType]]
        [description]

    Raises
    ------
    RuntimeError
        If the observation_type or reward_type don't both subclass Batch.
    NotImplementedError
        If the type of the batch isn't supported.
    RuntimeError
        [description]
    NotImplementedError
        [description]
    """
    if not (issubclass(observation_type, Batch) and issubclass(reward_type, Batch)):
        raise RuntimeError(
            "Both `observation_type` and `reward_type` need to " "inherit from `Batch`!"
        )

    # Get the min, max and total number of args for each object type.
    min_for_obs = n_required_fields(observation_type)
    max_for_obs = n_fields(observation_type)
    n_required_for_obs = min_for_obs
    n_optional_for_obs = max_for_obs - min_for_obs

    min_for_rew = n_required_fields(reward_type)
    max_for_reward = n_fields(reward_type)
    n_required_for_rew = min_for_rew
    n_optional_for_rew = max_for_reward - min_for_obs

    min_items = min_for_obs + min_for_rew
    max_items = max_for_obs + max_for_reward

    def split_batch_transform(batch: Any) -> Tuple[ObservationType, RewardType]:
        if isinstance(batch, (Tensor, np.ndarray)):
            batch = (batch,)

        if isinstance(batch, dict):
            obs_fields = observation_type.field_names
            rew_fields = reward_type.field_names
            assert not set(obs_fields).intersection(
                set(rew_fields)
            ), "Observation and Reward shouldn't share fields names"
            obs_kwargs = {k: v for k, v in batch.items() if k in obs_fields}
            obs = observation_type(**obs_kwargs)
            reward_kwargs = {k: v for k, v in batch.items() if k in rew_fields}
            reward = reward_type(**reward_kwargs)
            return obs, reward

        if isinstance(batch, observation_type):
            return batch, None

        if not isinstance(batch, (tuple, list)):
            # TODO: Add support for more types maybe? Or just wrap it in a tuple
            # and call it a day?
            raise RuntimeError(f"Batch is of an unsuported type: {type(batch)}.")

        # If the batch already has two elements, check if they are already of
        # the right type, to avoid unnecessary computation below.
        if len(batch) == 2:
            obs, rew = batch
            if isinstance(obs, observation_type) and isinstance(rew, reward_type):
                return obs, rew

        n_items = len(batch)
        if n_items < min_items or n_items > max_items:
            raise RuntimeError(
                f"There aren't the right number of elements in the batch to "
                f"create both an Observation and a Reward!\n"
                f"(batch has {n_items} items, but type "
                f"{observation_type} requires from {min_for_obs} to "
                f"{max_for_obs} args, while {reward_type} requires from "
                f"{min_for_rew} to {max_for_reward} args. "
            )

        # Batch looks like:
        # [
        #     O_1, O_2, ..., O_{min_obs}, (O_{min_obs+1}), ..., (O_{max_obs}),
        #     R_1, R_2, ..., R_{min_rew}, (R_{min_rew+1}), ..., (R_{max_rew}),
        # ]
        if n_items == 0:
            obs = observation_type()
            rew = reward_type()
        if n_items == max_items:
            # Easiest case! Just use all the values.
            obs = observation_type(*batch[:max_for_obs])
            rew = reward_type(*batch[max_for_obs:])
        elif n_items == min_items:
            # Easy case as well. Also simply uses all the values directly.
            obs = observation_type(*batch[:min_for_obs])
            rew = reward_type(*batch[min_for_obs:])
        elif n_optional_for_obs == 0 and n_optional_for_rew != 0:
            # All the extra args go in the reward.
            obs = observation_type(*batch[:min_for_obs])
            rew = reward_type(*batch[min_for_obs:])
        elif n_optional_for_obs != 0 and n_optional_for_rew == 0:
            # All the extra args go in the observation.
            obs = observation_type(*batch[:max_for_obs])
            rew = reward_type(*batch[max_for_obs:])
        else:
            # We can't tell where the 'extra' tensors should go.

            # TODO: Maybe just assume that all the 'extra' tensors are meant to
            # be part of the observation? or the reward? For instance:
            # Option 1: All the extra args go in the observation:
            # obs = Observation(*batch[:n_items-n_required_for_rew])
            # rew = Observation(*batch[n_items-n_required_for_rew:])
            # Option 2: All the extra args go in the reward:
            # obs = Observation(*batch[:n_required_for_obs])
            # rew = Observation(*batch[n_required_for_obs:])
            n_extra = n_items - min_items
            max_extra = n_optional_for_obs + n_optional_for_rew
            raise NotImplementedError(
                f"Can't tell where to put these extra tensors!\n"
                f"(batch has {n_items} items, but type "
                f"{observation_type} requires from {min_for_obs} to "
                f"{max_for_obs} args, while {reward_type} requires from "
                f"{min_for_rew} to {max_for_reward} args. There are "
                f"{n_extra} extra items out of a potential of {max_extra}."
            )
        return obs, rew

    return split_batch_transform


def n_fields(batch_type: Type[Batch]) -> int:
    """Helper function, gives back the total number of fields in Batch subclass.

    Parameters
    ----------
    batch_type : Type
        A subclass of Batch.

    Returns
    -------
    int
        The total number of fields in the type. See the `fields` function of the
        `dataclasses` package for more info.
    """
    return len(dataclasses.fields(batch_type))


def n_required_fields(batch_type: Type) -> int:
    """Helper function, gives the number of required fields in the dataclass.

    Parameters
    ----------
    batch_type : Type
        [description]

    Returns
    -------
    int
        The number of fields which don't have a default value or a default
        factory and are required by the constructor (have init=True).
    """
    # Need to figure out a way to get the number fields through the
    # class itself.
    fields = dataclasses.fields(batch_type)
    required_fields_names = [
        f.name
        for f in fields
        if f.default is dataclasses.MISSING and f.default_factory is dataclasses.MISSING and f.init
    ]
    # print(f"class {batch_type}: required fields: {required_fields_names}")
    return len(required_fields_names)


================================================
FILE: sequoia/common/transforms/to_tensor.py
================================================
""" Slight modification of the ToTensor transform from TorchVision.

@lebrice: I wrote this because I would often get weird 'negative stride in
images' errors when converting PIL images from some gym environments when
using `ToTensor` from torchvision.
"""
from collections.abc import Mapping
from dataclasses import dataclass
from functools import singledispatch
from typing import Dict, Sequence, Tuple, Union

import gym
import numpy as np
import torch
from gym import spaces
from PIL.Image import Image
from torch import Tensor
from torchvision.transforms import ToTensor as ToTensor_
from torchvision.transforms import functional as F

from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support
from sequoia.common.spaces import NamedTupleSpace, TypedDictSpace
from sequoia.utils.logging_utils import get_logger

from .channels import channels_first_if_needed
from .transform import Img, Transform

logger = get_logger(__name__)


def copy_if_negative_strides(image: Img) -> Img:
    # It sometimes happens when taking images from a gym env that the strides
    # are negative, for some reason. Therefore we need to copy the array
    # before we can call torchvision.transforms.functional.to_tensor(image).
    if isinstance(image, Image):
        image = np.array(image)

    if isinstance(image, np.ndarray):
        strides = image.strides
    elif isinstance(image, Tensor):
        strides = image.stride()
    elif hasattr(image, "strides"):
        strides = image.strides
    else:
        raise NotImplementedError(f"Can't get strides of object {image}")
    if any(s < 0 for s in strides):
        return image.copy()
    return image


@singledispatch
def image_to_tensor(image: Union[Img, Sequence[Img], gym.Space]) -> Union[Tensor, gym.Space]:
    """
    Converts a PIL Image or numpy.ndarray ((N) x H x W x C) in the range
    [0, 255] to a torch.FloatTensor of shape ((N) x C x H x W) in the range
    [0.0, 1.0] if the PIL Image belongs to one of the modes (L, LA, P, I, F,
    RGB, YCbCr, RGBA, CMYK, 1) or if the numpy.ndarray has dtype = np.uint8

    Parameters
    ----------
    image : Union[Img, Sequence[Img]]
        [description]

    Returns
    -------
    Tensor
        [description]
    """
    raise NotImplementedError(f"Don't know how to convert {image} to a Tensor.")


# @image_to_tensor.register
# def _(image: Tensor) -> Tensor:
#     return channels_first_if_needed(image)


@image_to_tensor.register(Tensor)
@image_to_tensor.register(np.ndarray)
@image_to_tensor.register(Image)
def _(image: Union[Image, np.ndarray]) -> Tensor:
    """Converts a PIL Image, or np.uint8 ndarray to a Tensor. Also reshapes it
    to channels_first format (because ToTensor from torchvision does it also).
    """
    from .channels import channels_first_if_needed

    image = copy_if_negative_strides(image)

    if len(image.shape) == 2:
        return F.to_tensor(image)

    if isinstance(image, np.ndarray):
        # Convert to channels last if needed, because ToTensor expects to
        # receive that.
        image = channels_first_if_needed(image)
        image = torch.from_numpy(image).contiguous()
        # backward compatibility
        if isinstance(image, torch.ByteTensor):
            image = image.float().div(255)
        return image

    if len(image.shape) == 4:
        return channels_first_if_needed(torch.stack(list(map(image_to_tensor, image))))

    if not isinstance(image, Tensor):
        image = F.to_tensor(image)
    return channels_first_if_needed(image)


@image_to_tensor.register(list)
def _list_of_images_to_tensor(image: Sequence[Img]) -> Tensor:
    return torch.stack(list(map(image_to_tensor, image)))


@image_to_tensor.register(tuple)
def _to_tensor_effect_on_image_shape(image: Tuple[int, ...]) -> Tuple[int, ...]:
    """Give the output shape given the input shape of an image."""
    if len(image) == 3:
        from .channels import channels_first_if_needed

        return channels_first_if_needed(image)
    return image


@image_to_tensor.register(spaces.Box)
def _(image: spaces.Box) -> spaces.Box:
    if image.dtype == np.uint8:
        # images get their bounds changed to [0. 1.] and their shape changed to
        # channels_first.
        image = type(image)(
            low=0.0, high=1.0, shape=channels_first_if_needed(image.shape), dtype=np.float32
        )
    # TODO: it sometimes happens that the `image` space has already been
    # through 'to_tensor`, not sure what to do in that case.
    # elif not has_tensor_support(image):
    #     raise RuntimeError(f"image spaces should have dtype np.uint8!: {image}")
    # Since the transform would convert images / ndarrays to tensors, then we
    # add 'Tensor' support when applying the same transform on the Space of
    # images!
    image = add_tensor_support(image)
    return image


@image_to_tensor.register(NamedTupleSpace)
def _(space: Dict, device: torch.device = None) -> Dict:
    from .resize import is_image

    return type(space)(
        **{
            key: image_to_tensor(value) if is_image(value) else value
            for key, value in space.items()
        },
        dtype=space.dtype,
    )


@image_to_tensor.register(Mapping)
@image_to_tensor.register(spaces.Dict)
def _space_with_images_to_tensor(space: Dict, device: torch.device = None) -> Dict:
    from .resize import is_image

    return type(space)(
        **{
            key: image_to_tensor(value) if is_image(value) else value
            for key, value in space.items()
        }
    )


@image_to_tensor.register(TypedDictSpace)
def _space_with_images_to_tensor(
    space: TypedDictSpace, device: torch.device = None
) -> TypedDictSpace:
    from .resize import is_image

    return type(space)(
        {key: image_to_tensor(value) if is_image(value) else value for key, value in space.items()},
        dtype=space.dtype,
    )


# @image_to_tensor.register(Image)
# def to_tensor(image: Union[Img, Sequence[Img]]) -> Tensor:

#     tensor: Tensor
#     if isinstance(image, Tensor):
#         return channels_first(image)
#         return image
#         # return channels_first(image)

#     if isinstance(image, (list, tuple)) or (isinstance(image, np.ndarray) and image.ndim == 4):
#         return torch.stack(list(map(to_tensor, image)))

#     assert isinstance(image, (np.ndarray, Image))
#     image = copy_if_negative_strides(image)

#     if isinstance(image, np.ndarray):
#         # Convert to channels last if needed, because ToTensor expects to
#         # receive that.
#         if len(image.shape) == 2:
#             pass
#         elif image.shape[-1] not in {1, 3}:
#             assert image.shape[0] in {1, 3}, image.shape
#             image = image.transpose(1, 2, 0)
#         # image = channels_last(image)
#     image = F.to_tensor(image)
#     assert isinstance(image, Tensor), image.shape
#     return image


@dataclass
class ToTensor(ToTensor_, Transform):
    def __call__(self, image):
        """
        Args:
            image (PIL Image or numpy.ndarray): Image to be converted to tensor.

        Returns:
            Tensor: Converted image.

        NOTE: torchvision's ToTensor transform assumes that whatever it is given
        is always in channels_last format (as is usually the case with PIL
        images) and always returns images with the channels *first*!

            Converts a PIL Image or numpy.ndarray (H x W x C) in the range
            [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range
            [0.0, 1.0] if the PIL Image belongs to one of the modes (L, LA, P,
            I, F, RGB, YCbCr, RGBA, CMYK, 1) or if the numpy.ndarray has
            dtype = np.uint8
        """
        return image_to_tensor(image)

    # @classmethod
    # def shape_change(cls, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]:
    #     from .channels import ChannelsFirstIfNeeded
    #     return ChannelsFirstIfNeeded.shape_change(input_shape)

    # @classmethod
    # def space_change(cls, input_space: gym.Space) -> gym.Space:
    #     if not isinstance(input_space, spaces.Box):
    #         logger.warning(UserWarning(f"Transform {cls} is only meant for Box spaces, not {input_space}"))
    #         return input_space
    #     return spaces.Box(
    #         low=0.,
    #         high=1.,
    #         shape=cls.shape_change(input_space.shape),
    #         dtype=np.float32,
    #     )


================================================
FILE: sequoia/common/transforms/transform.py
================================================
""" Defines a 'smarter' Transform class. """
from abc import abstractmethod
from typing import Generic, Tuple, TypeVar, Union, overload

import numpy as np
from gym import Space
from PIL.Image import Image
from torch import Tensor

InputType = TypeVar("InputType")
OutputType = TypeVar("OutputType")

Img = TypeVar("Img", Image, np.ndarray, Tensor)
Shape = TypeVar("Shape", bound=Tuple[int, ...])


class Transform(Generic[InputType, OutputType]):
    """Callable that can also tell you its impact on the shape of inputs."""

    @overload
    def __call__(self, input: InputType) -> OutputType:
        ...

    @overload
    def __call__(self, input: Shape) -> Shape:
        ...

    @overload
    def __call__(self, input: Space) -> Space:
        ...

    @abstractmethod
    def __call__(self, input: Union[InputType, Space, Shape]) -> Union[OutputType, Space, Shape]:
        pass


================================================
FILE: sequoia/common/transforms/transform_enum.py
================================================
""" Transforms and such. Trying to make it possible to parse such from the
command-line.

Also, playing around with the idea of adding the ability to predict the change
in shape resulting from the transforms, à-la-Tensorflow.

"""

from enum import Enum
from typing import Any, Callable, List, Tuple, TypeVar, Union

import gym
import torch
from simple_parsing.helpers.serialization.encoding import encode
from torchvision.transforms import Compose as ComposeBase
from torchvision.transforms import RandomGrayscale

from sequoia.utils.logging_utils import get_logger
from sequoia.utils.serialization import decode

logger = get_logger(__name__)

from .channels import (
    ChannelsFirst,
    ChannelsFirstIfNeeded,
    ChannelsLast,
    ChannelsLastIfNeeded,
    ThreeChannels,
)
from .resize import Resize
from .to_tensor import ToTensor
from .transform import Transform


# TODO: Add names to the dimensions in the transforms!
# from pl_bolts.models.self_supervised.simclr import (SimCLREvalDataTransform,
#                                                     SimCLRTrainDataTransform)
class Transforms(Enum):
    """Enum of possible transforms.

    By having this as an Enum, we can choose which transforms to use from the
    command-line.
    This also makes it easier to check for identity, e.g. to check wether a
    particular transform was used.

    TODO: Add the SimCLR/MOCO/etc transforms from  https://pytorch-lightning-bolts.readthedocs.io/en/latest/transforms.html
    TODO: Figure out a way to let people customize the arguments to the transforms?
    """

    three_channels = ThreeChannels()
    to_tensor = ToTensor()
    random_grayscale = RandomGrayscale()
    channels_first = ChannelsFirst()
    channels_first_if_needed = ChannelsFirstIfNeeded()
    channels_last = ChannelsLast()
    channels_last_if_needed = ChannelsLastIfNeeded()
    resize_64x64 = Resize((64, 64))
    resize_32x32 = Resize((32, 32))

    def __call__(self, x):
        return self.value(x)

    @classmethod
    def _missing_(cls, value: Any):
        # called whenever performing something like Transforms[<something>]
        # with <something> not being one of the enum values.
        for e in cls:
            if e.name == value:
                return e
            elif type(e.value) == type(value):
                return e
        return super()._missing_(value)

    def shape_change(self, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]:
        raise NotImplementedError(f"TODO: Add shape (tuple) support to {self}")
        if isinstance(self.value, Transform):
            return self.value.shape_change(input_shape)

    def space_change(self, input_space: gym.Space) -> gym.Space:
        raise NotImplementedError(f"TODO: Add space support to {self}")
        if isinstance(self.value, Transform):
            return self.value.space_change(input_space)


T = TypeVar("T", bound=Callable)


class Compose(List[T], ComposeBase):
    """Extend the Compose class of torchvision with methods of `list`.

    This can also be passed in members of the `Transforms` enum, which makes it
    possible to do something like this:
    >>> transforms = Compose([Transforms.to_tensor, Transforms.three_channels,])
    >>> Transforms.three_channels in transforms
    True
    >>> transforms += [Transforms.resize_32x32]
    >>> from pprint import pprint
    >>> pprint(transforms)
    [<Transforms.to_tensor: ToTensor()>,
     <Transforms.three_channels: ThreeChannels()>,
     <Transforms.resize_32x32: Resize(size=(32, 32), interpolation=bilinear)>]

    NEW: This Compose transform also applies on gym spaces:

    >>> import numpy as np
    >>> from gym.spaces import Box
    >>> image_space = Box(0, 255, (28, 28, 1), dtype=np.uint8)
    >>> transforms(image_space)
    TensorBox(0.0, 1.0, (3, 32, 32), torch.float32)
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        ComposeBase.__init__(self, transforms=self)

    # def shape_change(self, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]:
    #     for transform in self:
    #         if isinstance(transform, Transforms):
    #             transform = transform.value
    #         if isinstance(transform, Transform) or hasattr(transform, "shape_change"):
    #             input_shape = transform.shape_change(input_shape)
    #         else:
    #             logger.debug(
    #                 f"Unable to detect the change of shape caused by "
    #                 f"transform {transform}, assuming its output has same "
    #                 f"shape as its input."
    #             )
    #     logger.debug(f"Final shape: {input_shape}")
    #     return input_shape


@encode.register
def encode_transforms(v: Transforms) -> str:
    return v.name


@decode.register
def decode_transforms(v: str) -> Transforms:
    return Transforms[v]


if __name__ == "__main__":
    import doctest

    doctest.testmod()


================================================
FILE: sequoia/common/transforms/transforms_test.py
================================================
from dataclasses import dataclass, field
from typing import List, Tuple

import gym
import numpy as np
import pytest
import torch
from gym import spaces

from sequoia.conftest import requires_pyglet
from sequoia.utils.serialization import Serializable

from . import Compose, Transforms


@pytest.mark.parametrize(
    "transform,input_shape,output_shape",
    [
        ## Channels first:
        (Transforms.channels_first, (9, 9, 3), (3, 9, 9)),
        # Check that the ordering doesn't get messed up:
        (Transforms.channels_first, (9, 12, 3), (3, 9, 12)),
        (Transforms.channels_first, (400, 600, 3), (3, 400, 600)),
        # Axes get permuted even when the channels are already 'first'.
        (Transforms.channels_first, (3, 12, 9), (9, 3, 12)),
        ## Channels first (if needed):
        (Transforms.channels_first_if_needed, (9, 9, 3), (3, 9, 9)),
        (Transforms.channels_first_if_needed, (9, 12, 3), (3, 9, 12)),
        (Transforms.channels_first_if_needed, (400, 600, 3), (3, 400, 600)),
        # Axes do NOT get permuted when the channels are already 'first'.
        (Transforms.channels_first_if_needed, (3, 12, 9), (3, 12, 9)),
        # Does nothing when the channel dim isn't in {1, 3}:
        (Transforms.channels_first_if_needed, (7, 12, 13), (7, 12, 13)),
        (Transforms.channels_first_if_needed, (7, 12, 123), (7, 12, 123)),
        # when the input is 4-dimensional with batch size of 1 or 3, still works:
        (Transforms.channels_first_if_needed, (1, 28, 12, 3), (1, 3, 28, 12)),
        (Transforms.channels_first_if_needed, (1, 400, 600, 3), (1, 3, 400, 600)),
        (Transforms.channels_first_if_needed, (1, 3, 28, 27), (1, 3, 28, 27)),
        (Transforms.channels_first_if_needed, (3, 28, 12, 3), (3, 3, 28, 12)),
        (Transforms.channels_first_if_needed, (3, 400, 600, 3), (3, 3, 400, 600)),
        (Transforms.channels_first_if_needed, (3, 3, 28, 27), (3, 3, 28, 27)),
        ## Channels Last:
        (Transforms.channels_last, (3, 9, 9), (9, 9, 3)),
        # Check that the ordering doesn't get messed up:
        (Transforms.channels_last, (3, 9, 12), (9, 12, 3)),
        # Axes get permuted even when the channels are already 'last'.
        (Transforms.channels_last, (5, 6, 1), (6, 1, 5)),
        ## Channels Last (if needed):
        (Transforms.channels_last_if_needed, (3, 9, 9), (9, 9, 3)),
        # Check that the ordering doesn't get messed up:
        (Transforms.channels_last_if_needed, (3, 9, 12), (9, 12, 3)),
        # Axes do NOT get permuted when the channels are already 'last':
        (Transforms.channels_last_if_needed, (5, 6, 1), (5, 6, 1)),
        (Transforms.channels_last_if_needed, (12, 13, 3), (12, 13, 3)),
        # Test out the 'ThreeChannels' transform
        (Transforms.three_channels, (7, 12, 13), (7, 12, 13)),
        (Transforms.three_channels, (1, 28, 28), (3, 28, 28)),
        (Transforms.three_channels, (28, 28, 1), (28, 28, 3)),
        # Test out the 'Resize' transforms
        (Transforms.resize_64x64, (3, 128, 128), (3, 64, 64)),
        (Transforms.resize_64x64, (128, 128, 3), (64, 64, 3)),
        (Transforms.resize_64x64, (3, 64, 64), (3, 64, 64)),
        (Transforms.resize_64x64, (64, 64, 3), (64, 64, 3)),
        (Transforms.resize_64x64, (3, 111, 128), (3, 64, 64)),
        (Transforms.resize_64x64, (111, 128, 3), (64, 64, 3)),
    ],
)
def test_transform(transform: Transforms, input_shape, output_shape):
    x = torch.rand(input_shape)
    assert transform(x).shape == output_shape, transform

    # Apply the transform onto the input shape directly:
    assert transform(input_shape) == output_shape

    input_space = spaces.Box(low=0, high=1, shape=input_shape)
    output_space = spaces.Box(low=0, high=1, shape=output_shape)

    # Apply the transform onto the input space directly:
    actual_output_space = transform(input_space)
    assert actual_output_space == output_space

    # TODO: Test that serializing / deserializing the transforms works correctly.
    @dataclass
    class Foo(Serializable):
        transforms: List[Transforms] = field(default_factory=list)

    foo = Foo(transforms=[transform])
    foo_ = Foo.loads_json(foo.dumps_json())
    assert foo_ == foo
    assert Compose(foo_.transforms)(x).shape == output_shape
    assert Compose(foo_.transforms)(input_space) == output_space


@pytest.mark.parametrize(
    "transform,input_shape,output_shape",
    [
        # NOTE: to_tensor also does the channels-first operation (because since the
        # torchvision transform ToTensor does it, we do it also).
        (Transforms.to_tensor, (9, 9, 3), (3, 9, 9)),
        (Transforms.to_tensor, (3, 9, 9), (3, 9, 9)),
    ],
)
def test_to_tensor(transform: Transforms, input_shape, output_shape):
    x = np.random.randint(0, 255, input_shape, dtype=np.uint8)
    # x = PIL.Image.fromarray(x, mode="RGB")
    y = transform(x)
    assert y.shape == output_shape
    assert transform(input_shape) == output_shape
    assert isinstance(y, torch.Tensor)

    input_space = spaces.Box(low=0, high=255, shape=input_shape, dtype=np.uint8)
    output_space = spaces.Box(low=0, high=1, shape=output_shape, dtype=np.float32)

    assert transform(input_space) == output_space


@pytest.mark.parametrize(
    "transform, input_shape",
    [
        (Transforms.channels_last_if_needed, (7, 12, 13)),
    ],
)
def test_applying_transforms_on_weird_input_raises_error(
    transform: Transforms, input_shape: Tuple[int, ...]
):
    with pytest.raises(Exception):
        transform(input_shape)

    input_space = spaces.Box(low=0, high=255, shape=input_shape, dtype=np.uint8)
    with pytest.raises(Exception):
        transform(input_space)

    with pytest.raises(Exception):
        transform(input_space.sample())


def test_compose_applied_on_shape():
    transform = Compose([Transforms.channels_first])
    start_shape = (9, 9, 3)
    x = transform(torch.rand(start_shape))
    assert x.shape == (3, 9, 9)
    assert x.shape == transform(start_shape)
    assert x.shape == transform(start_shape) == (3, 9, 9)


import gym

from sequoia.common.gym_wrappers import PixelObservationWrapper, TransformObservation


@requires_pyglet
def test_channels_first_transform_on_gym_env():
    env = gym.make("CartPole-v0")
    env = PixelObservationWrapper(env)
    assert env.reset().shape == (400, 600, 3)

    transform = Compose(
        [
            Transforms.to_tensor,
            Transforms.channels_first_if_needed,
        ]
    )
    env = TransformObservation(env, transform)
    assert env.reset().shape == (3, 400, 600)
    assert env.observation_space.shape == (3, 400, 600)

    obs, *_ = env.step(env.action_space.sample())
    assert obs.shape == (3, 400, 600)


def test_preserves_device_when_possible():
    # TODO: Write a test that checks which transforms can be run on GPU, and checks
    # that they preserve the `device` attribute of a space when it's applied on a space.
    pass


================================================
FILE: sequoia/common/transforms/utils.py
================================================
from typing import Any

import numpy as np
from gym import spaces
from PIL import Image
from torch import Tensor

from sequoia.common.spaces.image import Image as ImageSpace


def is_image(v: Any) -> bool:
    """Returns wether the value is an Image, an image tensor, or an image
    space.
    """
    return (
        isinstance(v, Image.Image)
        or (isinstance(v, (Tensor, np.ndarray)) and len(v.shape) >= 3)
        or isinstance(v, ImageSpace)
        or isinstance(v, spaces.Box)
        and len(v.shape) >= 3
    )


================================================
FILE: sequoia/common.puml
================================================
@startuml common

!include gym.puml

' class List

package common {
    abstract class Batch {}

    package transforms as common.transforms {
        enum Transforms {
            to_tensor: ToTensor
            three_channels: ThreeChannels
            random_grayscale: RandomGrayscale
            channels_first: ChannelsFirst
            channels_last: ChannelsLast
            resize_64x64: Resize
            resize_32x32: Resize
            ...
        }
        abstract class Transform
        class Compose extends torchvision.transforms.Compose {
        }
    }

    package gym_wrappers as common.gym_wrappers {}
    package spaces as common.spaces {}
}
@enduml


================================================
FILE: sequoia/conftest.py
================================================
import json
import logging
import sys
from pathlib import Path
from typing import Any, Iterable, List, Optional, Type, get_type_hints

import gym
import numpy as np
import pytest

from sequoia.common.config import Config
from sequoia.methods.trainer import TrainerConfig
from sequoia.settings import Method
from sequoia.settings.rl.envs import (
    ATARI_PY_INSTALLED,
    METAWORLD_INSTALLED,
    MONSTERKONG_INSTALLED,
    MTENV_INSTALLED,
    MUJOCO_INSTALLED,
)
from sequoia.methods import AVALANCHE_INSTALLED, SB3_INSTALLED


# Prevent the collection of these modules if the requirements for them aren't installed.
collect_ignore = []
collect_ignore_glob = []
if not MONSTERKONG_INSTALLED:
    collect_ignore.append("settings/rl/envs/monsterkong.py")
if not MUJOCO_INSTALLED:
    collect_ignore.append("settings/rl/envs/mujoco")
if not AVALANCHE_INSTALLED:
    collect_ignore.append("methods/avalanche_methods")
if not SB3_INSTALLED:
    collect_ignore.append("methods/stable_baselines3_methods")
logger = logging.getLogger(__name__)

parametrize = pytest.mark.parametrize

xfail = pytest.mark.xfail


def xfail_param(*args, reason: str):
    return pytest.param(*args, marks=pytest.mark.xfail(reason=reason))


def skip_param(*args, reason: str):
    return pytest.param(*args, marks=pytest.mark.skip(reason=reason))


def skipif_param(condition, *args, reason: str):
    return pytest.param(*args, marks=pytest.mark.skipif(condition, reason=reason))


@pytest.fixture(autouse=True)
def add_np(doctest_namespace):
    doctest_namespace["np"] = np


@pytest.fixture()
def trainer_config(tmp_path_factory):
    tmp_path = tmp_path_factory.mktemp("log_dir")
    return TrainerConfig(
        fast_dev_run=True,
        # TODO: What if we don't have a GPU when testing?
        # TODO: Parametrize with the distributed backend, skip param if no GPU?
        distributed_backend="dp",
        default_root_dir=tmp_path,
    )


@pytest.fixture()
def config(tmp_path: Path):
    # TODO: Set the results dir somehow with the value of this `tmp_path` fixture.
    tmp_results_dir = tmp_path / "tmp_results"
    tmp_results_dir.mkdir()
    return Config(debug=True, seed=123, log_dir=tmp_results_dir)


@pytest.fixture(scope="session")
def session_config(tmp_path_factory: Path):
    test_log_dir = tmp_path_factory.mktemp("test_log_dir")
    # TODO: Set the results dir somehow with the value of this `tmp_path` fixture.
    return Config(debug=True, seed=123, log_dir=test_log_dir)


def id_fn(params: Any) -> str:
    """Creates a 'name' for an execution of a parametrized test.

    Args:
        params (Dict): [description]

    Returns:
        str: [description]
    """
    # if not params:
    #     return "default"
    if isinstance(params, dict):
        return json.dumps(params, sort_keys=True, separators=(",", ":"))

    return str(params)


def get_all_dataset_names(method_class: Type[Method] = None) -> List[str]:
    # When not given a method class, use the Method class (gives ALL the
    # possible datasets).
    method_class = method_class or Method

    dataset_names: Iterable[List[str]] = map(
        lambda s: list(s.available_datasets), method_class.get_applicable_settings()
    )
    return sorted(list(set(sum(dataset_names, []))))


def get_dataset_params(
    method_type: Type[Method],
    supported_datasets: List[str],
    skip_unsuported: bool = True,
) -> List[str]:
    all_datasets = get_all_dataset_names(method_type)
    dataset_params = []
    for dataset in all_datasets:
        if dataset in supported_datasets:
            dataset_params.append(dataset)
        elif skip_unsuported:
            dataset_params.append(skip_param(dataset, reason="Not supported yet"))
        else:
            dataset_params.append(xfail_param(dataset, reason="Not supported yet"))
    return dataset_params


test_datasets_option_name: str = "datasets"


def pytest_addoption(parser):
    parser.addoption("--slow", action="store_true", default=False)
    parser.addoption(f"--{test_datasets_option_name}", action="store", nargs="*", default=[])


slow = pytest.mark.skipif(
    "--slow" not in sys.argv,
    reason="This test is slow so we only run it when necessary.",
)


def slow_param(*args):
    """Mark a parameter as 'slow', so it's only run when using the "--slow" flag."""
    return pytest.param(*args, marks=slow)


def find_class_under_test(
    module, function, name: str = "method", global_var_name: str = None
) -> Optional[Type]:
    cls: Optional[Type] = None
    module_name: str = module.__name__
    function_name: str = function.__name__
    type_hints = get_type_hints(function)
    global_var_name = global_var_name or name.capitalize()
    for k in [name, f"{name}_class", f"{name}_type"]:
        cls = type_hints.get(k)
        if cls:
            logger.debug(
                f"function {function_name} has annotation of type " f"{cls} for argument {k}."
            )
            break
    if cls is None:
        # Try to get the class to test from a global variable on the module.
        cls = getattr(module, global_var_name, None)
        logger.debug(
            f"Test module {module_name} has a '{global_var_name}' gloval variable of type {cls}"
        )
    return cls


def parametrize_test_datasets(metafunc):
    # We want to get these from inspecting the test function:
    # The datasets to test on.
    test_datasets: List[str] = []
    default_test_datasets = ["mnist", "cifar10"]
    func_param_name = "test_dataset"
    global_var_names = ["test_datasets", "supported_datasets"]

    if func_param_name not in metafunc.fixturenames:
        return

    module = metafunc.module
    function = metafunc.function

    module_name: str = module.__name__
    function_name: str = function.__name__

    # Get the test datasets from the command-line option.
    datasets_from_command_line = metafunc.config.getoption(test_datasets_option_name)

    if "ALL" in datasets_from_command_line:
        method_class: Optional[Type[Method]] = find_class_under_test(
            module,
            function,
            name="method",
        )
        test_datasets = get_all_dataset_names(method_class)
    elif "NONE" in datasets_from_command_line:
        test_datasets = [skip_param("?", reason="Set to skip, with command line arg.")]
    elif datasets_from_command_line:
        assert isinstance(datasets_from_command_line, list) and all(
            isinstance(v, str) for v in datasets_from_command_line
        )
        # If any datasets were set, use them.
        test_datasets = datasets_from_command_line
    else:
        # The default datasets to try are the ones specified at the global
        # variable with name {module_test_datasets_name} in the module.
        for global_var_name in global_var_names:
            test_datasets = getattr(module, global_var_name, None)
            if test_datasets is not None:
                break
        else:
            logger.warning(
                RuntimeWarning(
                    f"Test module {module_name} didn't specify a test_datasets "
                    f"global variable, defaulting to {default_test_datasets}"
                )
            )
            test_datasets = default_test_datasets
    test_datasets = sorted(test_datasets)
    logger.info(
        f"Parametrizing the '{func_param_name}' param of test "
        f"{module_name} :: {function_name} with {test_datasets}."
    )
    metafunc.parametrize(func_param_name, test_datasets)


def pytest_generate_tests(metafunc):
    """Automatically Parametrize the tests.
    TODO: Having some fun parametrizing tests automatically, but should check
    that it's worth it, because otherwise it might make things too confusing.
    """
    parametrize_test_datasets(metafunc)


class DummyEnvironment(gym.Env):
    """Dummy environment for testing.

    The reward is how close to the target value the state (a counter) is. The
    actions are:
    0:  keep the counter the same.
    1:  Increment the counter.
    2:  Decrement the counter.
    """

    def __init__(self, start: int = 0, target: int = 5, max_value: int = None):
        self.i = start
        self.start = start
        max_value = max_value if max_value is not None else target * 2
        assert 0 <= target <= max_value
        self.max_value = max_value
        self.reward_range = (0, max_value)
        self.action_space = gym.spaces.Discrete(n=3)
        self.observation_space = gym.spaces.Discrete(n=max_value)

        self.target = target
        self.reward_range = (0, max(target, max_value - target))

        self.done: bool = False
        self._reset: bool = False

    def step(self, action: int):
        # The action modifies the state, producing a new state, and you get the
        # reward associated with that transition.
        if not self._reset:
            raise RuntimeError("Need to reset before you can step.")
        if action == 1:
            self.i += 1
        elif action == 2:
            self.i -= 1
        self.i %= self.max_value
        done = self.i == self.target
        reward = abs(self.i - self.target)
        # print(self.i, reward, done, action)
        return self.i, reward, done, {}

    def reset(self):
        self._reset = True
        self.i = self.start
        return self.i

    def seed(self, seed: Optional[int]) -> List[int]:
        seeds = []
        seeds.append(self.observation_space.seed(seed))
        seeds.append(self.action_space.seed(seed))
        return seeds


monsterkong_required = pytest.mark.skipif(
    not MONSTERKONG_INSTALLED, reason="monsterkong is required for this test."
)


def param_requires_monsterkong(*args):
    return skipif_param(
        not MONSTERKONG_INSTALLED,
        *args,
        reason="monsterkong is required for this parameter.",
    )


atari_py_required = pytest.mark.skipif(
    not ATARI_PY_INSTALLED, reason="atari_py is required for this test."
)


def param_requires_atari_py(*args):
    return skipif_param(
        not ATARI_PY_INSTALLED,
        *args,
        reason="atari_py is required for this parameter.",
    )


mtenv_required = pytest.mark.skipif(not MTENV_INSTALLED, reason="mtenv is required for this test.")


def param_requires_mtenv(*args):
    return skipif_param(
        not MTENV_INSTALLED,
        *args,
        reason="mtenv is required for this parameter.",
    )


# Metaworld needs mujoco
metaworld_required = pytest.mark.skipif(
    not METAWORLD_INSTALLED, reason="metaworld is required for this test."
)


def param_requires_metaworld(*args):
    return skipif_param(
        not METAWORLD_INSTALLED,
        *args,
        reason="metaworld is required for this parameter.",
    )


mujoco_required = pytest.mark.skipif(
    not MUJOCO_INSTALLED, reason="mujoco-py is required for this test."
)


def param_requires_mujoco(*args):
    return skipif_param(
        not MUJOCO_INSTALLED,
        *args,
        reason="mujoco-py is required for this parameter.",
    )


PYGLET_INSTALLED = False
try:
    import pyglet

    PYGLET_INSTALLED = True
except ImportError:
    pass

requires_pyglet = pytest.mark.skipif(
    not PYGLET_INSTALLED, reason="pyglet is required to render envs."
)


def param_requires_pyglet(*args):
    return skipif_param(
        not PYGLET_INSTALLED,
        *args,
        reason="pyglet is required to render envs.",
    )


================================================
FILE: sequoia/experiments/__init__.py
================================================
""" Package that defines a list of "Experiments".
"""
from .experiment import Experiment
from .hpo_sweep import HPOSweep


================================================
FILE: sequoia/experiments/experiment.py
================================================
""" Module used for launching an Experiment (applying a Method to one or more
Settings).
"""
import os
import shlex
import sys
from dataclasses import dataclass
from inspect import isclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Type, Union

from simple_parsing import ArgumentParser, choice, mutable_field

from sequoia.common.config import Config, WandbConfig
from sequoia.methods import Method, get_all_methods
from sequoia.settings import Results, Setting, all_settings
from sequoia.settings.presets import setting_presets
from sequoia.utils import Parseable, Serializable, get_logger
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)

source_dir = Path(os.path.dirname(__file__))


def get_method_names() -> Dict[str, Type[Method]]:
    all_methods = get_all_methods()
    return {method.get_full_name(): method for method in all_methods}


@dataclass
class Experiment(Parseable, Serializable):
    """Applies a Method to an experimental Setting to obtain Results.

    When the `setting` is not set, this will apply the chosen method on all of
    its "applicable" settings. (i.e. all subclasses of its target setting).

    When the `method` is not set, this will apply all applicable methods on the
    chosen setting.
    """

    # Which experimental setting to use. When left unset, will evaluate the
    # provided method on all applicable settings.
    setting: Optional[Union[Setting, Type[Setting]]] = choice(
        {setting.get_name(): setting for setting in all_settings},
        default=None,
        type=str,
    )
    # Path to a json/yaml file containing preset options for the chosen setting.
    # Can also be one of the key from the `setting_presets` dictionary,
    # for convenience.
    benchmark: Optional[Union[str, Path]] = None

    # Which experimental method to use. When left unset, will evaluate all
    # compatible methods on the provided setting.
    method: Optional[Union[str, Method, Type[Method]]] = choice(get_method_names(), default=None)

    # All the other configuration options, which are independant of the choice
    # of Setting or of Method, go in this next dataclass here! For example,
    # things like the log directory, wether Cuda is used, etc.
    config: Config = mutable_field(Config)

    wandb: Optional[WandbConfig] = None

    def __post_init__(self):
        if not (self.setting or self.method):
            raise RuntimeError("One of `setting` or `method` must be set!")

        # All settings have a unique name.
        if isinstance(self.setting, str):
            self.setting = get_class_with_name(self.setting, all_settings)

        # Each Method also has a unique name.
        if isinstance(self.method, str):
            self.method = get_class_with_name(self.method, all_methods)

        if self.benchmark:
            # If the provided benchmark isn't a path, try to get the value from
            # the `setting_presets` dict. If it isn't in the dict, raise an
            # error.
            if not Path(self.benchmark).is_file():
                if self.benchmark in setting_presets:
                    self.benchmark = setting_presets[self.benchmark]
                else:
                    raise RuntimeError(
                        f"Could not find benchmark '{self.benchmark}': it "
                        f"is neither a path to a file or a key of the "
                        f"`setting_presets` dictionary. \n\n"
                        f"Available presets: \n"
                        + "\n".join(
                            f"- {preset_name}: \t{preset_file.relative_to(os.getcwd())}"
                            for preset_name, preset_file in setting_presets.items()
                        )
                    )
            # Creating an experiment for the given setting, loaded from the
            # config file.
            # TODO: IDEA: Do the same thing for loading the Method?
            logger.info(
                f"Will load the options for the setting from the file " f"at path {self.benchmark}."
            )
            drop_extras = True
            if self.setting is None:
                logger.warn(
                    UserWarning(
                        f"You didn't specify which setting to use, so this will "
                        f"try to infer the correct type of setting to use from the "
                        f"contents of the file, which might not work!\n (Consider "
                        f"running this with the `--setting` option instead."
                    )
                )
                # Find the first type of setting that fits the given file.
                drop_extras = False
                self.setting = Setting

            # Raise an error if any of the args in sys.argv would have been used
            # up by the Setting, just to prevent any ambiguities.
            try:
                _, unused_args = self.setting.from_known_args()
            except (ImportError, AssertionError) as exc:
                # NOTE: An ImportError can occur here because of a missing OpenGL
                # dependency, since when no arguments are passed, the default RL setting
                # is created (cartpole with pixel observations), which requires a render
                # wrapper to be added (which itself uses pyglet, which uses OpenGL).
                logger.warning(RuntimeWarning(f"Unable to check for unused args: {exc}"))
                # In this case, we just pretend that no arguments would have been used.
                unused_args = sys.argv[1:]

            ignored_args = list(set(sys.argv[1:]) - set(unused_args))

            if ignored_args:
                # TODO: This could also be trigerred if there were arguments
                # in the method with the same name as some from the Setting.
                raise RuntimeError(
                    f"Cannot pass command-line arguments for the Setting when "
                    f"loading a preset, since these arguments whould have been "
                    f"ignored when creating the setting of type {self.setting} "
                    f"anyway: {ignored_args}"
                )

            assert isclass(self.setting) and issubclass(self.setting, Setting)
            # Actually load the setting from the file.
            # TODO: Why isn't this using `load_benchmark`?
            self.setting = self.setting.load(path=self.benchmark, drop_extra_fields=drop_extras)
            self.setting.wandb = self.wandb

            if self.method is None:
                raise NotImplementedError(
                    f"For now, you need to specify a Method to use using the "
                    f"`--method` argument when loading the setting from a file."
                )

        if self.setting is not None and self.method is not None:
            if not self.method.is_applicable(self.setting):
                raise RuntimeError(
                    f"Method {self.method} isn't applicable to " f"setting {self.setting}!"
                )

        assert (
            self.setting is None
            or isinstance(self.setting, Setting)
            or issubclass(self.setting, Setting)
        )
        assert (
            self.method is None
            or isinstance(self.method, Method)
            or issubclass(self.method, Method)
        )

    @staticmethod
    def run_experiment(
        setting: Union[Setting, Type[Setting]],
        method: Union[Method, Type[Method]],
        config: Config,
        argv: Union[str, List[str]] = None,
        strict_args: bool = False,
    ) -> Results:
        """Launches an experiment, applying `method` onto `setting`
        and returning the corresponding results.

        This assumes that both `setting` and `method` are not None.
        This always returns a single `Results` object.

        If either `setting` or `method` are classes, then instances of these
        classes from the command-line arguments `argv`.

        If `strict_args` is True and there are leftover arguments (not consumed
        by either the Setting or the Method), a RuntimeError is raised.

        This then returns the result of `setting.apply(method)`.

        Parameters
        ----------
        argv : Union[str, List[str]], optional
            List of command-line args. When not set, uses the contents of
            `sys.argv`. Defaults to `None`.
        strict_args : bool, optional
            Wether to raise an error when encountering command-line arguments
            that are unexpected by both the Setting and the Method. Defaults to
            `False`.

        Returns
        -------
        Results

        """
        assert setting is not None and method is not None
        assert isinstance(
            setting, Setting
        ), f"TODO: Fix this, need to pass a wandb config to the Setting from the experiment!"
        if not (isinstance(setting, Setting) and isinstance(method, Method)):
            setting, method = parse_setting_and_method_instances(
                setting=setting, method=method, argv=argv, strict_args=strict_args
            )

        assert isinstance(setting, Setting)
        assert isinstance(method, Method)
        assert isinstance(config, Config)

        return setting.apply(method, config=config)

    def launch(
        self,
        argv: Union[str, List[str]] = None,
        strict_args: bool = False,
    ) -> Results:
        """Launches the experiment, applying `self.method` onto `self.setting`
        and returning the corresponding results.

        This differs from `main` in that this assumes that both `self.setting`
        and `self.method` are not None, and so this always returns a single
        `Results` object.

        NOTE: Internally, this is equivalent to calling `run_experiment`,
        passing in the `setting`, `method` and `config` arguments from `self`.

        Parameters
        ----------
        argv : Union[str, List[str]], optional
            List of command-line args. When not set, uses the contents of
            `sys.argv`. Defaults to `None`.
        strict_args : bool, optional
            Wether to raise an error when encountering command-line arguments
            that are unexpected by both the Setting and the Method. Defaults to
            `False`.

        Returns
        -------
        Results
            An object describing the results of applying Method `self.method` onto
            the Setting `self.setting`.
        """
        assert self.setting is not None
        assert self.method is not None
        assert self.config is not None

        if not (isinstance(self.setting, Setting) and isinstance(self.method, Method)):
            self.setting, self.method = parse_setting_and_method_instances(
                setting=self.setting, method=self.method, argv=argv, strict_args=strict_args
            )

        assert isinstance(self.setting, Setting)
        assert isinstance(self.method, Method)

        self.setting.wandb = self.wandb
        self.setting.config = self.config

        return self.setting.apply(self.method, config=self.config)

    @classmethod
    def main(
        cls,
        argv: Union[str, List[str]] = None,
        strict_args: bool = False,
    ) -> Union[Results, Tuple[Dict, Any], List[Tuple[Dict, Results]]]:
        """Launches one or more experiments from the command-line.

        First, we get the choice of method and setting using a first parser.
        Then, we parse the Setting and Method objects using the remaining args
        with two other parsers.

        Parameters
        ----------
        - argv : Union[str, List[str]], optional, by default None

            command-line arguments to use. When None (default), uses sys.argv.

        Returns
        -------
        Union[Results,
              Dict[Tuple[Type[Setting], Type[Method], Config], Results]]
            Results of the experiment, if only applying a method to a setting.
            Otherwise, if either of `--setting` or `--method` aren't set, this
            will be a dictionary mapping from
            (setting_type, method_type) tuples to Results.
        """
        # TODO: Clean this up with the new command-line API.
        if argv is None:
            argv = sys.argv[1:]
        if isinstance(argv, str):
            argv = shlex.split(argv)
        argv_copy = argv.copy()

        experiment: Experiment
        experiment, argv = cls.from_known_args(argv)

        setting: Optional[Type[Setting]] = experiment.setting
        method: Optional[Type[Method]] = experiment.method
        config: Config = experiment.config

        if method is None and setting is None:
            raise RuntimeError(f"One of setting or method must be set.")

        if setting and method:
            # One 'job': Launch it directly.
            results = experiment.launch(argv, strict_args=strict_args)
            print("\n\n EXPERIMENT IS DONE \n\n")
            print(f"Results: {results}")
            return results

        # TODO: Test out this other case. Haven't used it in a while.
        # TODO: Move this to something like a BatchExperiment?
        all_results = launch_batch_of_runs(setting=setting, method=method, argv=argv)
        return all_results


def launch_batch_of_runs(
    setting: Optional[Setting],
    method: Optional[Method],
    argv: Union[str, List[str]] = None,
) -> List[Tuple[Dict, Results]]:
    if argv is None:
        argv = sys.argv[1:]
    if isinstance(argv, str):
        argv = shlex.split(argv)
    argv_copy = argv.copy()

    experiment: Experiment
    experiment, argv = Experiment.from_known_args(argv)

    setting: Optional[Type[Setting]] = experiment.setting
    method: Optional[Type[Method]] = experiment.method
    config = experiment.config

    # TODO: Maybe if everything stays exactly identical, we could 'cache'
    # the results of some experiments, so we don't re-run them all the time?
    all_results: Dict[Tuple[Type[Setting], Type[Method]], Results] = {}

    # The lists of arguments for each 'job'.
    method_types: List[Type[Method]] = []
    setting_types: List[Type[Setting]] = []
    run_configs: List[Config] = []

    if setting:
        logger.info(f"Evaluating all applicable methods on Setting {setting}.")
        method_types = setting.get_applicable_methods()
        setting_types = [setting for _ in method_types]

    elif method:
        logger.info(f"Applying Method {method} on all its applicable settings.")
        setting_types = method.get_applicable_settings()
        method_types = [method for _ in setting_types]

    # Create a 'config' for each experiment.
    # Use a log_dir for each run using the 'base' log_dir (passed
    # when creating the Experiment), the name of the Setting, and
    # the name of the Method.
    for setting_type, method_type in zip(setting_types, method_types):
        run_log_dir = config.log_dir / setting_type.get_name() / method_type.get_name()

        run_config_kwargs = config.to_dict()
        run_config_kwargs["log_dir"] = run_log_dir
        run_config = Config(**run_config_kwargs)

        run_configs.append(run_config)

    arguments_of_each_run: List[Dict] = []
    results_of_each_run: List[Result] = []
    # Create one 'job' per setting-method combination:
    for setting_type, method_type, run_config in zip(setting_types, method_types, run_configs):
        # NOTE: Some methods might use all the values in `argv`, and some
        # might not, so we set `strict=False`.
        arguments_of_each_run.append(
            dict(
                setting=setting_type,
                method=method_type,
                config=run_config,
                argv=argv,
                strict_args=False,
            )
        )

    # TODO: Use submitit or somethign like it, to run each of these in parallel:
    # See https://github.com/lebrice/Sequoia/issues/87 for more info.
    for run_arguments in arguments_of_each_run:
        result = Experiment.run_experiment(**run_arguments)
        logger.info(f"Results for arguments {run_arguments}: {result}")
        results_of_each_run.append(result)

    all_results = list(zip(arguments_of_each_run, results_of_each_run))
    logger.info(f"All results: ")
    for run_arguments, run_results in all_results:
        print(f"Arguments: {run_arguments}")
        print(f"Results: {run_results}")
    return all_results


def parse_setting_and_method_instances(
    setting: Union[Setting, Type[Setting]],
    method: Union[Method, Type[Method]],
    argv: Union[str, List[str]] = None,
    strict_args: bool = False,
) -> Tuple[Setting, Method]:
    # TODO: Should we raise an error if an argument appears both in the Setting
    # and the Method?
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    if not isinstance(setting, Setting):
        assert issubclass(setting, Setting)
        setting.add_argparse_args(parser)
    if not isinstance(method, Method):
        assert method is not None
        assert issubclass(method, Method)
        method.add_argparse_args(parser)

    if strict_args:
        args = parser.parse_args(argv)
    else:
        args, unused_args = parser.parse_known_args(argv)
        if unused_args:
            logger.warning(UserWarning(f"Unused command-line args: {unused_args}"))

    if not isinstance(setting, Setting):
        setting = setting.from_argparse_args(args)
    if not isinstance(method, Method):
        method = method.from_argparse_args(args)

    return setting, method


def get_class_with_name(
    class_name: str,
    all_classes: Union[List[Type[Setting]], List[Type[Method]]],
) -> Union[Type[Method], Type[Setting]]:
    potential_classes = [c for c in all_classes if c.get_name() == class_name]
    # if target_class:
    #     potential_classes = [
    #         m for m in potential_classes
    #         if m.is_applicable(target_class)
    #     ]
    if len(potential_classes) == 1:
        return potential_classes[0]
    if not potential_classes:
        raise RuntimeError(
            f"Couldn't find any classes with name {class_name} in the list of "
            f"available classes {all_classes}!"
        )
    raise RuntimeError(
        f"There are more than one potential methods with name "
        f"{class_name}, which isn't supposed to happen! "
        f"(all_classes: {all_classes})"
    )


def check_has_descendants(potential_classes: List[Type[Method]]) -> List[bool]:
    """Returns a list where for each method in the list, check if it has
    any descendants (subclasses of itself) also within the list.
    """

    def _has_descendant(method: Type[Method]) -> bool:
        """For a given method, check if it has any descendants within
        the list of potential methods.
        """
        return any(
            (issubclass(other_method, method) and other_method is not method)
            for other_method in potential_classes
        )

    return [_has_descendant(method) for method in potential_classes]


def main():
    logger.debug(
        "Registered Settings: \n"
        + "\n".join(
            f"- {setting.get_name()}: {setting} ({setting.get_path_to_source_file()})"
            for setting in all_settings
        )
    )
    logger.debug(
        "Registered Methods: \n"
        + "\n".join(
            f"- {method.get_name()}: {method} ({method.get_path_to_source_file()})"
            for method in get_all_methods()
        )
    )

    Experiment.main()
    exit(0)


================================================
FILE: sequoia/experiments/experiment_test.py
================================================
import shlex
import sys
from pathlib import Path
from typing import Optional, Type

import pytest

from sequoia.common.config import Config
from sequoia.conftest import slow
from sequoia.methods import Method, get_all_methods
from sequoia.methods.method_test import key_fn
from sequoia.settings import Results, Setting, all_settings

from .experiment import Experiment, get_method_names

method_names = get_method_names()


@pytest.mark.xfail(
    reason="@lebrice: I changed my mind on this. For example, it could make "
    "sense to have multiple methods called 'baseline' when a new Setting needs "
    "to create a new subclass of the BaseMethod or a new Method altogether."
)
def test_no_collisions_in_method_names():
    methods = get_all_methods()
    assert len(set(method.get_name() for method in methods)) == len(methods)


def test_no_collisions_in_setting_names():
    assert len(set(setting.get_name() for setting in all_settings)) == len(all_settings)


def test_applicable_methods():
    from sequoia.methods import BaseMethod
    from sequoia.settings import TraditionalSLSetting

    assert BaseMethod in TraditionalSLSetting.get_applicable_methods()


def mock_apply(self: Setting, method: Method, config: Config) -> Results:
    # 1. Configure the method to work on the setting.
    # method.configure(self)
    # 2. Train the method on the setting.
    # method.train(self)
    # 3. Evaluate the method on the setting and return the results.
    # return self.evaluate(method)
    return type(method), type(self)


@pytest.fixture()
def set_argv_for_debug(monkeypatch):
    monkeypatch.setattr(sys, "argv", shlex.split("main.py --debug --fast_dev_run"))


@pytest.fixture(params=sorted(get_all_methods(), key=str))
def method_type(request, monkeypatch, set_argv_for_debug):
    method_class: Type[Method] = request.param
    return method_class


@pytest.fixture(params=sorted(all_settings, key=key_fn))
def setting_type(request, monkeypatch, set_argv_for_debug):
    setting_class: Type[Setting] = request.param
    monkeypatch.setattr(setting_class, "apply", mock_apply)
    for method_type in setting_class.get_applicable_methods():
        pass
    return setting_class


def test_experiment_from_args(
    method_type: Optional[Type[Method]], setting_type: Optional[Type[Setting]]
):
    """Test that when parsing the 'Experiment' from the command-line, the
    `setting` and `method` fields get set to the classes corresponding to their
    names.
    """
    # method = method_type.get_name()
    method_name = [k for k, v in method_names.items() if v is method_type][0]
    setting = setting_type.get_name()
    if not method_type.is_applicable(setting_type):
        pytest.skip(
            msg=f"Skipping test since Method {method_type} isn't applicable on "
            f"settings of type {setting_type}."
        )
    experiment = Experiment.from_args(f"--setting {setting} --method {method_name}")
    assert experiment.method is method_type
    assert experiment.setting is setting_type


def test_launch_experiment_with_constructor(
    method_type: Optional[Type[Method]], setting_type: Optional[Type[Setting]]
):
    if not method_type.is_applicable(setting_type):
        pytest.skip(
            msg=f"Skipping test since Method {method_type} isn't applicable on "
            f"settings of type {setting_type}."
        )
    experiment = Experiment(method=method_type, setting=setting_type)
    all_results = experiment.launch("--debug --fast_dev_run --batch_size 1")
    assert all_results == (method_type, setting_type)


@slow
@pytest.mark.timeout(300)
def test_none_setting(method_type: Optional[Type[Method]], tmp_path: Path, monkeypatch):
    """Test that leaving the Setting unset runs on all applicable setting."""
    method = method_type.get_name()

    for setting_type in method_type.get_applicable_settings():
        monkeypatch.setattr(setting_type, "apply", mock_apply)

    all_results = Experiment.main(
        f"--method {method} --debug --fast_dev_run " f"--log_dir {tmp_path}"
    )

    for setting_type in method_type.get_applicable_settings():
        monkeypatch.setattr(setting_type, "apply", mock_apply)
        result = all_results[(setting_type, method_type)]
        assert result == (method_type, setting_type)


@slow
@pytest.mark.timeout(300)
def test_none_method(setting_type: Optional[Type[Setting]]):
    """Test that leaving the method unset runs all applicable methods on the
    setting.
    """
    setting = setting_type.get_name()
    all_results = Experiment.main(f"--setting {setting} --debug --fast_dev_run --batch-size 1")
    for method_type in setting_type.get_applicable_methods():
        result = all_results[(setting_type, method_type)]
        assert result == (method_type, setting_type)

    # assert all_results == {
    #     method_type: (method_type, setting_type)
    #     for method_type in setting_type.get_applicable_methods()
    # }


================================================
FILE: sequoia/experiments/hpo_sweep.py
================================================
import json
import shlex
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Type, Union

from simple_parsing.helpers import choice

from sequoia.settings import Method, Results, Setting

from .experiment import Experiment, parse_setting_and_method_instances


@dataclass
class HPOSweep(Experiment):
    """Experiment which launches an HPO Sweep using Orion.

    TODO: Maybe use this somewhere in main.py once we redesign the command-line API.
    """

    # Path to a json file containing the orion-formatted search space dictionary.
    # When `None` (by default), the result of `get_search_space` will be used instead.
    search_space_path: Optional[Path] = None
    # Path indicating where the pickle database will be loaded or be created.
    database_path: Path = Path("orion_db.pkl")
    # manual, unique identifier for this experiment. This should only really be used
    # when launching multiple different experiments that involve the same method and
    # the same exact setting configurations, but where some other aspect of the
    # experiment is changed.
    experiment_id: Optional[str] = None

    # Maximum number of runs to perform.
    max_runs: Optional[int] = 10

    hpo_algorithm: str = choice(
        {
            "random": "random",
            "bayesian": "BayesianOptimizer",
        },
        default="bayesian",
    )  # TODO: BayesianOptimizer does not support num > 1

    def __post_init__(self):
        super().__post_init__()
        self.search_space: Dict = {}
        if self.search_space_path:
            with open(self.search_space_path, "r") as f:
                self.search_space = json.load(f)

    def launch(self, argv: Union[str, List[str]] = None, strict_args: bool = False):
        """Launch the experiment, using its attributes and possibly also using the
        provided command-line arguments.

        This differs from `Experiment.launch` in that this will actually launch a
        sequence of runs.

        Parameters
        ----------
        argv : Union[str, List[str]], optional
            [description], by default None
        strict_args : bool, optional
            [description], by default False

        Returns
        -------
        [type]
            [description]
        """
        if not (isinstance(self.setting, Setting) and isinstance(self.method, Method)):
            self.setting, self.method = parse_setting_and_method_instances(
                setting=self.setting,
                method=self.method,
                argv=argv,
                strict_args=strict_args,
            )
        assert isinstance(self.setting, Setting)
        assert isinstance(self.method, Method)
        self.setting.wandb = self.wandb

        # TODO: IDEA: It could actually be really cool if we created a list of
        # Experiment objects here, and just call their 'launch' methods in parallel,
        # rather than do the sweep logic in the Method class!
        best_params, best_objective = self.method.hparam_sweep(
            self.setting,
            search_space=self.search_space,
            database_path=self.database_path,
            experiment_id=self.experiment_id,
            max_runs=self.max_runs,
            hpo_algorithm=self.hpo_algorithm,
        )
        print(
            "Best params:\n" + "\n".join(f"\t{key}: {value}" for key, value in best_params.items())
        )
        print(f"Best objective: {best_objective}")
        return (best_params, best_objective)

    @classmethod
    def main(
        cls,
        argv: Union[str, List[str]] = None,
        strict_args: bool = False,
    ) -> List[Tuple[Dict, Results]]:
        """Launches this experiment from the command-line.

        First, we get the choice of method and setting using a first parser.
        Then, we parse the Setting and Method objects using the remaining args.

        Parameters
        ----------
        - argv : Union[str, List[str]], optional, by default None

            command-line arguments to use. When None (default), uses sys.argv.

        Returns
        -------
        List[Tuple[Dict, Results]]

            Best trial parameters and objective found during the sweep.

        """
        if argv is None:
            argv = sys.argv[1:]
        if isinstance(argv, str):
            argv = shlex.split(argv)
        _ = argv.copy()

        experiment: HPOSweep
        experiment, argv = cls.from_known_args(argv)

        setting: Optional[Type[Setting]] = experiment.setting
        method: Optional[Type[Method]] = experiment.method
        # config: Config = experiment.config

        if method is None or setting is None:
            raise RuntimeError("Both `--setting` and `--method` must be set to run a sweep.")
        return experiment.launch(argv, strict_args=strict_args)


def main():
    HPOSweep.main()


if __name__ == "__main__":
    main()


================================================
FILE: sequoia/experiments/hpo_sweep_test.py
================================================
import random
import shlex
import sys
from pathlib import Path
from typing import Optional, Type

import pytest

from sequoia.common.config import Config
from sequoia.methods import Method, get_all_methods
from sequoia.methods.method_test import key_fn
from sequoia.methods.random_baseline import RandomBaselineMethod
from sequoia.settings import Results, Setting, all_settings
from sequoia.utils.serialization import Serializable

from .hpo_sweep import HPOSweep


class MockResults(Results):
    def __init__(self, hparams):
        self.haprams = hparams
        self._objective = random.random()

    @property
    def objective(self) -> float:
        return self._objective

    def make_plots(self):
        return {}

    def to_log_dict(self, verbose: bool = False):
        return {
            "hparams": self.hparams.to_dict()
            if isinstance(self.hparams, Serializable)
            else self.hparams,
            "objective": self.objective,
        }

    def summary(self):
        return str(self.to_log_dict())


def mock_apply(self: Setting, method: Method, config: Config = None) -> Results:
    # 1. Configure the method to work on the setting.
    # method.configure(self)
    # 2. Train the method on the setting.
    # method.train(self)
    # 3. Evaluate the method on the setting and return the results.
    # return self.evaluate(method)
    # assert False, method.hparams
    return MockResults(getattr(method, "hparams", {}))
    # return type(method), type(self)


@pytest.fixture()
def set_argv_for_debug(monkeypatch):
    monkeypatch.setattr(sys, "argv", shlex.split("main.py --debug --fast_dev_run"))


@pytest.fixture(params=sorted(get_all_methods(), key=str))
def method_type(request, monkeypatch, set_argv_for_debug):
    method_class: Type[Method] = request.param
    return method_class


@pytest.fixture(params=sorted(all_settings, key=key_fn))
def setting_type(request, monkeypatch, set_argv_for_debug):
    setting_class: Type[Setting] = request.param
    monkeypatch.setattr(setting_class, "apply", mock_apply)
    # TODO: Not sure what this was doing, but I think it was important that all methods
    # get imported here.
    for method_type in setting_class.get_applicable_methods():
        pass
    return setting_class


@pytest.mark.skip(reason="BUG: seems to make other tests hang, because of Orion's bug.")
def test_launch_sweep_with_constructor(
    method_type: Optional[Type[Method]],
    setting_type: Optional[Type[Setting]],
    tmp_path: Path,
):
    if not method_type.is_applicable(setting_type):
        pytest.skip(
            msg=f"Skipping test since Method {method_type} isn't applicable on settings of type {setting_type}."
        )

    if issubclass(method_type, RandomBaselineMethod):
        pytest.skip(
            "BUG: RandomBaselineMethod has a hparam space that causes the HPO algo to go into an infinite loop."
        )
        return

    experiment = HPOSweep(
        method=method_type,
        setting=setting_type,
        database_path=tmp_path / "debug.pkl",
        config=Config(debug=True),
        max_runs=3,
    )
    best_hparams, best_performance = experiment.launch(["--debug"])
    assert best_hparams
    assert best_performance


================================================
FILE: sequoia/main.py
================================================
"""Sequoia - The Research Tree 

Used to run experiments, which consist in applying a Method to a Setting.
"""
from argparse import _SubParsersAction
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Type, Union

from simple_parsing import ArgumentParser
from simple_parsing.help_formatter import SimpleHelpFormatter
from simple_parsing.helpers import choice

import sequoia
from sequoia.common.config import Config
from sequoia.common.config.wandb_config import WandbConfig
from sequoia.methods import get_all_methods
from sequoia.settings import all_settings
from sequoia.settings.base import Method, Results, Setting
from sequoia.utils import get_logger

# TODO: Fix all the `get_logger` to use __name__ instead of __file__.
logger = get_logger(__name__)


def main():
    """Adds all command-line arguments, parses the args, and runs the selected action."""
    parser = ArgumentParser(prog="sequoia", description=__doc__, add_dest_to_option_strings=False)
    parser.add_argument(
        "--version",
        action="version",
        version=sequoia.__version__,
        help="Displays the installed version of Sequoia and exits.",
    )

    command_subparsers = parser.add_subparsers(
        title="command",
        dest="command",
        description="Command to execute",
        parser_class=ArgumentParser,
        required=False,
    )

    add_run_command(command_subparsers)
    add_sweep_command(command_subparsers)
    add_info_command(command_subparsers)

    args = parser.parse_args()

    command: str = getattr(args, "command", None)
    if command is None:
        parser.print_help()
    elif command == "run":
        method_type: Type[Method] = args.method_type
        setting_type: Type[Setting] = args.setting_type
        method: Method = method_type.from_argparse_args(args)
        setting: Setting = setting_type.from_argparse_args(args)
        config: Config = args.config
        # TODO: Make this a bit cleaner, current need to set this `wandb` config as a property on
        # the setting. Could either subclass Config and add an Optional[WandbConfig] field, or just
        # add it directly to the existing Config class.
        wandb_config: WandbConfig = args.wandb
        setting.wandb = wandb_config
        run(setting=setting, method=method, config=config)
    elif command == "sweep":
        method_type: Type[Method] = args.method_type
        setting_type: Type[Setting] = args.setting_type
        method: Method = method_type.from_argparse_args(args)
        setting: Setting = setting_type.from_argparse_args(args)
        config: Config = args.config
        # TODO: Fix this up a bit: Currently need to set this on the setting
        wandb_config: WandbConfig = args.wandb
        setting.wandb = wandb_config
        sweep(setting=args.setting, method=method, config=args.config)
    elif command == "info":
        info(component=args.component)


def add_run_command(command_subparsers: _SubParsersAction) -> None:
    run_parser = command_subparsers.add_parser(
        "run",
        description="Run an experiment on a given setting.",
        help="Run an experiment on a given setting.",
        add_dest_to_option_strings=False,
        formatter_class=SimpleHelpFormatter,
    )
    run_parser.add_arguments(Config, dest="config")
    run_parser.add_arguments(WandbConfig, dest="wandb")
    add_args_for_settings_and_methods(run_parser)


def run(setting: Setting, method: Method, config: Config) -> Results:
    """Performs a single run, applying a method to a setting, and returns the results."""
    logger.debug("Setting:")
    # BUG: TypeError: __reduce_ex__() takes exactly one argument (0 given)
    try:
        logger.debug(setting.dumps_yaml())
    except TypeError:
        logger.debug(setting)
    logger.debug("Config:")
    logger.debug(config.dumps_yaml())
    logger.debug("Method")
    logger.debug(str(method))
    results = setting.apply(method, config=config)
    logger.debug("Results:")
    logger.debug(results.summary())
    return results


@dataclass
class SweepConfig(Config):
    """Configuration options for a HPO sweep."""

    # Path indicating where the pickle database will be loaded or be created.
    database_path: Path = Path("orion_db.pkl")
    # manual, unique identifier for this experiment. This should only really be used
    # when launching multiple different experiments that involve the same method and
    # the same exact setting configurations, but where some other aspect of the
    # experiment is changed.
    experiment_id: Optional[str] = None

    # Maximum number of runs to perform.
    max_runs: Optional[int] = 10

    # Which hyper-parameter optimization algorithm to use.
    hpo_algorithm: str = choice(
        {
            "random": "random",
            "bayesian": "BayesianOptimizer",
        },
        default="bayesian",
    )  # TODO: BayesianOptimizer does not support num > 1


def sweep(setting: Setting, method: Method, config: SweepConfig) -> Setting.Results:
    """Performs a Hyper-Parameter Optimization sweep, consisting in running the method
    on the given setting, each run having a different set of hyper-parameters.
    """
    print("Sweep!")
    logger.debug("Setting:")
    # BUG: TypeError: __reduce_ex__() takes exactly one argument (0 given)
    try:
        logger.debug(setting.dumps_yaml())
    except TypeError:
        logger.debug(setting)
    logger.debug("Config:")
    logger.debug(config.dumps_yaml())
    logger.debug(f"Method: {method}")

    # TODO: IDEA: It could actually be really cool if we created a list of
    # Experiment objects here, and just call their 'launch' methods in parallel,
    # rather than do the sweep logic in the Method class!
    # TODO: Need to add these arguments again to the parser?
    best_params, best_objective = method.hparam_sweep(
        setting,
        database_path=config.database_path,
        experiment_id=config.experiment_id,
        max_runs=config.max_runs,
        hpo_algorithm=config.hpo_algorithm,
    )
    logger.info(
        "Best params:\n" + "\n".join(f"\t{key}: {value}" for key, value in best_params.items())
    )
    logger.info(f"Best objective: {best_objective}")
    return (best_params, best_objective)


def add_sweep_command(command_subparsers: _SubParsersAction) -> None:
    sweep_parser = command_subparsers.add_parser(
        "sweep",
        description="Run a hyper-parameter optimization sweep.",
        help="Run a hyper-parameter optimization sweep.",
        add_dest_to_option_strings=False,
    )
    sweep_parser.set_defaults(action=sweep)
    sweep_parser.add_arguments(SweepConfig, dest="config")
    add_args_for_settings_and_methods(sweep_parser)


def add_info_command(command_subparsers: _SubParsersAction) -> None:
    """Add commands to display some information about the settings or methods."""
    info_parser = command_subparsers.add_parser(
        "info",
        # NOTE: Not 100% sure what the difference is between help and description.
        description="Displays some information about a Setting or Method.",
        help="Displays some information about a Setting or Method.",
        add_dest_to_option_strings=False,
    )
    info_parser.set_defaults(**{"component": None})
    info_parser.set_defaults(action=lambda namespace: info(namespace.component))

    component_subparser = info_parser.add_subparsers(
        title="component",
        dest="component",
        description="Setting or Method to display more information about.",
        help="heyo",
        required=False,
    )

    for setting in all_settings:
        setting_name = setting.get_name()
        component_parser: ArgumentParser = component_subparser.add_parser(
            name=setting_name,
            description=f"Show more info about the {setting_name} setting.",
            help=get_help(setting),
            add_dest_to_option_strings=False,
        )
        component_parser.set_defaults(**{"component": setting})

    for method in get_all_methods():
        method_name = method.get_full_name()
        component_parser: ArgumentParser = component_subparser.add_parser(
            name=method_name,
            description=f"Show more info about the {method_name} method.",
            help=get_help(method),
            add_dest_to_option_strings=False,
        )
        component_parser.set_defaults(**{"component": method})


def info(component: Union[Type[Setting], Type[Method]] = None) -> None:
    """Prints some info about a given component (method class or setting class), or
    prints the list of available settings and methods.
    """
    if component is None:
        from sequoia.utils.readme import get_tree_string

        print(get_tree_string())

        # print("Registered Settings:")
        # for setting in all_settings:
        #     print(f"- {setting.get_name()}: {setting.get_path_to_source_file()}")

        print()
        print("Registered Methods:")
        print()
        for method in get_all_methods():
            src = method.get_path_to_source_file()
            print(f"- {method.get_full_name()}: {src}")

    else:
        # IDEA: Could colorize the tree with red or green depending on if the method is
        # applicable to the setting or not!
        help(component)


def get_help(component: Type[Setting]) -> str:
    """Returns the string to be passed as the 'help' argument to the parser."""
    # todo
    docstring = component.__doc__
    if not docstring:
        docstring = f"Help for class {component.__name__} (missing docstring)"
    # IDEA: Get the first two sentences, or a shortened version of the docstring,
    # whichever one is shorter.
    first_two_sentences = ". ".join(docstring.split(".")[:2]) + "."
    # shortened_docstring = textwrap.shorten(docstring, 150)
    # return min(shortened_docstring, first_two_sentences, key=len) + "(help)"
    # NOTE: Seems to be nicer in general to have two whole sentences, even if they are a bit longer.
    return first_two_sentences


# def get_description(command: str, setting: Type[Setting], method: Type[Method] = None) -> str:
#     """ Returns the text to be displayed right under the "usage" line in the command-line
#     when either
#     `sequoia run <setting> --help`
#     or
#     `sequoia run <setting> <method> --help` is invoked.
#     """
#     if command == "run":
#         if method is not None:
#             return f"Run an experiment consisting of applying method {method.get_full_name()} on the {setting.get_name()} setting. (desc.)"
#         else:
#             return f"Run an experiment in the {setting.get_name()} setting. (desc.)"


def add_args_for_settings_and_methods(command_subparser: ArgumentParser):
    """Adds a subparser for each Setting class and method subparsers for each of those.

    NOTE: Only adds subparsers for setting classes that have a non-empty 'available_datasets'
    attribute, so that choosing `Setting`, `SLSetting` or `RLSetting` isn't an option.

    This is used by the `sequoia run` and `sequoia sweep` commands.
    """
    # ===== RUN ========
    setting_subparsers = command_subparser.add_subparsers(
        title="setting_choice",
        description="choice of experimental setting",
        dest="setting_type",
        metavar="<setting>",
        required=True,
    )

    def key_fn(setting_class: Type[Setting]):
        return (
            len(setting_class.parents()),
            setting_class.__name__,
        )

    # Sort the settings so the actions come up in a nice order.
    for setting in sorted(all_settings, key=key_fn):
        setting_name = setting.get_name()

        # IDEA:
        if not getattr(setting, "available_datasets", {}):
            # Don't add a parser for this setitng, since it has no available datasets.
            # e.g.: Setting, SL, RL
            continue

        setting_parser: ArgumentParser = setting_subparsers.add_parser(
            setting_name,
            help=get_help(setting),
            description=f"Run an experiment in the {setting.get_name()} setting.",
            add_dest_to_option_strings=False,
            formatter_class=SimpleHelpFormatter,
        )
        setting_parser.set_defaults(**{"setting_type": setting})

        # NOTE: By removing the `dest` argument to `add_argparse_args, we're moving the place where
        # the setting's values are stored from 'setting' to `camel_case(setting_class.__name__).
        # Alternative would be to just assume that the settings are dataclasses and add arguments
        # for the setting at destination 'setting' as before.
        setting.add_argparse_args(parser=setting_parser)
        # setting_parser.add_arguments(setting, dest="setting")

        method_subparsers = setting_parser.add_subparsers(
            title="method",
            dest="method_name",
            metavar="<method>",
            description=f"which method to apply to the {setting_name} Setting.",
            required=True,
        )
        for method in setting.get_applicable_methods():
            method_name = method.get_full_name()
            method_parser: ArgumentParser = method_subparsers.add_parser(
                method_name,
                help=get_help(method),
                description=(
                    f"Run an experiment where the {method_name} method is "
                    f"applied to the {setting.get_name()} setting."
                ),
                formatter_class=SimpleHelpFormatter,
            )
            method_parser.set_defaults(method_type=method)
            # TODO: Could also pass the setting to the method's `add_argparse_args` so
            # that it gets to change its default values!
            # method.add_argparse_args_for_setting(
            #     parser=method_parser, setting=setting,
            # )
            method.add_argparse_args(parser=method_parser)


if __name__ == "__main__":
    main()


================================================
FILE: sequoia/methods/README.md
================================================
# Sequoia - Methods

### Adding a new Method:

#### Prerequisites:
**- First, please take a look at the [examples](examples/)**

#### Steps:

1. Choose a target setting from the tree (See the "Available Settings" section below).

2. Create a new subclass of [`Method`](settings/base/bases.py), with the chosen target setting.

    Your class should implement the following methods:
    - `fit(train_env, valid_env)`
    - `get_actions(observations, action_space) -> Actions`
    
    The following methods are optional, but can be very useful to help customize how your method is used at train/test time:
    - `configure(setting: Setting)`
    - `on_task_switch(task_id: Optional[int])`
    - `test(test_env)`

    ```python
    class MyNewMethod(Method, target_setting=ClassIncrementalSetting):
        ... # Your code here.

        def fit(self, train_env: DataLoader, valid_env: DataLoader):
            # Train your model however you want here.
            self.trainer.fit(
                self.model,
                train_dataloader=train_env,
                val_dataloaders=valid_env,
            )
        
        def get_actions(self,
                        observations: Observations,
                        observation_space: gym.Space) -> Actions:
            # Return an "Action" (prediction) for the given observations.
            # Each Setting has its own Observations, Actions and Rewards types,
            # which are based on those of their parents.
            return self.model.predict(observations.x)

        def on_task_switch(self, task_id: Optional[int]):
            #This method gets called if task boundaries are known in the current
            #setting. Furthermore, if task labels are available, task_id will be
            # the index of the new task. If not, task_id will be None.
            # For example, you could do something like this:
            self.model.current_output_head = self.model.output_heads[task_id]
    ```

3. Running / Debugging your method:
 
    (at the bottom of your script, for example)

    ```python
    if __name__ == "__main__":
        ## 1. Create the setting you want to apply your method on.
        # First option: Create the Setting directly in code:
        setting = ClassIncrementalSetting(dataset="cifar10", nb_tasks=5)
        # Second option: Create the Setting from the command-line:
        setting = ClassIncrementalSetting.from_args()
        
        ## 2. Create your Method, however you want.
        my_method = MyNewMethod()

        ## 3. Apply your method on the setting to obtain results.
        results = setting.apply(my_method)
        # Optionally, display the results.
        print(results.summary())
        results.make_plots()
    ```

4. (WIP): Adding your new method to the tree:

    - Place the script/package that defines your Method inside of the `methods` folder.

    - Add the `@register_method` decorator to your Method definition, for example:

        ```python
        from sequoia.methods import register_method

        @register_method
        class MyNewMethod(Method, target_setting=ClassIncrementalSetting):
            name: ClassVar[str] = "my_new_method"
            ...
        ```

    - To launch an experiment using your method, run the following command:

        ```console
        python main.py --setting <some_setting_name> --method my_new_method
        ```
        To customize how your method gets created from the command-line, override the two following class methods:
        - `add_argparse_args(cls, parser: ArgumentParser)`
        - `from_argparse_args(cls, args: Namespace) -> Method`

    - Create a `<your_method_script_name>_test.py` file next to your method script. In it, write unit tests for every module/component used in your Method. Have them be easy to read so people can ideally understand how the components of your Method work by simply reading the tests.

        - (WIP) To run the unittests locally, use the following command: `pytest methods/my_new_method_test.py`

    - Then, write a functional test that demonstrates how your new method should behave, and what kind of results it expects to produce. The easiest way to do this is to implement a `validate_results(setting: Setting, results: Results)` method.
        - (WIP) To debug/run the "integration tests" locally, use the following command: `pytest -x methods/my_new_method_test.py --slow`

    - Create a Pull Request, and you're good to go!

<!-- NOTE: Anything below this is auto-generated by the `readme.py` script. -->
<!-- MAKETREE -->


## Registered Methods (so far):

- ## [BaseMethod](sequoia/methods/base_method.py) 

	 - Target setting: [Setting](sequoia/settings/base/setting.py)

	Versatile Baseline method which targets all settings.

	Uses pytorch-lightning's Trainer for training and a LightningModule as a model.

	Uses a [BaseModel](methods/models/base_model/base_model.py), which
	can be used for:
	- Self-Supervised training with modular auxiliary tasks;
	- Semi-Supervised training on partially labeled batches;
	- Multi-Head prediction (e.g. in task-incremental scenario);

- ## [RandomBaselineMethod](sequoia/methods/random_baseline.py) 

	 - Target setting: [Setting](sequoia/settings/base/setting.py)

	Baseline method that gives random predictions for any given setting.

	This method doesn't have a model or any parameters. It just returns a random
	action for every observation.

- ## [pnn.PnnMethod](sequoia/methods/pnn/pnn_method.py) 

	 - Target setting: [IncrementalAssumption](sequoia/settings/assumptions/incremental.py)


	PNN Method.

	Applicable to both RL and SL Settings, as long as there are clear task boundaries
	during training (IncrementalAssumption).

- ## [avalanche.AGEMMethod](sequoia/methods/avalanche/agem.py) 

	 - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)

	Average Gradient Episodic Memory (AGEM) strategy from Avalanche.
	See AGEM plugin for details.
	This strategy does not use task identities.

	See the parent class `AvalancheMethod` for the other hyper-parameters and methods.

- ## [avalanche.AR1Method](sequoia/methods/avalanche/ar1.py) 

	 - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)

	AR1 strategy from Avalanche.
	See AR1 plugin for details.
	This strategy does not use task identities.

	See the parent class `AvalancheMethod` for the other hyper-parameters and methods.

- ## [avalanche.CWRStarMethod](sequoia/methods/avalanche/cwr_star.py) 

	 - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)

	CWRStar strategy from Avalanche.
	See CWRStar plugin for details.
	This strategy does not use task identities.

	See the parent class `AvalancheMethod` for the other hyper-parameters and methods.

- ## [avalanche.EWCMethod](sequoia/methods/avalanche/ewc.py) 

	 - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)


	Elastic Weight Consolidation (EWC) strategy from Avalanche.
	See EWC plugin for details.
	This strategy does not use task identities.

	See the parent class `AvalancheMethod` for the other hyper-parameters and methods.

- ## [avalanche.GEMMethod](sequoia/methods/avalanche/gem.py) 

	 - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)

	Gradient Episodic Memory (GEM) strategy from Avalanche.
	See GEM plugin for details.
	This strategy does not use task identities.

	See the parent class `AvalancheMethod` for the other hyper-parameters and methods.

- ## [avalanche.GDumbMethod](sequoia/methods/avalanche/gdumb.py) 

	 - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)

	GDumb strategy from Avalanche.
	See GDumbPlugin for more details.
	This strategy does not use task identities.

	See the parent class `AvalancheMethod` for the other hyper-parameters and methods.

- ## [avalanche.LwFMethod](sequoia/methods/avalanche/lwf.py) 

	 - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)

	Learning without Forgetting strategy from Avalanche.
	See LwF plugin for details.
	This strategy does not use task identities.

	See the parent class `AvalancheMethod` for the other hyper-parameters and methods.

- ## [avalanche.ReplayMethod](sequoia/methods/avalanche/replay.py) 

	 - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)

	Replay strategy from Avalanche.
	See Replay plugin for details.
	This strategy does not use task identities.

	See the parent class `AvalancheMethod` for the other hyper-parameters and methods.

- ## [avalanche.SynapticIntelligenceMethod](sequoia/methods/avalanche/synaptic_intelligence.py) 

	 - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)

	The Synaptic Intelligence strategy from Avalanche.

	This is the Synaptic Intelligence PyTorch implementation of the
	algorithm described in the paper
	"Continuous Learning in Single-Incremental-Task Scenarios"
	(https://arxiv.org/abs/1806.08568)

	The original implementation has been proposed in the paper
	"Continual Learning Through Synaptic Intelligence"
	(https://arxiv.org/abs/1703.04200).

	The Synaptic Intelligence regularization can also be used in a different
	strategy by applying the :class:`SynapticIntelligencePlugin` plugin.

	See the parent class `AvalancheMethod` for the other hyper-parameters and methods.

- ## [sb3.A2CMethod](sequoia/methods/stable_baselines3_methods/a2c.py) 

	 - Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)

	Method that uses the A2C model from stable-baselines3. 

- ## [sb3.DQNMethod](sequoia/methods/stable_baselines3_methods/dqn.py) 

	 - Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)

	Method that uses a DQN model from the stable-baselines3 package. 

- ## [sb3.DDPGMethod](sequoia/methods/stable_baselines3_methods/ddpg.py) 

	 - Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)

	Method that uses the DDPG model from stable-baselines3. 

- ## [sb3.TD3Method](sequoia/methods/stable_baselines3_methods/td3.py) 

	 - Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)

	Method that uses the TD3 model from stable-baselines3. 

- ## [sb3.SACMethod](sequoia/methods/stable_baselines3_methods/sac.py) 

	 - Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)

	Method that uses the SAC model from stable-baselines3. 

- ## [sb3.PPOMethod](sequoia/methods/stable_baselines3_methods/ppo.py) 

	 - Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)

	Method that uses the PPO model from stable-baselines3. 

- ## [EwcMethod](sequoia/methods/ewc_method.py) 

	 - Target setting: [IncrementalAssumption](sequoia/settings/assumptions/incremental.py)

	Subclass of the BaseMethod, which adds the EWCTask to the `BaseModel`.

	This Method is applicable to any CL setting (RL or SL) where there are clear task
	boundaries, regardless of if the task labels are given or not.

- ## [ExperienceReplayMethod](sequoia/methods/experience_replay.py) 

	 - Target setting: [IncrementalSLSetting](sequoia/settings/sl/incremental/setting.py)

	Simple method that uses a replay buffer to reduce forgetting.

- ## [HatMethod](sequoia/methods/hat.py) 

	 - Target setting: [TaskIncrementalSLSetting](sequoia/settings/sl/task_incremental/setting.py)

	Hard Attention to the Task

	```
	@inproceedings{serra2018overcoming,
	    title={Overcoming Catastrophic Forgetting with Hard Attention to the Task},
	    author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros},
	    booktitle={International Conference on Machine Learning},
	    pages={4548--4557},
	    year={2018}
	}
	```


================================================
FILE: sequoia/methods/__init__.py
================================================
""" Methods: solutions to research problems (Settings).

Methods contain the logic related to the training of the algorithm. Methods are
encouraged to use a model to keep the networks / architecture / engineering code
separate from the training loop.

Sequoia includes a `BaseMethod`, along with an accompanying `Model`, which can be
used as a jumping-off point for new users. 
You're obviously also free to write your own method/model from scratch if you want!

The recommended way to start is by creating a new subclass of the Base
The best way to do so is to create your new model as a subclass of the `Model`,
which already has some neat capabilities, and can easily be extended/customized.

This `Model` is an instance of Pytorch-Lightning's `LightningModule` class, and can be
trained on the environments/dataloaders of Sequoia with a `pl.Trainer`, enabling all the
goodies associated with Pytorch-Lightning.

You can also easily add callbacks to measure your own metrics and such as you would in
Pytorch-Lightning.
"""
import glob
import inspect
import os
import warnings
from functools import lru_cache
from importlib import import_module
from os.path import abspath, basename, dirname, isfile, join
from pathlib import Path
from typing import Dict, List, Type

import pkg_resources
from pkg_resources import EntryPoint
from setuptools import find_packages

from sequoia.settings.base import Method
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


AbstractMethod = Method

_registered_methods: List[Type[Method]] = []


"""
TODO: IDEA: Add arguments to register_method that help configure the tests we
add the that method! E.g.:

```
@register_method(slow=True, requires_cuda=True, required_memory_gb=4)
class MyMethod(Method, target_setting=ContinualRLSetting):
    ...
```
"""


def register_method(
    method_class: Type[Method] = None, *, name: str = None, family: str = None
) -> Type[Method]:
    """Decorator around a method class, which is used to register the method.

    Can set the name of the method as well as the family when they are passed, and also
    adds the Method to the list of registered methods.
    """

    def _register_method(
        method_class: Type[Method] = None, *, name: str = None, family: str = None
    ) -> Type[Method]:
        if name is not None:
            method_class.name = name
        if family is not None:
            method_class.family = family

        if not issubclass(method_class, Method):
            raise TypeError(
                "The `register_method` decorator should only be used on subclasses of " "`Method`."
            )

        if method_class not in _registered_methods:
            _registered_methods.append(method_class)

        return method_class

    # This is based on `dataclasses.dataclass`:
    def wrap(method_class: Type[Method]) -> Type[Method]:
        return _register_method(method_class, name=name, family=family)

    # See if we're being called as @register_method or @register_method().
    if method_class is None:
        # We're called with parens.
        return wrap

    # We're called as @register_method without parens.
    return wrap(method_class)


from .base_method import BaseMethod, BaseModel
from .ewc_method import EwcMethod
from .experience_replay import ExperienceReplayMethod
from .hat import HatMethod
from .pnn import PnnMethod
from .random_baseline import RandomBaselineMethod


@lru_cache(1)
def get_external_methods() -> Dict[str, Type[Method]]:
    """Returns a dictionary of the Methods defined outside of Sequoia.

    Packages outside of Sequoia can register methods by putting a `Method` entry-point
    in their setup.py, like so:

    ```python
    # (inside <some_package_dir>/setup.py)

    setup(
        name="my_package",
        packages=setuptools.find_packages(include=["cn_dpm*"])
        ...
        entry_points={
            "Method": [
                "foo_method = my_package.my_methods.foo_method:FooMethod",
                "bar_method = my_package.my_methods.bar_method:BarMethod",
            ],
        },
    )
    ```

    Compared with using the `@register_method` decorator, this has the benefit that the
    module containing the Method does not need to be imported/"live" for the method to
    be available. This is very relevant when using Sequoia through the command-line, for
    instance, since Sequoia would have no way of knowing what other methods are
    available:

    ```console
    sequoia setting foo_setting method foo_method
    ```
    """
    methods: Dict[str, Type[Method]] = {}
    for entry_point in pkg_resources.iter_entry_points("Method"):
        entry_point: EntryPoint
        try:
            method_class = entry_point.load()
        except Exception as exc:
            logger.error(
                f"Unable to load external Method: '{entry_point.name}', from package "
                f"{entry_point.dist.project_name}, version={entry_point.dist.version}: "
                f"{exc}"
            )
        else:
            logger.debug(
                f"Imported an external Method: '{entry_point.name}', from package "
                f"{entry_point.dist.project_name}, (version = {entry_point.dist.version})."
            )
            methods[entry_point.name] = method_class
    return methods


# Keeping a pointer to the old name, just to help with backward-compatibility a bit.
BaselineMethod = BaseMethod


# TODO: Eventually these could become external repos, with their own tests / etc, based
# on a 'cookiecutter' repo of some sort. This would make it easier to maintain and to
# delegate work!

# IDEA: Could also do the same for the datasets somehow? Like have an extendable
# `sequoia.datasets` cookiecutter repo? How would that work with Settings?
# Assumption + Assumption -> Assumption (combined)
# Setting := fn(dataset, **kwargs) -> Callable[[Method], Results]


AVALANCHE_INSTALLED = False
try:
    from avalanche.training.strategies import BaseStrategy  # type: ignore

    AVALANCHE_INSTALLED = True
except ImportError:
    pass

if AVALANCHE_INSTALLED:
    from sequoia.methods.avalanche_methods import *


SB3_INSTALLED = False
try:
    import stable_baselines3

    SB3_INSTALLED = True
except ImportError:
    pass

if SB3_INSTALLED:
    from sequoia.methods.stable_baselines3_methods import *


try:
    from sequoia.methods.pl_bolts_methods import *
except ImportError:
    pass


def add_external_methods(all_methods: List[Type[Method]]) -> List[Type[Method]]:
    for name, method_class in get_external_methods().items():
        if method_class not in all_methods:
            logger.debug(f"Adding method {name} from external package.")
            all_methods.append(method_class)
    return all_methods


def get_all_methods() -> List[Type[Method]]:
    # This may change over time, and includes ALL subclasses of 'Method'.
    # methods = Method.__subclasses__()
    # This includes all registered methods, e.g. not any base classes.
    methods = _registered_methods
    methods = add_external_methods(methods)
    methods = list(set(methods))
    return list(sorted(methods, key=lambda method: method.get_full_name()))


================================================
FILE: sequoia/methods/aux_tasks/__init__.py
================================================
from .auxiliary_task import AuxiliaryTask
from .ewc import EWCTask
from .reconstruction import AEReconstructionTask, VAEReconstructionTask
from .transformation_based import RotationTask

VAE: str = VAEReconstructionTask.name
AE: str = AEReconstructionTask.name
EWC: str = EWCTask.name


================================================
FILE: sequoia/methods/aux_tasks/auxiliary_task.py
================================================
import typing
from abc import abstractmethod
from dataclasses import dataclass
from typing import Callable, ClassVar, Dict, Optional, Tuple

import torch
from pytorch_lightning import LightningModule
from torch import Tensor, nn

from sequoia.common.hparams import HyperParameters, uniform
from sequoia.common.loss import Loss

if typing.TYPE_CHECKING:
    from sequoia.methods.models.base_model import Model


class AuxiliaryTask(nn.Module):
    """Represents an additional loss to apply to a `Classifier`.

    The main logic should be implemented in the `get_loss` method.

    In general, it should apply some deterministic transformation to its input,
    and treat that same transformation as a label to predict.
    That loss should be backpropagatable through the feature extractor (the
    `encoder` attribute).
    """

    name: ClassVar[str] = ""
    input_shape: ClassVar[Tuple[int, ...]] = ()
    hidden_size: ClassVar[int] = -1

    _model: ClassVar["Model"]
    # Class variables for holding the Modules shared with the classifier.
    encoder: ClassVar[nn.Module]
    output_head: ClassVar[nn.Module]  # type: ignore

    preprocessing: ClassVar[Callable[[Tensor, Optional[Tensor]], Tuple[Tensor, Optional[Tensor]]]]

    @dataclass
    class Options(HyperParameters):
        """Settings for this Auxiliary Task."""

        # Coefficient used to scale the task loss before adding it to the total.
        coefficient: float = uniform(0.0, 1.0, default=1.0)

    def __init__(self, *args, options: Options = None, name: str = None, **kwargs):
        """Creates a new Auxiliary Task to further train the encoder.

        Can use the `encoder` and `classifier` components of the parent
        `Classifier` instance.

        NOTE: Since this object will be stored inside the `tasks` dict in the
        model, we can't pass a reference to the parent here, otherwise the
        parent would hold a reference to itself inside its `.modules()`, so
        there would be an infinite recursion problem.

        Parameters
        ----------
        - options : AuxiliaryTask.Options, optional, by default None

            The `Options` related to this task, containing the loss
            coefficient used to scale this task, as well as any other additional
            hyperparameters specific to this `AuxiliaryTask`.
        - name: str, optional, by default None

            The name of this auxiliary task. When not given, the name of the
            class is used.
        """
        super().__init__()
        # If we are given the coefficient as a constructor argument, for
        # instance, then we create the Options for this auxiliary task.
        self.name = name or type(self).name
        self.options = options or type(self).Options(*args, **kwargs)
        self.device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self._disabled = False

    def encode(self, x: Tensor) -> Tensor:
        # x, _ = AuxiliaryTask.preprocessing(x, None)
        return AuxiliaryTask.encoder(x)

    def logits(self, h_x: Tensor) -> Tensor:
        return AuxiliaryTask.output_head(h_x)

    @abstractmethod
    def get_loss(self, forward_pass: Dict[str, Tensor], y: Tensor = None) -> Loss:
        """Calculates the Auxiliary loss for the input `x`.

        The parameters `h_x`, `y_pred` are given for convenience, so we don't
        re-calculate the forward pass multiple times on the same input.

        Parameters
        ----------
        - forward_pass: Dict[str, Tensor] containing:
            - 'x' : Tensor

                The input samples.
            - 'h_x' : Tensor

                The hidden vector, or hidden features, which corresponds to the
                output of the feature extractor (should be equivalent to
                `self.encoder(x)`). Given for convenience, when available.

            - 'y_pred' : Tensor

                The predicted labels.
        - y : Tensor, optional, by default None

            The true labels for each sample. Note that this is the label of the
            output head's task, not of an auxiliary task.

        Returns
        -------
        Tensor
            The loss, not scaled.
        """

    @property
    def coefficient(self) -> float:
        return self.options.coefficient

    @coefficient.setter
    def coefficient(self, value: float) -> None:
        if self.enabled and value == 0:
            self.disable()
        elif self.disabled and value != 0:
            self.enable()
        self.options.coefficient = value

    def enable(self) -> None:
        """Enable this auxiliary task.
        This could be used to create/allocate resources to this task.

        NOTE: The task will not work, even after being enabled, if its
        coefficient is set to 0!
        """
        self._disabled = False

    def disable(self) -> None:
        """Disable this auxiliary task and sets its coefficient to 0.
        This could be used to delete/deallocate resources used by this task.
        """
        self._disabled = True

    @property
    def enabled(self) -> bool:
        return not self._disabled

    @property
    def disabled(self) -> bool:
        return self._disabled or self.coefficient == 0.0

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Executed when the task switches (to either a new or known task)."""

    @property
    def model(self) -> LightningModule:
        return type(self)._model

    @staticmethod
    def set_model(model: "Model") -> None:
        AuxiliaryTask._model = model

    def shared_modules(self) -> Dict[str, nn.Module]:
        """Returns any trainable modules if `self` that are shared across tasks.

        By giving this information, these weights can then be used in
        regularization-based auxiliary tasks like EWC, for example.

        By default, for auxiliary tasks, this returns nothing, for instance.
        For the base model, this returns a dictionary with the encoder, for example.
        When using only one output head (i.e. when `self.hp.multihead` is `False`), then
        this dict also includes the output head.

        Returns
        -------
        Dict[str, nn.Module]:
            Dictionary mapping from name to the shared modules, if any.
        """
        return {}


================================================
FILE: sequoia/methods/aux_tasks/ewc.py
================================================
"""Elastic Weight Consolidation as an Auxiliary Task.

This is a simplified version of EWC, that only currently uses the L2 norm, rather
than the Fisher Information Matrix.

TODO: If it's worth it, we could re-add the 'real' EWC using the nngeometry
package, (which I don't think we need to have as a submodule).
"""

from collections import deque
from contextlib import contextmanager
from copy import deepcopy
from dataclasses import dataclass
from typing import Deque, List, Optional, Type

from gym.spaces.utils import flatdim
from nngeometry.metrics import FIM
from nngeometry.object.pspace import PMatAbstract, PMatDiag, PMatKFAC, PVector
from simple_parsing import choice
from torch import Tensor
from torch.utils.data import DataLoader

from sequoia.common.hparams import categorical, uniform
from sequoia.common.loss import Loss
from sequoia.methods.aux_tasks.auxiliary_task import AuxiliaryTask
from sequoia.methods.models.forward_pass import ForwardPass
from sequoia.methods.models.output_heads import ClassificationHead, RegressionHead
from sequoia.settings.base.objects import Observations
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import dict_intersection

logger = get_logger(__name__)


class EWCTask(AuxiliaryTask):
    """Elastic Weight Consolidation, implemented as a 'self-supervision-style'
    Auxiliary Task.

    ```bibtex
    @article{kirkpatrick2017overcoming,
        title={Overcoming catastrophic forgetting in neural networks},
        author={Kirkpatrick, James and Pascanu, Razvan and Rabinowitz, Neil and Veness,
        Joel and Desjardins, Guillaume and Rusu, Andrei A and Milan, Kieran and Quan,
        John and Ramalho, Tiago and Grabska-Barwinska, Agnieszka and others},
        journal={Proceedings of the national academy of sciences},
        volume={114},
        number={13},
        pages={3521--3526},
        year={2017},
        publisher={National Acad Sciences}
    }
    ```
    """

    name: str = "ewc"

    @dataclass
    class Options(AuxiliaryTask.Options):
        """Options of the EWC auxiliary task."""

        # Coefficient of the EWC auxilary task.
        # NOTE: It seems to be the case that, at least just for EWC, the coefficient
        # can be often be much greater than 1, hence why we overwrite the prior over
        # that hyper-parameter here.
        coefficient: float = uniform(0.0, 100.0, default=1.0)
        # Batchsize to be used when computing FIM (unused atm)
        batch_size_fim: int = 32
        # Number of observations to use for FIM calculation
        sample_size_fim: int = categorical(2, 4, 8, 16, 32, 64, 128, 256, 512, default=8)
        # Fisher information representation type  (diagonal or block diagonal).
        fim_representation: Type[PMatAbstract] = choice(
            {"diagonal": PMatDiag, "block_diagonal": PMatKFAC},
            default=PMatDiag,
        )

    def __init__(self, *args, name: str = None, options: "EWCTask.Options" = None, **kwargs):
        super().__init__(*args, options=options, name=name, **kwargs)
        self.options: EWCTask.Options

        # The id of the current/most recent task the model has been trained on.
        self.current_training_task: Optional[int] = None
        # The id of the previous task the model was trained on.
        self.previous_training_task: Optional[int] = None
        # The ids of all the tasks trained on so far, not including the current task.
        self.previous_training_tasks: List[Optional[int]] = []

        self.previous_model_weights: Optional[PVector] = None
        self.observation_collector: Deque[Observations] = deque(maxlen=self.options.sample_size_fim)
        self.fisher_information_matrices: List[PMatAbstract] = []
        # When True, ignore task boundaries (no EWC update).
        # This is used mainly because of the need for executing forward passes when
        # calculating the new FIMs, and the MultiheadModel class might then call
        # `on_task_switch`, so we don't want to recurse.
        self._ignore_task_boundaries: bool = False

        if not self.model.shared_modules():
            # TODO: This might cause a bug, if  some auxiliary task were to replace the
            # encoder and also be 'activated' after this task. This is a really obscure
            # edge case though.
            logger.warning(
                RuntimeWarning(
                    "Disabling the EWC auxiliary task, since there appears to be no "
                    "shared weights between tasks!"
                )
            )
            self.disable()

    def get_loss(self, forward_pass: ForwardPass, y: Tensor = None) -> Loss:
        """Gets the EWC loss."""
        if self.training:
            self.observation_collector.append(forward_pass.observations)

        if not self.enabled or self.previous_model_weights is None:
            # We're in the first task: do nothing.
            return Loss(name=self.name)

        loss = 0.0
        v_current = self.get_current_model_weights()

        for fim in self.fisher_information_matrices:
            diff = v_current - self.previous_model_weights
            loss += fim.vTMv(diff)

        ewc_loss = Loss(name=self.name, loss=loss)
        return ewc_loss

    def on_task_switch(self, task_id: Optional[int]):
        """Executed when the task switches (to either a known or unknown task)."""
        if not self.enabled:
            return
        logger.debug(f"On task switch called: task_id={task_id}")

        if self._ignore_task_boundaries:
            logger.info("Ignoring task boundary (probably from recursive call)")
            return

        if not self.training:
            logger.debug("Task boundary at test time, no EWC update.")
            return
        # Two cases:
        # - Setting without task IDs --> still calculate the FIMs at each task boundary.
        # - Setting with IDs --> calculate the FIMs before training on new tasks.

        # Setting without task labels. Task ids: None -> None -> None  (always None)
        if task_id is None:
            # Here we use the number of task boundaries as a 'fake' task id, meaning we
            # treat each task as if it has never been encountered before.
            if self.current_training_task is None:
                # Start of first task, no EWC update.
                self.current_training_task = 0
            else:
                self.previous_training_task = self.current_training_task
                self.current_training_task += 1
                self.update_anchor_weights(new_task_id=self.current_training_task)

        # Setting with task labels. Task ids: 0 -> 1 -> 2 -> 1 -> 3 -> 5 -> 11 -> 5 etc.
        else:
            if self.current_training_task is None:
                logger.info("Starting the first task, no EWC update.")
                self.current_training_task = task_id
            elif task_id == self.current_training_task:
                logger.info("Switching to same task, no EWC update.")
            elif task_id in self.previous_training_tasks:
                logger.info(f"Switching to known task {task_id}, no EWC update.")
            else:
                logger.info(f"Switching to new task {task_id}, updating EWC params.")
                self.previous_training_task = self.current_training_task
                self.previous_training_tasks.append(self.current_training_task)
                self.current_training_task = task_id
                self.update_anchor_weights(new_task_id=self.current_training_task)

    def update_anchor_weights(self, new_task_id: int) -> None:
        """Update the FIMs and other EWC params before starting training on a new task.

        Parameters
        ----------
        new_task_id : int
            The ID of the new task.
        """
        # we dont want to go here at test time.
        # NOTE: We also switch between unknown tasks.
        logger.info(
            f"Updating the EWC 'anchor' weights before starting training on " f"task {new_task_id}"
        )
        self.previous_model_weights = self.get_current_model_weights().clone().detach()

        # Create a Dataloader from the stored observations.
        obs_type: Type[Observations] = type(self.observation_collector[0])
        dataset = [obs.as_namedtuple() for obs in self.observation_collector]
        # Or, alternatively (see the note below on why we don't use this):
        # stacked_observations: Observations = obs_type.stack(self.observation_collector)
        # dataset = TensorDataset(*stacked_observations.as_namedtuple())

        # NOTE: This is equivalent to just using the same batch size as during
        # training, as each Observations in the list is already a batch.
        # NOTE: We keep the same batch size here as during training because for
        # instance in RL, it would be weird to suddenly give some new batch size,
        # since the buffers would get cleared and re-created just for these forward
        # passes
        dataloader = DataLoader(dataset, batch_size=None, collate_fn=None)
        # TODO: Would be nice to have a progress bar here.

        # Create the parameters to be passed to the FIM function. These may vary a
        # bit, depending on if we're being applied in a classification setting or in
        # a regression setting (not done yet)
        variant: str
        # TODO: Change this conditional to be based on the type of action space, rather
        # than of output head.
        if isinstance(self._model.output_head, ClassificationHead):
            variant = "classif_logits"
            n_output = self._model.action_space.n

            def fim_function(*inputs) -> Tensor:
                observations = obs_type(*inputs).to(self._model.device)
                forward_pass: ForwardPass = self._model(observations)
                actions = forward_pass.actions
                return actions.logits

        elif isinstance(self._model.output_head, RegressionHead):
            # NOTE: This hasn't been tested yet.
            variant = "regression"
            n_output = flatdim(self._model.action_space)

            def fim_function(*inputs) -> Tensor:
                observations = obs_type(*inputs).to(self._model.device)
                forward_pass: ForwardPass = self._model(observations)
                actions = forward_pass.actions
                return actions.y_pred

        else:
            raise NotImplementedError("TODO")

        with self._ignoring_task_boundaries():
            # Prevent recursive calls to `on_task_switch` from affecting us (can be
            # called from MultiheadModel). (TODO: MultiheadModel will be fixed soon.)
            # layer_collection = LayerCollection.from_model(self.model.shared_modules())
            # nngeometry BUG: this doesn't work when passing the layer
            # collection instead of the model
            new_fim = FIM(
                model=self.model.shared_modules(),
                loader=dataloader,
                representation=self.options.fim_representation,
                n_output=n_output,
                variant=variant,
                function=fim_function,
                device=self._model.device,
                layer_collection=None,
            )

        # TODO: There was maybe an idea to use another fisher information matrix for
        # the critic in A2C, but not doing that atm.
        new_fims = [new_fim]
        self.consolidate(new_fims, task=new_task_id)
        self.observation_collector.clear()

    @contextmanager
    def _ignoring_task_boundaries(self):
        """Contextmanager used to temporarily ignore task boundaries (no EWC update)."""
        self._ignore_task_boundaries = True
        yield
        self._ignore_task_boundaries = False

    def consolidate(self, new_fims: List[PMatAbstract], task: Optional[int]) -> None:
        """Consolidates the new and current fisher information matrices.

        Parameters
        ----------
        new_fims : List[PMatAbstract]
            The list of new fisher information matrices.
        task : Optional[int]
            The id of the previous task, when task labels are available, or the number
            of task switches encountered so far when task labels are not available.
        """
        if not self.fisher_information_matrices:
            self.fisher_information_matrices = new_fims
            return

        assert task is not None, "Should have been given an int task id (even if fake)."

        for i, (fim_previous, fim_new) in enumerate(
            zip(self.fisher_information_matrices, new_fims)
        ):
            # consolidate the FIMs
            if fim_previous is None:
                self.fisher_information_matrices[i] = fim_new
            else:
                # consolidate the fim_new into fim_previous in place
                if isinstance(fim_new, PMatDiag):
                    # TODO: This is some kind of weird online-EWC related magic:
                    fim_previous.data = (deepcopy(fim_new.data) + fim_previous.data * (task)) / (
                        task + 1
                    )

                elif isinstance(fim_new.data, dict):
                    # TODO: This is some kind of weird online-EWC related magic:
                    for _, (prev_param, new_param) in dict_intersection(
                        fim_previous.data, fim_new.data
                    ):
                        for prev_item, new_item in zip(prev_param, new_param):
                            prev_item.data = (prev_item.data * task + deepcopy(new_item.data)) / (
                                task + 1
                            )

                self.fisher_information_matrices[i] = fim_previous

    def get_current_model_weights(self) -> PVector:
        return PVector.from_model(self.model.shared_modules())


================================================
FILE: sequoia/methods/aux_tasks/reconstruction/__init__.py
================================================
""" Auxiliary tasks based on reconstructing an input given a hidden vector.

TODO: Add some denoising autoencoders maybe as a reconstruction task?
"""
from .ae import AEReconstructionTask
from .decoder_for_dataset import get_decoder_class_for_dataset
from .decoders import CifarDecoder, MnistDecoder
from .vae import VAEReconstructionTask


================================================
FILE: sequoia/methods/aux_tasks/reconstruction/ae.py
================================================
""" Defines an Auto-Encoder-based Auxiliary task.
"""
from typing import ClassVar, Dict, Optional, Tuple, Union

import torch
from torch import Tensor, nn

from sequoia.common.loss import Loss

from ..auxiliary_task import AuxiliaryTask
from .decoder_for_dataset import get_decoder_class_for_dataset


class AEReconstructionTask(AuxiliaryTask):
    """Task that adds the AE loss (reconstruction loss).

    Uses the feature extractor (`encoder`) of the parent model as the encoder of
    an AE. Contains trainable `decoder` module, which is
    used to get the AE loss to train the feature extractor with.
    """

    name: ClassVar[str] = "ae"

    def __init__(self, coefficient: float = None, options: AuxiliaryTask.Options = None):
        super().__init__(coefficient=coefficient, options=options)
        self.loss = nn.MSELoss(reduction="sum")

        # BUG: The decoder for mnist has output shape of [1, 28, 28], but the
        # transforms 'fix' that shape to be [3, 28, 28].
        # Therefore: TODO: Should we adapt the output shape of the decoder
        # depending on the shape of the input?
        self.decoder: Optional[nn.Module] = None

    def create_decoder(self, input_shape: Union[torch.Size, Tuple[int, ...]]) -> nn.Module:
        """Creates a decoder to reconstruct the input from the hidden vectors."""
        if len(input_shape) == 4:
            # discard the batch dimension.
            input_shape = input_shape[1:]
        # At the moment we have a 'fixed' set of image sizes (28, 32, 224, iirc)
        # and we just use the decoder type for the given dataset.
        # TODO: Create the decoder dynamically, depending on the required shape.
        decoder_class = get_decoder_class_for_dataset(input_shape)
        decoder: nn.Module = decoder_class(
            code_size=AuxiliaryTask.hidden_size,
        )
        decoder = decoder.to(self.device)
        return decoder

    def get_loss(self, forward_pass: Dict[str, Tensor], y: Tensor = None) -> Loss:
        x = forward_pass["x"]
        h_x = forward_pass["h_x"]
        # y_pred = forward_pass["y_pred"]
        z = h_x.view([h_x.shape[0], -1])
        if self.decoder is None or self.decoder.output_shape != x.shape:
            self.decoder = self.create_decoder(x.shape)
        x_hat = self.decoder(z)
        assert x_hat.shape == x.shape, (
            f"reconstructed x should have same shape as original x! "
            f"({x_hat.shape} != {x.shape})"
        )
        recon_loss = self.reconstruction_loss(x_hat, x)
        loss_info = Loss(name=self.name, loss=recon_loss)
        return loss_info

    def forward(self, h_x: Tensor) -> Tensor:  # type: ignore
        z = h_x.view([h_x.shape[0], -1])
        x_hat = self.decoder(z)
        return x_hat

    def reconstruct(self, x: Tensor) -> Tensor:
        h_x = self.encode(x)
        x_hat = self.forward(h_x)
        return x_hat.view(x.shape)

    def reconstruction_loss(self, recon_x: Tensor, x: Tensor) -> Tensor:
        return self.loss(recon_x, x)


================================================
FILE: sequoia/methods/aux_tasks/reconstruction/decoder_for_dataset.py
================================================
from typing import Dict, Tuple, Type, Union

from torch import nn

from .decoders import CifarDecoder, ImageNetDecoder, MnistDecoder

# Dict mapping from image (height, width) to the type of decoder to use.
# TODO: Add some more decoders for other image datasets/shapes.
registered_decoders: Dict[Tuple[int, int], Type[nn.Module]] = {
    (28, 28): MnistDecoder,
    (32, 32): CifarDecoder,
    (224, 224): ImageNetDecoder,
}


def get_decoder_class_for_dataset(input_shape: Union[Tuple[int, int, int]]) -> Type[nn.Module]:
    assert len(input_shape) == 3, input_shape
    channels: int
    width: int
    height: int
    if input_shape[0] == min(input_shape):
        # Image is in C, H, W format
        channels, height, width = input_shape
    elif input_shape[-1] == min(input_shape):
        height, width, channels = input_shape
    if (height, width) in registered_decoders:
        return registered_decoders[(height, width)]
    raise RuntimeError(f"No decoder available for input shape {input_shape}")


================================================
FILE: sequoia/methods/aux_tasks/reconstruction/decoders.py
================================================
from abc import ABC
from typing import Tuple

from torch import nn

from sequoia.common.layers import DeConvBlock, Reshape


class Decoder(nn.Sequential, ABC):
    """A base class for the decoders (mostly for typing purposes)."""

    code_size: int
    output_shape: Tuple[int, int, int]


class MnistDecoder(Decoder):
    """Decoder that generates images of shape [`out_channels`, 28, 28]"""

    def __init__(self, code_size: int, out_channels: int = 3):
        self.code_size = code_size
        self.output_shape: Tuple[int, int, int] = (out_channels, 28, 28)
        super().__init__(
            Reshape([self.code_size, 1, 1]),
            nn.ConvTranspose2d(self.code_size, 32, kernel_size=4, stride=1),
            nn.BatchNorm2d(32),
            nn.ELU(alpha=1.0, inplace=True),
            nn.ConvTranspose2d(32, 16, kernel_size=5, stride=2),
            nn.BatchNorm2d(16),
            nn.ELU(alpha=1.0, inplace=True),
            nn.ConvTranspose2d(16, 16, kernel_size=5, stride=2),
            nn.BatchNorm2d(16),
            nn.ELU(alpha=1.0, inplace=True),
            nn.ConvTranspose2d(16, out_channels, kernel_size=4, stride=1),
            nn.Sigmoid(),
        )


class CifarDecoder(Decoder):
    """Decoder that generates images of shape [3, 32, 32]"""

    def __init__(self, code_size: int):
        self.code_size = code_size
        self.output_shape: Tuple[int, int, int] = (3, 32, 32)
        super().__init__(
            Reshape([self.code_size, 1, 1]),
            DeConvBlock(self.code_size, 16),
            DeConvBlock(16, 32),
            DeConvBlock(32, 64),
            DeConvBlock(64, 64),
            DeConvBlock(64, 3, last_relu=False),
            nn.Sigmoid(),
        )


class ImageNetDecoder(Decoder):
    """Decoder that generates images of shape [3, 224, 224]"""

    def __init__(self, code_size: int):
        self.code_size = code_size
        self.output_shape: Tuple[int, int, int] = (3, 224, 224)
        super().__init__(
            Reshape([self.code_size, 1, 1]),
            DeConvBlock(self.code_size, 16),
            DeConvBlock(16, 32),
            DeConvBlock(32, 64),
            DeConvBlock(64, 128),
            DeConvBlock(128, 224),
            DeConvBlock(224, 3, last_relu=False),
            nn.Sigmoid(),
        )


================================================
FILE: sequoia/methods/aux_tasks/reconstruction/vae.py
================================================
from dataclasses import dataclass
from typing import ClassVar, Dict

import torch
from torch import Tensor, nn

from sequoia.common.loss import Loss

from ..auxiliary_task import AuxiliaryTask
from .ae import AEReconstructionTask
from .decoder_for_dataset import get_decoder_class_for_dataset


class VAEReconstructionTask(AEReconstructionTask):
    """Task that adds the VAE loss (reconstruction + KL divergence).

    Uses the feature extractor (`encoder`) of the parent model as the encoder of
    a VAE. Contains trainable `mu`, `logvar`, and `decoder` modules, which are
    used to get the VAE loss to train the feature extractor with.
    """

    name: ClassVar[str] = "vae"

    @dataclass
    class Options(AEReconstructionTask.Options):
        """Settings & Hyper-parameters related to the VAEReconstructionTask."""

        code_size: int = 50  # dimensions of the VAE code-space.
        beta: float = 1.0  # Beta term, multiplies the KL divergence term.

    def __init__(self, coefficient: float = None, options: "VAEReconstructionTask.Options" = None):
        super().__init__(coefficient=coefficient, options=options)
        self.options: VAEReconstructionTask.Options
        self.code_size = self.options.code_size
        # add the rest of the VAE layers: (Mu, Sigma, and the decoder)
        self.mu = nn.Linear(AuxiliaryTask.hidden_size, self.code_size)
        self.logvar = nn.Linear(AuxiliaryTask.hidden_size, self.code_size)
        decoder_class = get_decoder_class_for_dataset(AuxiliaryTask.input_shape)
        self.decoder: nn.Module = decoder_class(
            code_size=self.code_size,
        )

    def forward(self, h_x: Tensor) -> Tensor:  # type: ignore
        h_x = h_x.view([h_x.shape[0], -1])
        mu, logvar = self.mu(h_x), self.logvar(h_x)
        z = self.reparameterize(mu, logvar)
        x_hat = self.decoder(z)
        return x_hat

    def reparameterize(self, mu: Tensor, logvar: Tensor) -> Tensor:
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        z = mu + eps * std
        return z

    def get_loss(self, forward_pass: Dict[str, Tensor], y: Tensor = None) -> Loss:
        x = forward_pass["x"]
        h_x = forward_pass["h_x"]
        h_x = h_x.view([h_x.shape[0], -1])
        mu, logvar = self.mu(h_x), self.logvar(h_x)
        z = self.reparameterize(mu, logvar)
        x_hat = self.decoder(z)

        recon_loss = self.reconstruction_loss(x_hat, x)
        kl_loss = self.options.beta * self.kl_divergence_loss(mu, logvar)
        loss = Loss(self.name, tensors=dict(mu=mu, logvar=logvar, z=z, x_hat=x_hat))
        loss += Loss("recon", loss=recon_loss)
        loss += Loss("kl", loss=kl_loss)
        return loss

    def generate(self, z: Tensor) -> Tensor:
        z = z.to(self.device)
        return self.forward(z)

    @staticmethod
    def kl_divergence_loss(mu: Tensor, logvar: Tensor) -> Tensor:
        # see Appendix B from VAE paper:
        # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
        # https://arxiv.org/abs/1312.6114
        # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
        return -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())


================================================
FILE: sequoia/methods/aux_tasks/transformation_based/__init__.py
================================================
from .bases import ClassifyTransformationTask, RegressTransformationTask, TransformationBasedTask
from .rotation import RotationTask


================================================
FILE: sequoia/methods/aux_tasks/transformation_based/bases.py
================================================
from dataclasses import dataclass
from functools import wraps
from typing import Any, Callable, List, Tuple

import torch
from torch import Tensor, nn
from torchvision.transforms import functional as TF

from sequoia.common.loss import Loss
from sequoia.common.metrics import Metrics, get_metrics
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import fix_channels

from ..auxiliary_task import AuxiliaryTask

logger = get_logger(__name__)


def wrap_pil_transform(function: Callable):
    def _transform(img_x, arg):
        x = TF.to_pil_image(img_x.cpu())
        x = function(x, arg)
        return TF.to_tensor(x).view(img_x.shape).to(img_x)

    @wraps(function)
    def _pil_transform(x: Tensor, arg: Any):
        return torch.cat([_transform(x_i, arg) for x_i in x]).view(x.shape)

    return _pil_transform


class TransformationBasedTask(AuxiliaryTask):
    """
    Generates an AuxiliaryTask for an arbitrary transformation function.

    Tries to classify or regress which argument was passed to the function,
    given only the transformed code, if `compare_with_original` is False, else
    given the original and transformed codes.

    NOTE: For now, the same function is applied to all the images within the
    batch. Therefore, the function_args is one value per batch of transformed
    images, and not one value per image.
    """

    @dataclass
    class Options(AuxiliaryTask.Options):
        """Command-line options for the Transformation-based auxiliary task."""

        # Wether or not both the original and transformed codes should be passed
        # to the auxiliary layer in order to detect the transformation.
        compare_with_original: bool = True

    def __init__(
        self,
        function: Callable[[Tensor, Any], Tensor],
        function_args: List[Any],
        loss: Callable,
        name: str = None,
        auxiliary_layer: nn.Module = None,
        options: Options = None,
    ):
        """Creates a transformation-based task to predict alpha given the codes.

        Args:
            function (Callable[[Tensor, Any], Tensor]): A function to apply to x
            before it is passed to the encoder.

            function_args (List[Any]): The arguments to be passed to the
            `function`.

            loss (Callable): A loss function, which will be called with
            `alpha_pred` and `alpha` to get a loss for each argument in `function_args`.

            name (str, optional): [description]. Defaults to None.

            auxiliary_layer (nn.Module, optional): [description]. Defaults to None.

            options (Options, optional): [description]. Defaults to None.
        """
        super().__init__(options=options)
        self.function = function
        self.name = name or self.function.__name__
        self.function_args = function_args
        self.alphas: Tensor = torch.Tensor(self.function_args)
        self.options: TransformationBasedTask.Options = options or self.Options()
        self.nargs = len(self.function_args)
        # which loss to use. CrossEntropy when classifying, or MSE when regressing.
        self.loss = loss

        if auxiliary_layer is not None:
            self.auxiliary_layer = auxiliary_layer
        else:
            input_dims = AuxiliaryTask.hidden_size
            if self.options.compare_with_original:
                input_dims *= 2
            self.auxiliary_layer = nn.Sequential(
                nn.Flatten(),
                nn.Linear(input_dims, self.nargs),
            )

    def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor = None, y: Tensor = None) -> Loss:
        loss_info = Loss(self.name)
        batch_size = x.shape[0]
        assert self.alphas is not None, "set the `self.alphas` attribute in the base class."
        assert (
            self.function_args is not None
        ), "set the `self.function_args` attribute in the base class."

        # Get the loss for each transformation argument.
        for fn_arg, alpha in zip(self.function_args, self.alphas):
            loss_i = self.get_loss_for_arg(x=x, h_x=h_x, fn_arg=fn_arg, alpha=alpha)
            loss_info += loss_i
            # print(f"{self.name}_{fn_arg}", loss_i.metrics)

        # Fuse all the sub-metrics into a total metric.
        # For instance, all the "rotate_0", "rotate_90", "rotate_180", etc.
        metrics = loss_info.metrics
        total_metrics = sum(loss_info.metrics.values(), Metrics())
        # we actually add up all the metrics to get the "overall" metric.
        metrics.clear()
        metrics[self.name] = total_metrics
        return loss_info

    def get_loss_for_arg(self, x: Tensor, h_x: Tensor, fn_arg: Any, alpha: Tensor) -> Loss:
        alpha = alpha.to(x.device)
        # TODO: Transform before or after the `preprocess_inputs` function?
        x = fix_channels(x)
        # Transform X using the function.
        x_t = self.function(x, fn_arg)
        # Get the code for the transformed x.
        h_x_t = self.encode(x_t)

        aux_layer_input = h_x_t
        if self.options.compare_with_original:
            aux_layer_input = torch.cat([h_x, h_x_t], dim=-1)

        # Get the predicted argument of the transformation.
        alpha_t = self.auxiliary_layer(aux_layer_input)

        # get the metrics for this particular argument (accuracy, mse, etc.)
        if isinstance(fn_arg, int):
            name = f"{fn_arg}"
        else:
            name = f"{fn_arg:.3f}"
        loss = Loss(name)
        loss.loss = self.loss(alpha_t, alpha)
        loss.metrics[name] = get_metrics(x=x_t, h_x=h_x_t, y_pred=alpha_t, y=alpha)

        # Save some tensors for debugging purposes:
        loss.tensors["x_t"] = x_t
        loss.tensors["h_x_t"] = h_x_t
        loss.tensors["alpha_t"] = alpha_t
        return loss


class ClassifyTransformationTask(TransformationBasedTask):
    """
    Generates an AuxiliaryTask for an arbitrary transformation function.

    Tries to classify which argument was passed to the function.
    `self.alphas` is the classification target. It indicates which
    transformation argument was used.
    I.e. a vector of 0's for function_args[0], 1's for function_args[1], etc.
    """

    def __init__(
        self,
        function: Callable[[Tensor, Any], Tensor],
        function_args: List[Any],
        name: str = None,
        options: TransformationBasedTask.Options = None,
    ):
        super().__init__(
            function=function,
            function_args=function_args,
            name=name,
            loss=nn.CrossEntropyLoss(),
            options=options,
        )
        self.labels = torch.arange(len(function_args), dtype=torch.long)

    def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor = None, y: Tensor = None) -> Loss:
        batch_size = x.shape[0]
        self.alphas = self.labels.view(-1, 1).repeat(1, batch_size)
        return super().get_loss(x=x, h_x=h_x, y_pred=y_pred, y=y)


class RegressTransformationTask(TransformationBasedTask):
    """
    Generates an AuxiliaryTask for an arbitrary transformation function.

    Tries to Regress which argument value was passed to the function.
    x -----------------------encoder(x)-> h_x -----|
    x --f(x, alpha)--> x_t --encoder(x)-> h_x_t ---|----A(h_x, h_x_t) --> alpha_pred <-MSE-> alpha

    Can either use a list of function arguments, or a range from which to sample
    the argument values uniformly.
    """

    def __init__(
        self,
        function: Callable[[Tensor, Any], Tensor],
        function_args: List[Any] = None,
        name: str = None,
        function_arg_range: Tuple[float, float] = None,
        n_calls: int = 2,
        options: TransformationBasedTask.Options = None,
    ):
        super().__init__(
            function=function,
            function_args=[],
            name=name,
            loss=nn.MSELoss(),
            options=options,
        )
        if function_arg_range:
            self.function_arg_range = function_arg_range
            self.n_calls = n_calls
        elif function_args:
            self.function_arg_range = (min(function_args), max(function_args))
            self.n_calls = len(function_args)
        else:
            raise RuntimeError("`function_args` or `function_arg_range` must be set.")

        self.arg_min = self.function_arg_range[0]
        self.arg_max = self.function_arg_range[1]
        self.arg_med = (self.arg_min + self.arg_max) / 2
        self.arg_amp = self.arg_max - self.arg_min

        input_dims = AuxiliaryTask.hidden_size
        if self.options.compare_with_original:
            input_dims *= 2
        self.auxiliary_layer = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_dims, 1),
            nn.Sigmoid(),
            ScaleToRange(arg_min=self.arg_min, arg_amp=self.arg_amp),
        )

    def get_function_args(self) -> Tensor:
        # sample random arguments in the range [self.min_arg, self.max_arg]
        args = torch.rand(self.n_calls)
        args *= self.arg_amp
        args += self.arg_min
        return args

    def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor = None, y: Tensor = None) -> Loss:
        batch_size = x.shape[0]
        random_alphas = self.get_function_args()
        self.function_args = random_alphas.tolist()
        self.alphas = random_alphas.view(-1, 1, 1).repeat(1, batch_size, 1)
        loss = super().get_loss(x=x, h_x=h_x, y_pred=y_pred, y=y)
        return loss


class ScaleToRange(nn.Module):
    def __init__(self, arg_min: float, arg_amp: float):
        super().__init__()
        self.arg_min = arg_min
        self.arg_max = arg_amp

    def forward(self, x: Tensor) -> Tensor:
        return self.arg_min + self.arg_amp * x


================================================
FILE: sequoia/methods/aux_tasks/transformation_based/rotation.py
================================================
from dataclasses import dataclass

from torch import Tensor

from .bases import ClassifyTransformationTask


def rotate(x: Tensor, angle: int) -> Tensor:
    """Rotates the given tensor `x` by an angle `angle`.

    Currently only supports multiples of 90 degrees.

    Args:
        x (Tensor): An image or a batch of images, with shape [(b), C, H, W]
        angle (int): An angle. Currently only supports {0, 90, 180, 270}.

    Returns:
        Tensor: The tensor x, rotated by `angle` degrees counter-clockwise.

    Example:
    >>> import torch
    >>> x = torch.Tensor([
    ...   [1, 2, 3],
    ...   [4, 5, 6],
    ...   [7, 8, 9],
    ... ])
    >>> print(x)
    tensor([[1., 2., 3.],
            [4., 5., 6.],
            [7., 8., 9.]])
    >>> x = x.view(1, 3, 3)
    >>> x_rot = rotate(x, 90)
    >>> print(x_rot.shape)
    torch.Size([1, 3, 3])
    >>> print(x_rot)
    tensor([[[3., 6., 9.],
             [2., 5., 8.],
             [1., 4., 7.]]])
    """

    # TODO: Test that this works.
    assert angle % 90 == 0, "can only rotate 0, 90, 180, or 270 degrees for now."
    k = angle // 90
    # BUG: Very rarely, this condition won't work! (More specifically, only on the last batch of data!)
    # assert min(x.shape) == x.shape[-3], f"Image should be in [(b) C H W] format. (image shape: {x.shape}"
    return x.rot90(k, dims=(-2, -1))


if __name__ == "__main__":
    import doctest

    doctest.testmod()


class RotationTask(ClassifyTransformationTask):
    @dataclass
    class Options(ClassifyTransformationTask.Options):
        """Command-line options for the Transformation-based auxiliary task."""

        # Wether or not both the original and transformed codes should be passed
        # to the auxiliary layer in order to detect the transformation.
        # TODO: Maybe try with this set to False, to learn "innate" orientation rather than relative orientation.
        compare_with_original: bool = True

    def __init__(self, name="rotation", options: "RotationTask.Options" = None):
        super().__init__(
            function=rotate,
            function_args=[0, 90, 180, 270],
            name=name,
            options=options or RotationTask.Options(),
        )


================================================
FILE: sequoia/methods/avalanche_methods/__init__.py
================================================
""" Adapters for Avalanche Strategies, so they can be used as Methods in Sequoia.

See the Avalanche repo for more info: https://github.com/ContinualAI/avalanche
"""

# from .agem import AGEMMethod
# from .ar1 import AR1Method
# from .base import AvalancheMethod
# from .cwr_star import CWRStarMethod
# from .ewc import EWCMethod

# # Still quite buggy, needs to be fixed on the avalanche side.
# from .gdumb import GDumbMethod
# from .gem import GEMMethod
# from .lwf import LwFMethod
# from .naive import NaiveMethod
# from .replay import ReplayMethod
# from .synaptic_intelligence import SynapticIntelligenceMethod


================================================
FILE: sequoia/methods/avalanche_methods/agem.py
================================================
""" Method based on AGEM from [Avalanche](https://github.com/ContinualAI/avalanche).

See `avalanche.training.plugins.agem.AGEMPlugin` or
`avalanche.training.strategies.strategy_wrappers.AGEM` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Type

import pytest
from avalanche.training.strategies import AGEM, BaseStrategy
from simple_parsing import ArgumentParser
from simple_parsing.helpers.hparams import uniform

from sequoia.methods import register_method
from sequoia.settings.sl import TaskIncrementalSLSetting

from .base import AvalancheMethod


@register_method
@dataclass
class AGEMMethod(AvalancheMethod[AGEM]):
    """Average Gradient Episodic Memory (AGEM) strategy from Avalanche.
    See AGEM plugin for details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # number of patterns per experience in the memory
    patterns_per_exp: int = uniform(10, 1000, default=100)
    # number of patterns in memory sample when computing reference gradient.
    sample_size: int = uniform(16, 256, default=64)

    strategy_class: ClassVar[Type[BaseStrategy]] = AGEM


if __name__ == "__main__":
    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    parser = ArgumentParser(__doc__)
    parser.add_arguments(AGEMMethod, "method")
    args = parser.parse_args()
    method: AGEMMethod = args.method

    results = setting.apply(method)


================================================
FILE: sequoia/methods/avalanche_methods/agem_test.py
================================================
""" WIP: Tests for the AGEM Method.

For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type

from .agem import AGEMMethod
from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod


class TestAGEMMethod(_TestAvalancheMethod):
    Method: ClassVar[Type[AvalancheMethod]] = AGEMMethod


================================================
FILE: sequoia/methods/avalanche_methods/ar1.py
================================================
""" Method based on AR1 from [Avalanche](https://github.com/ContinualAI/avalanche).

See `avalanche.training.strategies.ar1.AR1` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Type

from avalanche.training.strategies import AR1, BaseStrategy
from simple_parsing.helpers.hparams import log_uniform, uniform

from sequoia.methods import register_method
from sequoia.settings.sl import TaskIncrementalSLSetting

from .base import AvalancheMethod


@register_method
@dataclass
class AR1Method(AvalancheMethod[AR1]):
    """AR1 strategy from Avalanche.
    See AR1 plugin for details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # The learning rate (SGD optimizer).
    lr: float = log_uniform(1e-6, 1e-2, default=0.001)
    # The momentum (SGD optimizer).
    momentum: float = uniform(0.9, 0.999, default=0.9)
    # The L2 penalty used for weight decay.
    l2: float = uniform(1e-6, 1e-3, default=0.0005)
    # The number of training epochs. Defaults to 4.
    train_epochs: int = uniform(1, 50, default=4)
    # The initial update rate of BatchReNorm layers.
    init_update_rate: float = 0.01
    # The incremental update rate of BatchReNorm layers.
    inc_update_rate: float = 0.00005
    # The maximum r value of BatchReNorm layers.
    max_r_max: float = 1.25
    # The maximum d value of BatchReNorm layers.
    max_d_max: float = 0.5
    # The incremental step of r and d values of BatchReNorm layers.
    inc_step: float = 4.1e-05
    # The size of the replay buffer. The replay buffer is shared across classes.
    rm_sz: int = uniform(500, 2000, default=1500)
    # A string describing the name of the layer to use while freezing the lower
    # (nearest to the input) part of the model. The given layer is not frozen
    # (exclusive).
    freeze_below_layer: str = "lat_features.19.bn.beta"
    # The number of the layer to use as the Latent Replay Layer. Usually this is the
    # same of `freeze_below_layer`.
    latent_layer_num: int = 19
    # The Synaptic Intelligence lambda term. Defaults to 0, which means that the
    # Synaptic Intelligence regularization will not be applied.
    ewc_lambda: float = uniform(0, 1, default=0)
    # The train minibatch size. Defaults to 128.
    train_mb_size: int = uniform(1, 512, default=128)
    # The eval minibatch size. Defaults to 128.
    eval_mb_size: int = uniform(1, 512, default=128)

    strategy_class: ClassVar[Type[BaseStrategy]] = AR1


if __name__ == "__main__":
    from simple_parsing import ArgumentParser

    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    parser = ArgumentParser(__doc__)
    parser.add_arguments(AR1Method, "method")
    args = parser.parse_args()
    method: AR1Method = args.method

    results = setting.apply(method)


================================================
FILE: sequoia/methods/avalanche_methods/ar1_test.py
================================================
""" WIP: Tests for the AR1 Method.

For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type

import pytest
from avalanche.models import SimpleCNN, SimpleMLP
from torch.nn import Module

from sequoia.common.config import Config
from sequoia.conftest import xfail_param
from sequoia.settings.sl import TaskIncrementalSLSetting

from .ar1 import AR1Method
from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .patched_models import MTSimpleCNN, MTSimpleMLP


@pytest.mark.xfail(reason="AR1 isn't super well supported yet.")
class TestAR1Method(_TestAvalancheMethod):
    Method: ClassVar[Type[AvalancheMethod]] = AR1Method

    @pytest.mark.timeout(60)
    @pytest.mark.parametrize(
        "model_type",
        [
            xfail_param(
                SimpleCNN,
                reason="seems like the model in AR1 is supposed to be larger?",
            ),
            SimpleMLP,
            xfail_param(
                MTSimpleCNN,
                reason="IndexError Bug inside `avalanche/models/dynamic_modules.py",
            ),
            xfail_param(
                MTSimpleMLP,
                reason="IndexError Bug inside `avalanche/models/dynamic_modules.py",
            ),
        ],
    )
    def test_short_task_incremental_setting(
        self,
        model_type: Type[Module],
        short_task_incremental_setting: TaskIncrementalSLSetting,
        config: Config,
    ):
        method = self.Method(model=model_type)
        results = short_task_incremental_setting.apply(method, config)
        assert 0.05 < results.average_final_performance.objective


================================================
FILE: sequoia/methods/avalanche_methods/base.py
================================================
""" Adapter for the `BaseStrategy` from Avalanche, wrapping it up into a Sequoia Method.

See the Avalanche repo for more info: https://github.com/ContinualAI/avalanche
"""
import inspect
import warnings
from dataclasses import dataclass, fields
from typing import ClassVar, Dict, Generic, List, Optional, Type, TypeVar, Union

import gym
import torch
import tqdm
from avalanche.benchmarks.scenarios import Experience
from avalanche.evaluation.metrics import accuracy_metrics, forgetting_metrics, loss_metrics
from avalanche.logging import InteractiveLogger
from avalanche.logging.wandb_logger import WandBLogger as _WandBLogger
from avalanche.models import SimpleCNN, SimpleMLP
from avalanche.models.utils import avalanche_forward
from avalanche.training.plugins import EvaluationPlugin, StrategyPlugin
from avalanche.training.strategies import BaseStrategy
from gym import spaces
from gym.spaces.utils import flatdim
from gym.utils import colorize
from simple_parsing.helpers import choice, field, list_field
from simple_parsing.helpers.hparams import HyperParameters, log_uniform, uniform
from torch import nn, optim
from torch.nn import Module
from torch.optim import SGD
from torch.optim.optimizer import Optimizer

from sequoia.common.spaces import Image
from sequoia.methods import Method
from sequoia.settings.sl import (
    ClassIncrementalSetting,
    ContinualSLSetting,
    PassiveEnvironment,
    SLSetting,
)
from sequoia.settings.sl.continual import Actions, ContinualSLTestEnvironment, Observations, Rewards
from sequoia.settings.sl.continual.setting import smart_class_prediction
from sequoia.utils import get_logger

from .experience import SequoiaExperience
from .patched_models import MTSimpleCNN, MTSimpleMLP

logger = get_logger(__name__)

StrategyType = TypeVar("StrategyType", bound=BaseStrategy)


# "Patch" for the WandbLogger of Avalanche


class WandBLogger(_WandBLogger):

    # def before_run(self):
    #     if self.wandb is None:
    #         self.import_wandb()
    #     if self.init_kwargs:
    #         self.wandb.init(**self.init_kwargs)
    #     else:
    #         self.wandb.init()

    def import_wandb(self):
        try:
            import wandb
        except ImportError:
            raise ImportError('Please run "pip install wandb" to install wandb')
        self.wandb = wandb

    def args_parse(self):
        self.init_kwargs = {"project": self.project_name, "name": self.run_name}
        if self.params:
            self.init_kwargs.update(self.params)

    def before_run(self):
        if self.wandb is None:
            self.import_wandb()
        if self.init_kwargs:
            if not self.wandb.run:
                self.wandb.init(**self.init_kwargs)
        else:
            if not self.wandb.run:
                self.wandb.init()


@dataclass
class AvalancheMethod(
    Method,
    HyperParameters,
    Generic[StrategyType],
    target_setting=ContinualSLSetting,
):
    """Base class for all the Methods adapted from Avalanche."""

    # Name for the 'family' of methods, use to differentiate methods with the same name.
    family: ClassVar[str] = "avalanche"

    # The Strategy class to use for this Method. Subclasses have to add this property.
    strategy_class: ClassVar[Type[StrategyType]] = BaseStrategy

    # TODO: Maybe use a 'PluginClass', so that we can avoid subclassing both the
    # plugin and the strategy when we need to patch something in the plugin.
    plugin_class: ClassVar[Optional[Type[StrategyPlugin]]]

    # Class Variable to hold the types of models available as options for the `model`
    # field below.
    available_models: ClassVar[Dict[str, Type[nn.Module]]] = {
        "simple_cnn": SimpleCNN,
        "simple_mlp": SimpleMLP,
        "mt_simple_cnn": MTSimpleCNN,
        "mt_simple_mlp": MTSimpleMLP,
    }
    # Class Variable to hold the types of optimizers available for the `optimizer` field
    # below.
    available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = {
        "sgd": SGD,
        "adam": optim.Adam,
        "rmsprop": optim.RMSprop,
    }
    # Class variable to hold the types of loss functions available for the `criterion`
    # field below.
    available_criterions: ClassVar[Dict[str, Type[nn.Module]]] = {
        "cross_entropy_loss": nn.CrossEntropyLoss,
    }

    # The model.
    model: Union[Module, Type[Module]] = choice(available_models, default=SimpleCNN)
    # The optimizer to use.
    optimizer: Union[Optimizer, Type[Optimizer]] = choice(available_optimizers, default=optim.Adam)
    # The loss criterion to use.
    criterion: Union[Module, Type[Module]] = choice(
        available_criterions, default=nn.CrossEntropyLoss
    )
    # The train minibatch size.
    train_mb_size: int = uniform(1, 2048, default=64)
    # The number of training epochs.
    train_epochs: int = uniform(1, 100, default=5)
    # The eval minibatch size.
    eval_mb_size: int = 1
    #  The device to use. Defaults to None (cpu).
    device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Plugins to be added. Defaults to None.
    plugins: Optional[List[StrategyPlugin]] = list_field(default=None, cmd=False, to_dict=False)
    # (optional) instance of EvaluationPlugin for logging and metric computations.
    evaluator: Optional[EvaluationPlugin] = field(None, cmd=False, to_dict=False)
    # The frequency of the calls to `eval` inside the training loop.
    # if -1: no evaluation during training.
    # if  0: calls `eval` after the final epoch of each training
    #     experience.
    # if >0: calls `eval` every `eval_every` epochs and at the end
    #     of all the epochs for a single experience.
    eval_every: int = -1
    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
    # L2 regularization term for the model weights.
    weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)
    # Hidden size of the model, when applicable.
    hidden_size: int = uniform(128, 1024, default=512)
    # Number of workers of the dataloader. Defaults to 4.
    num_workers: int = 4

    def __post_init__(self):
        super().__post_init__()
        # Count the number of calls to `configure`. (useful when running sweeps, as we
        # reuse the Method instance.)
        self._n_configures: int = 0
        self.setting: ClassIncrementalSetting
        self.cl_strategy: StrategyType

    def configure(self, setting: ClassIncrementalSetting) -> None:
        self.setting = setting
        self.model = self.create_model(setting).to(self.device)

        # Select the loss function to use.
        if not isinstance(self.criterion, nn.Module):
            self.criterion = self.criterion()

        metrics = [
            accuracy_metrics(epoch=True, experience=True, stream=True),
            forgetting_metrics(experience=True, stream=True),
            loss_metrics(minibatch=False, epoch=True, experience=True, stream=True),
        ]
        loggers = [
            # BUG: evaluation.py:94, _update_metrics:
            # before_training() takes 2 positional arguments but 3 were given
            # default_logger,
            InteractiveLogger(),
        ]
        if setting.wandb and setting.wandb.project:
            wandb_logger = WandBLogger(
                project_name=setting.wandb.project,
                run_name=setting.wandb.run_name,
                params=setting.wandb.wandb_init_kwargs(),
            )
            loggers.append(wandb_logger)

        self.evaluator = EvaluationPlugin(
            *metrics,
            loggers=loggers,
        )

        self.optimizer = self.make_optimizer()
        # Actually initialize the strategy using the fields on `self`.
        self.cl_strategy: StrategyType = self.create_cl_strategy(setting)

        if setting.monitor_training_performance and (
            type(self).environment_to_experience is AvalancheMethod.environment_to_experience
        ):
            warnings.warn(
                UserWarning(
                    colorize(
                        "This Setting would like to monitor the online training "
                        "performance, which means that the rewards/labels (`y`) are "
                        "returned after sending an action (prediction) to the training "
                        "environment."
                        "\n"
                        "However, Avalanche does not currently support training on "
                        "'active' dataloaders or gym environments, and needs access to "
                        "the 'x' and 'y' at the same time, as is usually the case in "
                        "Supervised CL."
                        "\n"
                        "Therefore, the current solution I've found for this issue is "
                        "to iterate once over the training environment, sending it "
                        "(by default random) actions, in order to create an "
                        "'Experience' object expected by the Avalanche Strategies."
                        "\n"
                        "Concretely, this means that, unless you overwrite the "
                        "`environment_to_experience` method, **your online performance "
                        "score will be limited to chance accuracy!**",
                        "yellow",
                    )
                )
            )

    def create_cl_strategy(self, setting: ClassIncrementalSetting) -> StrategyType:
        strategy_constructor_params: List[str] = list(
            inspect.signature(self.strategy_class.__init__).parameters.keys()
        )
        cl_strategy_kwargs = {
            f.name: getattr(self, f.name)
            for f in fields(self)
            if f.name in strategy_constructor_params
        }
        return self.strategy_class(**cl_strategy_kwargs)

    def create_model(self, setting: ClassIncrementalSetting) -> Module:
        """Create the Model for the setting.

        Parameters
        ----------
        setting : ClassIncrementalSetting
            The Setting on which this Method will be applied.

        Returns
        -------
        Module
            The Model to be used, which will be passed to the Strategy constructor.
        """
        image_space: Image = setting.observation_space.x
        input_dims = flatdim(image_space)
        assert isinstance(
            setting.action_space, spaces.Discrete
        ), "assume a classification problem for now."
        num_classes = setting.action_space.n

        if setting.task_labels_at_train_time:
            if setting.task_labels_at_test_time:
                if self.model is SimpleCNN and MTSimpleCNN in self.available_models.values():
                    self.model = MTSimpleCNN
                    logger.info(
                        f"Upgrading the model to a {MTSimpleCNN}, since task-labels "
                        f"are available at train and test time."
                    )
                if self.model is SimpleMLP and MTSimpleMLP in self.available_models.values():
                    self.model = MTSimpleMLP
                    logger.info(
                        f"Upgrading the model to a {MTSimpleMLP}, since task-labels "
                        f"are available at train and test time."
                    )

        if isinstance(self.model, nn.Module):
            if self._n_configures > 0:
                logger.info("Resetting the model, since this isn't the first run.")
                self.model = type(self.model)
                self._n_configures += 1
            else:
                logger.info(f"Using model {self.model}.")
                return self.model

        if self.model is SimpleMLP:
            return self.model(
                input_size=input_dims,
                hidden_size=self.hidden_size,
                num_classes=num_classes,
            )
        if self.model is MTSimpleMLP:
            return self.model(input_size=input_dims, hidden_size=self.hidden_size)
        if self.model is SimpleCNN:
            return self.model(num_classes=num_classes)
        # self.model is most probably a type of nn.Module, so we instantiate it.
        # These other models (MTSimpleCNN) don't seem to take any kwargs.
        return self.model()

    def make_optimizer(self) -> Optimizer:
        """Creates the Optimizer."""
        optimizer_class = self.optimizer
        if isinstance(self.optimizer, Optimizer):
            optimizer_class = type(self.optimizer)
        return optimizer_class(
            self.model.parameters(),
            lr=self.learning_rate,
            weight_decay=self.weight_decay,
        )

    def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
        train_exp = self.environment_to_experience(train_env, setting=self.setting)
        valid_exp = self.environment_to_experience(valid_env, setting=self.setting)
        self.cl_strategy.train(train_exp, eval_streams=[valid_exp], num_workers=self.num_workers)

    def get_actions(
        self,
        observations: ClassIncrementalSetting.Observations,
        action_space: gym.Space,
    ) -> ClassIncrementalSetting.Actions:
        observations = observations.to(self.device)

        with torch.no_grad():
            x = observations.x
            task_labels = observations.task_labels
            logits = avalanche_forward(self.model, x=x, task_labels=task_labels)
            if task_labels is not None:
                # If task labels are available, figure out the possible classes for
                # each task, and 'mask out' those so they aren't predicted.
                y_pred = smart_class_prediction(
                    logits, task_labels, setting=self.setting, train=False
                )
            else:
                y_pred = logits.argmax(-1)
            return self.target_setting.Actions(y_pred=y_pred)

    def set_testing(self):
        self.model.current_task_id = None
        return super().set_testing()

    def on_task_switch(self, task_id: Optional[int]) -> None:
        if self.training:
            # No need to tell the cl_strategy, because we call `.train` which calls
            # `before_training_exp` with the current exp (the current task).
            self.model.current_task_id = task_id
        else:
            # TODO: In Sequoia, the test 'epoch' goes through the sequence of tasks, not
            # necessarily in the same order as during training, while in Avalanche the
            # 'eval' occurs on a per-task basis.
            # TODO: There is a bug with task-incremental setting, where during testing
            # the algo might be tested on tasks it hasn't built an output layer for yet,
            # but building this layer requires calling `adaptation(dataset)` and this
            # dataset will be iterated on, which isn't great in the case of the test
            # env...
            # encountered before.
            # During test-time, there might be a task boundary, and we need to let the
            # cl_strategy and the plugins know.
            # TODO: Get this working, figure out what the plugins expect to retrieve
            # from the cl_strategy in this callback.
            pass

    def get_search_space(self, setting: ClassIncrementalSetting):
        return self.get_orion_space()

    def adapt_to_new_hparams(self, new_hparams: Dict):
        for k, v in new_hparams.items():
            if isinstance(v, dict):
                raise NotImplementedError(f"todo: set hparam {k} to value {v}")
            setattr(self, k, v)

    def environment_to_experience(self, env: PassiveEnvironment, setting: SLSetting) -> Experience:
        """
        "Converts" the PassiveEnvironments (dataloaders) from Sequoia
        into an Experience object usable by the Avalanche Strategies. By default, this
        just iterates through the environment, giving back the actions from the
        `get_actions` method.

        NOTE: You could instead train an online model here, in order to get better
        online performance!
        """
        all_observations: List[Observations] = []
        all_rewards: List[Rewards] = []

        for batch in tqdm.tqdm(env, desc="Converting environment into TensorDataset"):
            observations: Observations
            rewards: Optional[Rewards]
            if isinstance(batch, Observations):
                observations = batch
                rewards = None
            else:
                assert isinstance(batch, tuple) and len(batch) == 2
                observations, rewards = batch

            if rewards is None:
                # Need to send actions to the env before we can actually get the
                # associated Reward. Here there are (at least) three options to choose
                # from:

                # Option 1: Select action at random:
                action = env.action_space.sample()
                if observations.batch_size != action.shape[0]:
                    action = action[: observations.batch_size]
                rewards: Rewards = env.send(action)

                # Option 2: Use the current model, in 'inference' mode:
                # action = self.get_actions(observations, action_space=env.action_space)
                # rewards: Rewards = env.send(action)

                # Option 3: Train an online model:
                # # NOTE: You might have to change this for your strategy. For instance,
                # # currently does not take any plugins into consideration.
                # self.cl_strategy.optimizer.zero_grad()

                # x = observations.x.to(self.cl_strategy.device)
                # task_labels = observations.task_labels
                # logits = avalanche_forward(self.model, x=x, task_labels=task_labels)
                # y_pred = logits.argmax(-1)
                # action = self.target_setting.Actions(y_pred=y_pred)

                # rewards: Rewards = env.send(action)

                # y = rewards.y.to(self.cl_strategy.device)
                # # Train the model:
                # loss = self.cl_strategy.criterion(logits, y)
                # loss.backward()
                # self.cl_strategy.optimizer.step()

            all_observations.append(observations)
            all_rewards.append(rewards)

        # Stack all the observations into a single `Observations` object:
        stacked_observations: Observations = Observations.concatenate(all_observations)
        stacked_rewards: Rewards = Rewards.concatenate(all_rewards)
        # BUG: Cuda errors, probably due to indexing into a tensor on different device
        # /numpy/etc.
        stacked_observations = stacked_observations.cpu()
        stacked_rewards = stacked_rewards.cpu()

        x = stacked_observations.x
        task_labels = stacked_observations.task_labels
        y = stacked_rewards.y
        return SequoiaExperience(env=env, setting=setting, x=x, y=y, task_labels=task_labels)


def test_epoch(strategy, test_env: ContinualSLTestEnvironment, **kwargs):
    strategy.is_training = False
    strategy.model.eval()
    strategy.model.to(strategy.device)

    # strategy.before_eval(**kwargs)

    # Data Adaptation
    # strategy.before_eval_dataset_adaptation(**kwargs)
    # strategy.eval_dataset_adaptation(**kwargs)
    # strategy.after_eval_dataset_adaptation(**kwargs)
    # strategy.make_eval_dataloader(**kwargs)

    # strategy.before_eval_exp(**kwargs)
    # strategy.eval_epoch(**kwargs)
    test_epoch_gym_env(strategy, test_env)
    # strategy.after_eval_exp(**kwargs)


def test_epoch_gym_env(strategy: BaseStrategy, test_env: ContinualSLTestEnvironment, **kwargs):
    strategy.mb_it = 0
    episode = 0
    strategy.experience = test_env
    total_steps = 0
    max_episodes = 1  # Only one 'episode' / 'epoch'.
    while not test_env.is_closed() and episode < max_episodes:
        observations: Observations = test_env.reset()
        done = False
        step = 0
        with tqdm.tqdm(desc="Eval epoch") as pbar:
            while not done:
                # strategy.before_eval_iteration(**kwargs)
                strategy.mb_x = observations.x
                strategy.mb_task_id = observations.task_labels

                strategy.mb_x = strategy.mb_x.to(strategy.device)
                # IDEA: Should probably return a random action whenever we have task
                # labels in the test loop the task id isn't a known one in the model:

                # strategy.before_eval_forward(**kwargs)

                strategy.logits = avalanche_forward(
                    model=strategy.model,
                    x=strategy.mb_x,
                    task_labels=strategy.mb_task_id,
                )

                y_pred = strategy.logits.argmax(-1)
                actions = Actions(y_pred=y_pred)

                observations, rewards, done, info = test_env.step(actions)
                step += 1
                pbar.update()
                total_steps += 1

                if not isinstance(done, bool):
                    assert False, done

                strategy.mb_y = rewards.y.to(strategy.device) if rewards is not None else None
                # strategy.after_eval_forward(**kwargs)
                strategy.mb_it += 1

                strategy.loss = strategy.criterion(strategy.logits, strategy.mb_y)

                # strategy.after_eval_iteration(**kwargs)

                pbar.set_postfix(
                    {
                        "Episode": f"{episode}/{max_episodes}",
                        "step": f"{step}",
                        "total_steps": f"{total_steps}",
                        "loss": f"{strategy.loss.item()}",
                    }
                )
        episode += 1


================================================
FILE: sequoia/methods/avalanche_methods/base_test.py
================================================
import inspect
from inspect import Signature, _empty, getsourcefile
from typing import ClassVar, List, Optional, Type

import pytest
import tqdm
from avalanche.models import SimpleCNN, SimpleMLP
from avalanche.models.utils import avalanche_forward
from avalanche.training.strategies import BaseStrategy

from sequoia.common.config import Config
from sequoia.conftest import slow
from sequoia.methods.method_test import MethodTests
from sequoia.settings.sl import ClassIncrementalSetting, SLSetting
from sequoia.settings.sl.incremental.objects import Observations, Rewards

from .base import AvalancheMethod
from .experience import SequoiaExperience
from .patched_models import MTSimpleCNN, MTSimpleMLP


class _TestAvalancheMethod(MethodTests):
    Method: ClassVar[Type[AvalancheMethod]] = AvalancheMethod

    # Names of (hyper-)parameters which are allowed to have a different default value in
    # Sequoia compared to their implementations in Avalanche.
    ignored_parameter_differences: ClassVar[List[str]] = [
        "plugins",
        "device",
        "eval_mb_size",
        "criterion",
        "train_mb_size",
        "train_epochs",
        "evaluator",
    ]

    @classmethod
    @pytest.fixture(params=[SimpleCNN, SimpleMLP, MTSimpleCNN, MTSimpleMLP])
    def method(cls, config: Config, request) -> AvalancheMethod:
        """Fixture that returns the Method instance to use when testing/debugging."""
        model_type = request.param
        return cls.Method(model=model_type, train_mb_size=10, train_epochs=1)

    def test_hparams_have_same_defaults_as_in_avalanche(self):
        strategy_type: Type[BaseStrategy] = self.Method.strategy_class
        method = self.Method()
        strategy_constructor: Signature = inspect.signature(strategy_type.__init__)
        strategy_init_params = strategy_constructor.parameters

        # TODO: Use the plugin constructor as the reference, rather than the Strategy
        # constructor.
        # plugin_constructor

        for parameter_name, parameter in strategy_init_params.items():
            if parameter.default is _empty:
                continue
            assert hasattr(method, parameter_name)
            method_value = getattr(method, parameter_name)
            # Ignore mismatches in some parameters, like `device`.
            if parameter_name in self.ignored_parameter_differences:
                continue

            assert method_value == parameter.default, (
                f"{self.Method.__name__} in Sequoia has different default value for "
                f"hyper-parameter '{parameter_name}' than in Avalanche: \n"
                f"\t{method_value} != {parameter.default}\n"
                f"Path to sequoia implementation: {getsourcefile(self.Method)}\n"
                f"Path to SB3 implementation: {getsourcefile(strategy_type)}\n"
            )

    def validate_results(
        self,
        setting: SLSetting,
        method: AvalancheMethod,
        results: SLSetting.Results,
    ) -> None:
        assert results
        assert results.objective
        # TODO: Set some 'reasonable' bounds on the performance here, depending on the
        # setting/dataset.# def validate_results

    @slow
    @pytest.mark.timeout(60)
    def test_short_sl_track(
        self,
        method: AvalancheMethod,
        short_sl_track_setting: ClassIncrementalSetting,
        config: Config,
    ):
        # Use the same batch size as the setting, since it's shorter than usual.
        method.train_mb_size = short_sl_track_setting.batch_size
        results = short_sl_track_setting.apply(method, config=config)
        # TODO: Set up a more reasonable bound on the expected performance. For now this
        # is fine as we're just debugging: the test passes as long as there is a results
        # object that contains a non-zero online performance (meaning that the setting
        # was monitoring training performance correctly).
        assert 0 < results.average_online_performance.objective
        assert 0 < results.average_final_performance.objective


def test_warning_if_environment_to_experience_isnt_overwritten(short_sl_track_setting):
    """When"""
    method = AvalancheMethod()
    assert short_sl_track_setting.monitor_training_performance
    with pytest.warns(UserWarning, match="chance accuracy"):
        method.configure(short_sl_track_setting)


class MyDummyMethod(AvalancheMethod):
    def environment_to_experience(self, env, setting):
        all_observations: List[Observations] = []
        all_rewards: List[Rewards] = []

        for batch in tqdm.tqdm(env, desc="Converting environment into TensorDataset"):
            observations: Observations
            rewards: Optional[Rewards]
            if isinstance(batch, Observations):
                observations = batch
                rewards = None
            else:
                assert isinstance(batch, tuple) and len(batch) == 2
                observations, rewards = batch

            if rewards is None:
                # Need to send actions to the env before we can actually get the
                # associated Reward. Here there are (at least) three options to choose
                # from:

                # Option 1: Select action at random:
                # action = env.action_space.sample()
                # if observations.batch_size != action.shape[0]:
                #     action = action[: observations.batch_size]
                # rewards: Rewards = env.send(action)

                # Option 2: Use the current model, in 'inference' mode:
                # action = self.get_actions(observations, action_space=env.action_space)
                # rewards: Rewards = env.send(action)

                # Option 3: Train an online model:
                # NOTE: You might have to change this for your strategy. For instance,
                # currently does not take any plugins into consideration.
                self.cl_strategy.optimizer.zero_grad()

                x = observations.x.to(self.cl_strategy.device)
                task_labels = observations.task_labels
                logits = avalanche_forward(self.model, x=x, task_labels=task_labels)
                y_pred = logits.argmax(-1)
                action = self.target_setting.Actions(y_pred=y_pred)

                rewards: Rewards = env.send(action)

                y = rewards.y.to(self.cl_strategy.device)
                # Train the model:
                loss = self.cl_strategy.criterion(logits, y)
                loss.backward()
                self.cl_strategy.optimizer.step()

            all_observations.append(observations)
            all_rewards.append(rewards)

        # Stack all the observations into a single `Observations` object:
        stacked_observations: Observations = Observations.concatenate(all_observations)
        x = stacked_observations.x
        task_labels = stacked_observations.task_labels
        stacked_rewards: Rewards = Rewards.concatenate(all_rewards)
        y = stacked_rewards.y
        return SequoiaExperience(env=env, setting=setting, x=x, y=y, task_labels=task_labels)


def test_no_warning_if_environment_to_experience_is_overwritten(short_sl_track_setting):
    """When the Method doesn't overwrite the `environment_to_experience` method, we
    raise a Warning to let the User know that they can only expect chance online
    accuracy.
    """
    method = MyDummyMethod()
    assert short_sl_track_setting.monitor_training_performance
    with pytest.warns(None) as record:
        method.configure(short_sl_track_setting)
    assert len(record) == 0


================================================
FILE: sequoia/methods/avalanche_methods/conftest.py
================================================
from pathlib import Path

import pytest
import torch
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset

from sequoia.common.config import Config

collect_ignore = []
collect_ignore_glob = []
try:
    from avalanche.training.strategies import BaseStrategy  # type: ignore
except ImportError:
    # pytest.skip(reason="Needs avalanche", allow_module_level=True)
    collect_ignore_glob.append("sequoia/methods/avalanche/**.py")


# FIXME: Overwriting the 'config' fixture from before so it's 'session' scoped instead.
@pytest.fixture(scope="session")
def config(tmp_path_factory):
    test_log_dir = tmp_path_factory.mktemp("test_log_dir")
    return Config(debug=True, seed=123, log_dir=test_log_dir)


@pytest.fixture(scope="session")
def fast_scenario(use_task_labels=False, shuffle=True):
    """Copied directly from Avalanche in "tests/unit_tests_utils.py".

    Not used anywhere atm, but could be used as inspiration for writing quicker tests
    in Sequoia.
    """
    n_samples_per_class = 100
    dataset = make_classification(
        n_samples=10 * n_samples_per_class,
        n_classes=10,
        n_features=6,
        n_informative=6,
        n_redundant=0,
    )

    X = torch.from_numpy(dataset[0]).float()
    y = torch.from_numpy(dataset[1]).long()

    train_X, test_X, train_y, test_y = train_test_split(
        X, y, train_size=0.6, shuffle=True, stratify=y
    )
    from avalanche.benchmarks import nc_benchmark  # type: ignore

    train_dataset = TensorDataset(train_X, train_y)
    test_dataset = TensorDataset(test_X, test_y)
    my_nc_benchmark = nc_benchmark(
        train_dataset, test_dataset, 5, task_labels=use_task_labels, shuffle=shuffle
    )
    return my_nc_benchmark


================================================
FILE: sequoia/methods/avalanche_methods/cwr_star.py
================================================
""" Method based on CWRStar from [Avalanche](https://github.com/ContinualAI/avalanche).

See `avalanche.training.plugins.cwr_star.CWRStarPlugin` or
`avalanche.training.strategies.strategy_wrappers.CWRStar` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Optional, Type

from avalanche.training.strategies import BaseStrategy, CWRStar

from sequoia.methods import register_method
from sequoia.settings.sl import TaskIncrementalSLSetting

from .base import AvalancheMethod


@register_method
@dataclass
class CWRStarMethod(AvalancheMethod[CWRStar]):
    """CWRStar strategy from Avalanche.
    See CWRStar plugin for details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # Name of the CWR layer. Defaults to None, which means that the last fully connected
    # layer will be used.
    cwr_layer_name: Optional[str] = None

    strategy_class: ClassVar[Type[BaseStrategy]] = CWRStar


if __name__ == "__main__":
    from simple_parsing import ArgumentParser

    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    parser = ArgumentParser(__doc__)
    parser.add_arguments(CWRStarMethod, "method")
    args = parser.parse_args()
    method: CWRStarMethod = args.method

    results = setting.apply(method)


================================================
FILE: sequoia/methods/avalanche_methods/cwr_star_test.py
================================================
""" WIP: Tests for the CWRStar Method.

For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type

from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .cwr_star import CWRStarMethod


class TestCWRStarMethod(_TestAvalancheMethod):
    Method: ClassVar[Type[AvalancheMethod]] = CWRStarMethod


================================================
FILE: sequoia/methods/avalanche_methods/ewc.py
================================================
""" Method based on EWC from [Avalanche](https://github.com/ContinualAI/avalanche).

See `avalanche.training.plugins.ewc.EWCPlugin` or
`avalanche.training.strategies.strategy_wrappers.EWC` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Dict, Optional, Type, Union

from avalanche.models import SimpleCNN, SimpleMLP
from avalanche.training.strategies import EWC, BaseStrategy
from simple_parsing import ArgumentParser
from simple_parsing.helpers import choice
from simple_parsing.helpers.hparams import categorical, uniform
from torch import nn

from sequoia.methods import register_method
from sequoia.settings.sl import TaskIncrementalSLSetting

from .base import AvalancheMethod


@register_method
@dataclass
class EWCMethod(AvalancheMethod[EWC]):
    """
    Elastic Weight Consolidation (EWC) strategy from Avalanche.
    See EWC plugin for details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    strategy_class: ClassVar[Type[BaseStrategy]] = EWC

    # Class Variable to hold the types of models available as options for the `model`
    # field below.
    available_models: ClassVar[Dict[str, Type[nn.Module]]] = {
        "simple_cnn": SimpleCNN,
        "simple_mlp": SimpleMLP,
        # "mt_simple_cnn": MTSimpleCNN,  # These two still have some bugs in their loss
        # "mt_simple_mlp": MTSimpleMLP,  # These two still have some bugs in their loss
    }

    # The model.
    model: Union[nn.Module, Type[nn.Module]] = choice(available_models, default=SimpleCNN)

    # Hyperparameter to weigh the penalty inside the total loss. The larger the lambda,
    # the larger the regularization.
    ewc_lambda: float = uniform(1e-3, 1.0, default=0.1)  # todo: set the right value to use here.
    # `separate` to keep a separate penalty for each previous experience. `online` to
    # keep a single penalty summed with a decay factor over all previous tasks.
    mode: str = categorical("separate", "online", default="separate")
    # Used only if `mode` is 'online'. It specify the decay term of the
    # importance matrix.
    decay_factor: Optional[float] = uniform(0.0, 1.0, default=0.9)
    # if True, keep in memory both parameter values and importances for all previous
    # task, for all modes. If False, keep only last parameter values and importances. If
    # mode is `separate`, the value of `keep_importance_data` is set to be True.
    keep_importance_data: bool = categorical(True, False, default=False)


if __name__ == "__main__":

    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    parser = ArgumentParser(__doc__)
    parser.add_arguments(EWCMethod, "method")
    args = parser.parse_args()
    method: EWCMethod = args.method

    results = setting.apply(method)


================================================
FILE: sequoia/methods/avalanche_methods/ewc_test.py
================================================
""" WIP: Tests for the EWC Method.

For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, List, Type

import pytest
from avalanche.models import SimpleCNN, SimpleMLP
from torch.nn import Module

from sequoia.common import Config
from sequoia.conftest import xfail_param
from sequoia.settings.sl import IncrementalSLSetting, TaskIncrementalSLSetting

from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .ewc import EWCMethod
from .patched_models import MTSimpleCNN, MTSimpleMLP


class TestEWCMethod(_TestAvalancheMethod):
    Method: ClassVar[Type[AvalancheMethod]] = EWCMethod
    ignored_parameter_differences: ClassVar[
        List[str]
    ] = _TestAvalancheMethod.ignored_parameter_differences + [
        "decay_factor",
    ]

    @classmethod
    @pytest.fixture(
        params=[
            SimpleCNN,
            SimpleMLP,
            xfail_param(
                MTSimpleCNN,
                reason=(
                    "Shape Mismatch between the saved parameter importance and the "
                    "current weight tensor in EWC plugin."
                ),
            ),
            xfail_param(
                MTSimpleMLP,
                reason=(
                    "Shape Mismatch between the saved parameter importance and the "
                    "current weight tensor in EWC plugin."
                ),
            ),
        ]
    )
    def method(cls, config: Config, request) -> AvalancheMethod:
        """Fixture that returns the Method instance to use when testing/debugging."""
        model_type = request.param
        return cls.Method(model=model_type, train_mb_size=10, train_epochs=1)

    @pytest.mark.timeout(60)
    @pytest.mark.parametrize(
        "model_type",
        [
            SimpleCNN,
            SimpleMLP,
            # MTSimpleCNN,
            xfail_param(
                MTSimpleCNN,
                reason=(
                    "Shape Mismatch between the saved parameter importance and the "
                    "current weight tensor in EWC plugin."
                ),
            ),
            # MTSimpleMLP,
            xfail_param(
                MTSimpleMLP,
                reason=(
                    "Shape Mismatch between the saved parameter importance and the "
                    "current weight tensor in EWC plugin."
                ),
            ),
        ],
    )
    def test_short_task_incremental_setting(
        self,
        model_type: Type[Module],
        short_task_incremental_setting: TaskIncrementalSLSetting,
        config: Config,
    ):
        method = self.Method(model=model_type, train_mb_size=10, train_epochs=1)
        results = short_task_incremental_setting.apply(method, config)
        assert 0.05 < results.average_final_performance.objective

    @pytest.mark.timeout(60)
    @pytest.mark.parametrize(
        "model_type",
        [
            SimpleCNN,
            SimpleMLP,
            xfail_param(
                MTSimpleCNN,
                reason=(
                    "Shape Mismatch between the saved parameter importance and the "
                    "current weight tensor in EWC plugin."
                ),
            ),
            # MTSimpleMLP,
            xfail_param(
                MTSimpleMLP,
                reason=(
                    "Shape Mismatch between the saved parameter importance and the "
                    "current weight tensor in EWC plugin."
                ),
            ),
        ],
    )
    def test_short_class_incremental_setting(
        self,
        model_type: Type[Module],
        short_class_incremental_setting: IncrementalSLSetting,
        config: Config,
    ):
        method = self.Method(model=model_type, train_mb_size=10, train_epochs=1)
        results = short_class_incremental_setting.apply(method, config)
        assert 0.05 < results.average_final_performance.objective

    # @pytest.mark.timeout(60)
    # @pytest.mark.parametrize(
    #     "model_type",
    #     [
    #         SimpleCNN,
    #         SimpleMLP,
    #         xfail_param(
    #             MTSimpleCNN,
    #             reason=(
    #                 "Shape Mismatch between the saved parameter importance and the "
    #                 "current weight tensor in EWC plugin."
    #             ),
    #         ),
    #         # MTSimpleMLP,
    #         xfail_param(
    #             MTSimpleMLP,
    #             reason=(
    #                 "Shape Mismatch between the saved parameter importance and the "
    #                 "current weight tensor in EWC plugin."
    #             ),
    #         ),
    #     ],
    # )
    # def test_short_continual_sl_setting(
    #     self,
    #     model_type: Type[Module],
    #     short_continual_sl_setting: ContinualSLSetting,
    #     config: Config,
    # ):
    #     super().test_short_continual_sl_setting(
    #         model_type=model_type,
    #         short_continual_sl_setting=short_continual_sl_setting,
    #         config=config,
    #     )

    # @pytest.mark.timeout(60)
    # @pytest.mark.parametrize(
    #     "model_type",
    #     [
    #         SimpleCNN,
    #         SimpleMLP,
    #         xfail_param(
    #             MTSimpleCNN,
    #             reason=(
    #                 "Shape Mismatch between the saved parameter importance and the "
    #                 "current weight tensor in EWC plugin."
    #             ),
    #         ),
    #         # MTSimpleMLP,
    #         xfail_param(
    #             MTSimpleMLP,
    #             reason=(
    #                 "Shape Mismatch between the saved parameter importance and the "
    #                 "current weight tensor in EWC plugin."
    #             ),
    #         ),
    #     ],
    # )
    # def test_short_discrete_task_agnostic_sl_setting(
    #     self,
    #     model_type: Type[Module],
    #     short_discrete_task_agnostic_sl_setting: DiscreteTaskAgnosticSLSetting,
    #     config: Config,
    # ):
    #     super().test_short_discrete_task_agnostic_sl_setting(
    #         model_type=model_type,
    #         short_discrete_task_agnostic_sl_setting=short_discrete_task_agnostic_sl_setting,
    #         config=config,
    #     )


================================================
FILE: sequoia/methods/avalanche_methods/experience.py
================================================
""" 'Wrapper' around a PassiveEnvironment from Sequoia, disguising it as an 'Experience'
from Avalanche.
"""
from typing import List, Optional

import tqdm
from avalanche.benchmarks.scenarios import Experience
from avalanche.benchmarks.utils.avalanche_dataset import AvalancheDataset, AvalancheDatasetType
from torch import Tensor
from torch.utils.data import TensorDataset

from sequoia.common.gym_wrappers.utils import IterableWrapper
from sequoia.settings.sl import IncrementalSLSetting, PassiveEnvironment, SLSetting
from sequoia.settings.sl.incremental.objects import Observations, Rewards


class SequoiaExperience(IterableWrapper, Experience):
    def __init__(
        self,
        env: PassiveEnvironment,
        setting: IncrementalSLSetting,
        x: Tensor = None,
        y: Tensor = None,
        task_labels: Tensor = None,
    ):
        super().__init__(env=env)
        self.setting = setting
        self.type: str
        if isinstance(setting, IncrementalSLSetting):
            self.task_id = setting.current_task_id
        else:
            # No known task, or we don't have access to the task ID, so just consider
            # this to come from the first task.
            self.task_id = 0

        if env is setting.train_env:
            self.type = "Train"
            self.transforms = setting.train_transforms
        elif env is setting.val_env:
            self.type = "Valid"
            self.transforms = setting.val_transforms
        else:
            self.type = "Test"
            assert env is setting.test_env
            self.transforms = setting.test_transforms
        self.name = f"{self.type}_{self.task_id}"

        if x is None and y is None and task_labels is None:
            # Collect the x, y, and perhaps t if they aren't provided.
            all_observations: List[Observations] = []
            all_rewards: List[Rewards] = []

            for batch in tqdm.tqdm(self, desc="Converting environment into TensorDataset"):
                observations: Observations
                rewards: Optional[Rewards]
                if isinstance(batch, Observations):
                    observations = batch
                    rewards = None
                else:
                    assert isinstance(batch, tuple) and len(batch) == 2
                    observations, rewards = batch

                if rewards is None:
                    # Need to send actions to the env before we can actually get the
                    # associated Reward.
                    # Here we sample a random action (no other choice really..) and so we
                    # are going to get bad results in case the online performance is being
                    # evaluated.
                    action = self.env.action_space.sample()
                    if observations.batch_size != action.shape[0]:
                        action = action[: observations.batch_size]

                    rewards = self.env.send(action)

                all_observations.append(observations)
                all_rewards.append(rewards)
            # TODO: This will be absolutely unfeasable for larger dataset like ImageNet.
            stacked_observations: Observations = Observations.concatenate(all_observations)
            x = stacked_observations.x
            task_labels = stacked_observations.task_labels
            assert all(
                y_i is not None for y in all_rewards for y_i in y
            ), "Need fully labeled train dataset for now."
            stacked_rewards: Rewards = Rewards.concatenate(all_rewards)
            y = stacked_rewards.y

        if task_labels is not None and all(t is None for t in task_labels):
            # The task labels are None, even at training time, which indicates this
            # is probably a `ContinualSLSetting`
            task_labels = None
        elif isinstance(task_labels, Tensor):
            task_labels = task_labels.cpu().numpy().tolist()

        dataset = TensorDataset(x, y)
        self._tensor_dataset = dataset
        self._dataset = AvalancheDataset(
            dataset=dataset,
            task_labels=task_labels,
            targets=y.tolist(),
            dataset_type=AvalancheDatasetType.CLASSIFICATION,
        )
        # self.task_pattern_indices = {}
        # self.task_set = ...

        # class DummyDataset(AvalancheDataset):
        #     pass
        #     def train(self):
        #         return self

        # self._dataset = self
        # self.tasks_pattern_indices = {} #dict({0: np.arange(len(self._dataset))})
        # self.task_set = ... #_TaskSubsetDict(self._dataset)
        # self._dataset = env
        # from avalanche.benchmarks import GenericScenarioStream
        # class FakeStream(GenericScenarioStream):
        #     pass
        # self.origin_stream = FakeStream("train", scenario="whatever")
        # self.origin_stream.name = "train"

    @property
    def dataset(self) -> AvalancheDataset:
        return self._dataset

    @dataset.setter
    def dataset(self, value: AvalancheDataset) -> None:
        self._dataset = value

    @property
    def task_label(self):
        """
        The task label. This value will never have value "None". However,
        for scenarios that don't produce task labels a placeholder value like 0
        is usually set. Beware that this field is meant as a shortcut to obtain
        a unique task label: it assumes that only patterns labeled with a
        single task label are present. If this experience contains patterns from
        multiple tasks, accessing this property will result in an exception.
        """
        if not self.setting.task_labels_at_test_time:
            return 0
        if self.type == "Test" and self.setting.task_labels_at_test_time:
            raise RuntimeError("More than one tasks present, can't use this property.")
        return self.task_id

    @property
    def task_labels(self):
        return self._tensor_dataset.tensors[-1]

    @property
    def current_experience(self):
        # Return the index of the
        return self.task_id

    @property
    def origin_stream(self) -> SLSetting:
        # NOTE: This
        class DummyStream(list):
            name = self.name

        # raise NotImplementedError
        return DummyStream()

    # def train(self):
    #     return self


================================================
FILE: sequoia/methods/avalanche_methods/gdumb.py
================================================
""" Method based on GDumb from [Avalanche](https://github.com/ContinualAI/avalanche).

See `avalanche.training.plugins.gdumb.GDumbPlugin` or
`avalanche.training.strategies.strategy_wrappers.GDumb` for more info.

BUG: There appears to be a bug in the GDumb plugin, caused by a mismatch in the tensor
shapes when concatenating them into a TensorDataset, when batch size > 1.
"""
from collections import defaultdict
from dataclasses import dataclass
from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type

import torch
import tqdm
from avalanche.benchmarks.utils import AvalancheConcatDataset
from avalanche.training.plugins.gdumb import GDumbPlugin as _GDumbPlugin
from avalanche.training.strategies import BaseStrategy, GDumb
from simple_parsing import ArgumentParser
from simple_parsing.helpers.hparams import uniform
from torch import Tensor
from torch.utils.data import TensorDataset

from sequoia.methods import register_method
from sequoia.settings.sl import ClassIncrementalSetting, TaskIncrementalSLSetting
from sequoia.utils.logging_utils import get_logger

from .base import AvalancheMethod

logger = get_logger(__name__)


class GDumbPlugin(_GDumbPlugin):
    """Patched version of the GDumbPlugin from Avalanche.

    The base implementation is quite inefficient: for each new item, it does an entire
    concatenation with the current dataset.
    This uses lists instead, and only concatenates once.

    It also uses the task labels from each sample in the dataset, rather than from the
    current experience, as there might be more than one task in the dataset.
    """

    def __init__(self, mem_size: int = 200):
        super().__init__(mem_size=mem_size)
        self.ext_mem: Dict[Any, Tuple[List[Tensor], List[Tensor]]] = {}
        # count occurrences for each class
        self.counter: Dict[Any, Dict[Any, int]] = {}

    def after_train_dataset_adaptation(self, strategy: BaseStrategy, **kwargs):
        """Before training we make sure to organize the memory following
        GDumb approach and updating the dataset accordingly.
        """

        # for each pattern, add it to the memory or not
        dataset = strategy.experience.dataset

        pbar = tqdm.tqdm(dataset, desc="Exhausting dataset to create GDumb buffer")
        for pattern, target, task_id in pbar:
            target = torch.as_tensor(target)
            target_value = target.item()

            if len(pattern.size()) == 1:
                pattern = pattern.unsqueeze(0)

            current_counter = self.counter.setdefault(task_id, defaultdict(int))
            current_mem = self.ext_mem.setdefault(task_id, ([], []))

            if current_counter == {}:
                # any positive (>0) number is ok
                patterns_per_class = 1
            else:
                patterns_per_class = int(self.mem_size / len(current_counter.keys()))

            if (
                target_value not in current_counter
                or current_counter[target_value] < patterns_per_class
            ):
                # add new pattern into memory
                if sum(current_counter.values()) >= self.mem_size:
                    # full memory: replace item from most represented class
                    # with current pattern
                    to_remove = max(current_counter, key=current_counter.get)

                    # dataset_size = len(current_mem)
                    # for j in range(dataset_size):
                    #     if current_mem.tensors[1][j].item() == to_remove:
                    #         current_mem.tensors[0][j] = pattern
                    #         current_mem.tensors[1][j] = target
                    #         break

                    dataset_size = len(current_mem[0])
                    for j in range(dataset_size):
                        if current_mem[1][j].item() == to_remove:
                            current_mem[0][j] = pattern
                            current_mem[1][j] = target
                            break
                    current_counter[to_remove] -= 1
                else:
                    # memory not full: add new pattern
                    current_mem[0].append(pattern)
                    current_mem[1].append(target)

                # Indicate that we've changed the number of stored instances of this
                # class.
                current_counter[target_value] += 1

        task_datasets: Dict[Any, TensorDataset] = {}
        for task_id, task_mem_tuple in self.ext_mem.items():
            patterns, targets = task_mem_tuple
            task_dataset = TensorDataset(torch.stack(patterns, dim=0), torch.stack(targets, dim=0))
            task_datasets[task_id] = task_dataset
            logger.debug(
                f"There are {len(task_dataset)} entries from task {task_id} in the new " f"dataset."
            )

        adapted_dataset = AvalancheConcatDataset(task_datasets.values())
        strategy.adapted_dataset = adapted_dataset


@register_method
@dataclass
class GDumbMethod(AvalancheMethod[GDumb]):
    """GDumb strategy from Avalanche.
    See GDumbPlugin for more details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    name: ClassVar[str] = "gdumb"

    # replay buffer size.
    mem_size: int = uniform(100, 1_000, default=200)

    # The number of training epochs.
    train_epochs: int = uniform(1, 100, default=20)

    strategy_class: ClassVar[Type[BaseStrategy]] = GDumb

    def create_cl_strategy(self, setting: ClassIncrementalSetting) -> GDumb:
        strategy = super().create_cl_strategy(setting)
        # TODO: Replace the GDumbPlugin with our own version, with the same parameters.
        old_gdumb_plugin_index: Optional[int] = None
        for i, plugin in enumerate(strategy.plugins):
            if isinstance(plugin, _GDumbPlugin):
                old_gdumb_plugin_index = i
                break

        if old_gdumb_plugin_index is None:
            raise RuntimeError("Couldn't find the Strategy's GDumb plugin!")

        old_gdumb_plugin: _GDumbPlugin = strategy.plugins.pop(old_gdumb_plugin_index)
        logger.info("Replacing the GDumbPlugin with our 'patched' version.")

        new_gdumb_plugin = GDumbPlugin(mem_size=old_gdumb_plugin.mem_size)
        # NOTE: Might not be necessarily, since those should be empty, but here we also
        # copy the state from the old plugin to the new one.
        new_gdumb_plugin.ext_mem = old_gdumb_plugin.ext_mem
        new_gdumb_plugin.counter = old_gdumb_plugin.counter

        strategy.plugins.insert(old_gdumb_plugin_index, new_gdumb_plugin)
        return strategy


if __name__ == "__main__":
    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    parser = ArgumentParser(__doc__)
    parser.add_arguments(GDumbMethod, "method")
    args = parser.parse_args()
    method: GDumbMethod = args.method

    results = setting.apply(method)


================================================
FILE: sequoia/methods/avalanche_methods/gdumb_test.py
================================================
""" WIP: Tests for the GDumb Method.

For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type

from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .gdumb import GDumbMethod


class TestGDumbMethod(_TestAvalancheMethod):
    Method: ClassVar[Type[AvalancheMethod]] = GDumbMethod


================================================
FILE: sequoia/methods/avalanche_methods/gem.py
================================================
""" Method based on GEM from [Avalanche](https://github.com/ContinualAI/avalanche).

See `avalanche.training.plugins.gem.GEMPlugin` or
`avalanche.training.strategies.strategy_wrappers.GEM` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Type

from avalanche.training.strategies import GEM, BaseStrategy
from simple_parsing import ArgumentParser
from simple_parsing.helpers.hparams import uniform

from sequoia.methods import register_method
from sequoia.settings.sl import TaskIncrementalSLSetting

from .base import AvalancheMethod


@register_method
@dataclass
class GEMMethod(AvalancheMethod[GEM]):
    """Gradient Episodic Memory (GEM) strategy from Avalanche.
    See GEM plugin for details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # number of patterns per experience in the memory
    patterns_per_exp: int = uniform(10, 1000, default=100)
    # Offset to add to the projection direction in order to favour backward transfer
    # (gamma in original paper).
    memory_strength: float = uniform(1e-2, 1.0, default=0.5)

    strategy_class: ClassVar[Type[BaseStrategy]] = GEM


if __name__ == "__main__":
    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    parser = ArgumentParser(__doc__)
    parser.add_arguments(GEMMethod, "method")
    args = parser.parse_args()
    method: GEMMethod = args.method

    results = setting.apply(method)


================================================
FILE: sequoia/methods/avalanche_methods/gem_test.py
================================================
""" WIP: Tests for the GEM Method.

For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type

from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .gem import GEMMethod


class TestGEMMethod(_TestAvalancheMethod):
    Method: ClassVar[Type[AvalancheMethod]] = GEMMethod


================================================
FILE: sequoia/methods/avalanche_methods/lwf.py
================================================
""" Method based on LwF from [Avalanche](https://github.com/ContinualAI/avalanche).

See `avalanche.training.plugins.lwf.LwFPlugin` or
`avalanche.training.strategies.strategy_wrappers.LwF` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Optional, Sequence, Type, Union

from avalanche.training.plugins.lwf import LwFPlugin as LwFPlugin_
from avalanche.training.strategies import LwF
from simple_parsing.helpers.hparams import uniform
from torch import Tensor

from sequoia.methods import register_method
from sequoia.settings.sl import SLSetting, TaskIncrementalSLSetting

from .base import AvalancheMethod


class LwFPlugin(LwFPlugin_):
    """Patching a little error that happens in the 'LwFPlugin' which happens when a
    Multi-Task model is used, and when we grow the output space after each task.
    """

    def _distillation_loss(self, out: Tensor, prev_out: Tensor) -> Tensor:
        """
        Compute distillation loss between output of the current model and
        and output of the previous (saved) model.
        """
        # Little "patch" to make sure this doesn't break if the shapes aren't exactly
        # the same:
        if out.shape != prev_out.shape:
            prev_outputs = prev_out.shape[-1]
            current_outputs = out.shape[-1]
            assert prev_outputs < current_outputs
            # Only consider the loss for the overlapping classes. We assume that the
            # first columns are for the same class, so this should be fine.
            out = out[..., :prev_outputs]

        return super()._distillation_loss(out=out, prev_out=prev_out)


@register_method
@dataclass
class LwFMethod(AvalancheMethod[LwF]):
    """Learning without Forgetting strategy from Avalanche.
    See LwF plugin for details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # changing the 'name' in this case here, because the default name would be
    # 'lw_f'.
    name: ClassVar[str] = "lwf"
    # distillation hyperparameter. It can be either a float number or a list containing
    # alpha for each experience.
    alpha: Union[float, Sequence[float]] = uniform(
        1e-2, 1, default=1
    )  # TODO: Check if the range makes sense.
    # softmax temperature for distillation
    temperature: float = uniform(1, 10, default=2)  # TODO: Check if the range makes sense.

    strategy_class: ClassVar[Type[LwF]] = LwF

    def create_cl_strategy(self, setting: SLSetting) -> LwF:
        strategy = super().create_cl_strategy(setting)

        # Find and replace the 'LwFPlugin' with our "patched" version:
        plugin_index: Optional[int] = None
        for i, plugin in enumerate(strategy.plugins):
            if type(plugin) is LwFPlugin_:
                plugin_index = i
                break
        assert plugin_index is not None, "LwF strategy should have an LwF Plugin, no?"
        assert isinstance(plugin_index, int)

        old_plugin: LwFPlugin_ = strategy.plugins[plugin_index]
        new_plugin = LwFPlugin(alpha=old_plugin.alpha, temperature=old_plugin.temperature)
        new_plugin.prev_model = old_plugin.prev_model
        strategy.plugins[plugin_index] = new_plugin

        return strategy


if __name__ == "__main__":
    from simple_parsing import ArgumentParser

    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    parser = ArgumentParser(__doc__)
    parser.add_arguments(LwFMethod, "method")
    args = parser.parse_args()
    method: LwFMethod = args.method

    results = setting.apply(method)


================================================
FILE: sequoia/methods/avalanche_methods/lwf_test.py
================================================
""" WIP: Tests for the LwF Method.

For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type

from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .lwf import LwFMethod


class TestLwFMethod(_TestAvalancheMethod):
    Method: ClassVar[Type[AvalancheMethod]] = LwFMethod


================================================
FILE: sequoia/methods/avalanche_methods/naive.py
================================================
""" 'Naive' method from [Avalanche](https://github.com/ContinualAI/avalanche).

See `avalanche.training.strategies.Naive` for more info.
"""
from typing import ClassVar, Type

from avalanche.training.strategies import BaseStrategy, Naive

from sequoia.settings.sl import TaskIncrementalSLSetting

from .base import AvalancheMethod


class NaiveMethod(AvalancheMethod[Naive]):
    """'Naive' Strategy from [Avalanche](https://github.com/ContinualAI/avalanche).

    The simplest (and least effective) Continual Learning strategy. Naive just
    incrementally fine tunes a single model without employing any method
    to contrast the catastrophic forgetting of previous knowledge.
    This strategy does not use task identities.

    Naive is easy to set up and its results are commonly used to show the worst
    performing baseline.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    strategy_class: ClassVar[Type[BaseStrategy]] = Naive


if __name__ == "__main__":
    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    method = NaiveMethod()
    results = setting.apply(method)


================================================
FILE: sequoia/methods/avalanche_methods/naive_test.py
================================================
""" WIP: Tests for the Naive Method.

For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type

from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .naive import NaiveMethod


class TestNaiveMethod(_TestAvalancheMethod):
    Method: ClassVar[Type[AvalancheMethod]] = NaiveMethod


================================================
FILE: sequoia/methods/avalanche_methods/patched_models.py
================================================
""" Patch for the multi-task models in Avalanche, so that we can evaluate on future
tasks, by selecting random prediction.
"""
import warnings
from abc import abstractmethod
from typing import Any, List, Optional

import torch
from avalanche.models import MTSimpleCNN as _MTSimpleCNN
from avalanche.models import MTSimpleMLP as _MTSimpleMLP
from avalanche.models import MultiHeadClassifier as _MultiHeadClassifier
from avalanche.models.dynamic_modules import MultiTaskModule
from torch import Tensor
from torch.nn import functional as F

from sequoia.utils import get_logger

logger = get_logger(__name__)


class PatchedMultiTaskModule(MultiTaskModule):
    @property
    @abstractmethod
    def known_task_ids(self) -> List[Any]:
        pass

    def task_inference_forward_pass(self, x: Tensor) -> Tensor:
        """Forward pass with a simple form of task inference."""
        # We don't have access to task labels (`task_labels` is None).
        # --> Perform a simple kind of task inference:
        # 1. Perform a forward pass with each task's output head;
        # 2. Merge these predictions into a single prediction somehow.

        # NOTE: This assumes that the observations are batched.
        # These are used below to indicate the shape of the different tensors.
        B = x.shape[0]
        T = len(self.known_task_ids)
        # N = self.action_space.n
        # Tasks encountered previously and for which we have an output head.
        # TODO: This assumes that the keys of the ModuleDict are integers.
        known_task_ids: List[int] = list(int(t) for t in self.known_task_ids)
        assert known_task_ids
        # Placeholder for the predictions from each output head for each item in the
        # batch
        task_outputs = [None for _ in known_task_ids]  # [T, B, N]

        # Get the forward pass for each task.
        for task_id in known_task_ids:
            # Create 'fake' Observations for this forward pass, with 'fake' task labels.
            # NOTE: We do this so we can call `self.forward` and not get an infinite
            # recursion.
            task_labels = torch.full([B], task_id, device=x.device, dtype=int)
            # task_observations = replace(observations, task_labels=task_labels)

            # Setup the model for task `task_id`, and then do a forward pass.
            task_forward_pass = self.forward(x, task_labels=task_labels)

            task_outputs[task_id] = task_forward_pass
        if len(task_outputs) == 1:
            return task_outputs[0]

        N = max(task_output.shape[-1] for task_output in task_outputs)

        # 'Merge' the predictions from each output head using some kind of task
        # inference.
        assert all(item is not None for item in task_outputs)
        # Stack the predictions (logits) from each output head.
        # NOTE: Here in Avalanche it's possible that each output head's output had a
        # different shape. Therefore we need to handle it like a list of tensors rather
        # than a stacked tensor.
        if all(not task_output.shape[-1] == N for task_output in task_outputs):
            raise NotImplementedError("TODO: Output heads didn't give outputs of the same shape!")
            # logits_from_each_head = task_outputs
            # probs_from_each_head = [
            #     torch.softmax(head_logits, dim=-1) for head_logits in logits_from_each_head
            # ]
            # IDEA: Add zeros to the outputs of a different shape.
        else:
            logits_from_each_head = torch.stack(task_outputs, dim=1)
            # Normalize the logits from each output head with softmax.
            # Example with batch size of 1, output heads = 2, and classes = 4:
            # logits from each head:  [[[123, 456, 123, 123], [1, 1, 2, 1]]]
            # 'probs' from each head: [[[0.1, 0.6, 0.1, 0.1], [0.2, 0.2, 0.4, 0.2]]]
            probs_from_each_head = torch.softmax(logits_from_each_head, dim=-1)

        assert probs_from_each_head.shape == (B, T, N)
        # Simple kind of task inference:
        # For each item in the batch, use the class that has the highest probability
        # accross all output heads.
        max_probs_across_heads, chosen_head_per_class = probs_from_each_head.max(dim=1)
        assert max_probs_across_heads.shape == (B, N)
        assert chosen_head_per_class.shape == (B, N)
        # Example (continued):
        # max probs across heads:        [[0.2, 0.6, 0.4, 0.2]]
        # chosen output heads per class: [[1, 0, 1, 1]]

        # Determine which output head has highest "confidence":
        max_prob_value, most_probable_class = max_probs_across_heads.max(dim=1)
        assert max_prob_value.shape == (B,)
        assert most_probable_class.shape == (B,)
        # Example (continued):
        # max_prob_value: [0.6]
        # max_prob_class: [1]

        # A bit of boolean trickery to get what we need, which is, for each item, the
        # index of the output head that gave the most confident prediction.
        mask = F.one_hot(most_probable_class, N).to(dtype=bool, device=x.device)
        chosen_output_head_per_item = chosen_head_per_class[mask]
        assert mask.shape == (B, N)
        assert chosen_output_head_per_item.shape == (B,)
        # Example (continued):
        # mask: [[False, True, False, True]]
        # chosen_output_head_per_item: [0]

        # Create a bool tensor to select items associated with the chosen output head.
        selected_mask = F.one_hot(chosen_output_head_per_item, T).to(dtype=bool, device=x.device)
        assert selected_mask.shape == (B, T)
        # Select the logits using the mask:
        selected_outputs = logits_from_each_head[selected_mask]
        assert selected_outputs.shape == (B, N)
        return selected_outputs


from avalanche.benchmarks.utils import AvalancheDataset


class MultiHeadClassifier(_MultiHeadClassifier):
    def __init__(self, in_features: int, initial_out_features: int = 2):
        """Multi-head classifier with separate classifiers for each task.

        Typically used in task-incremental scenarios where task labels are
        available and provided to the model.

        :param in_features: number of input features.
        :param initial_out_features: initial number of classes (can be
            dynamically expanded).
        """
        super().__init__(in_features=in_features, initial_out_features=initial_out_features)

    def adaptation(self, dataset: AvalancheDataset):
        """If `dataset` contains new tasks, a new head is initialized.

        :param dataset: data from the current experience.
        :return:
        """
        super().adaptation(dataset)

    def forward(self, x: Tensor, task_labels: Optional[Tensor]) -> Tensor:
        if task_labels is None:
            # We don't do task inference in this layer, since it's handled in the
            # patched models below.
            raise NotImplementedError("Shouldn't get None task labels in the MultiHeadClassifier!")
        else:
            assert isinstance(task_labels, Tensor)
        return super().forward(x, task_labels)

    def forward_single_task(self, x: Tensor, task_label: Optional[Tensor]):
        """compute the output given the input `x`. This module uses the task
        label to activate the correct head.

        :param x:
        :param task_label:
        :return:
        """
        if task_label is not None:
            if not isinstance(task_label, int):
                task_label = task_label.item()
        # TODO: If/when we make the context variable truly continuous, then this
        # won't work.
        assert task_label is None or isinstance(task_label, int), task_label

        if str(task_label) not in self.classifiers:
            # TODO: Let's use the most 'recent' output head instead?
            known_task_labels = list(self.classifiers.keys())
            assert known_task_labels, "Need to have seen at least one task!"
            last_known_task = known_task_labels[-1]
            task_label = last_known_task
            warnings.warn(
                RuntimeWarning(
                    f"performing forward pass on previously unseen task, will pretend "
                    f"inputs come from task {last_known_task} instead."
                )
            )
        return super().forward_single_task(x, task_label)


class MTSimpleCNN(_MTSimpleCNN, PatchedMultiTaskModule):
    def __init__(self):
        super().__init__()
        self.classifier = MultiHeadClassifier(in_features=64)

    def forward(self, x: Tensor, task_labels: Optional[Tensor] = None) -> Tensor:
        if task_labels is None:
            # NOTE: When training, we could rely on a property like `current_task_id`
            # being set within the `on_task_switch` callback.
            # The reason for this is that in some of the strategies, `GEM` strategy (and
            # others), when training they sometimes don't pass a task index! In the case
            # of GEM though, it doesnt pass the task id when calculating the
            # reference gradient, so I'm not sure we want to be using this in this case.
            if self.training:
                warnings.warn(
                    RuntimeWarning("Using task inference in the forward pass while training?")
                )
            return self.task_inference_forward_pass(x=x)
        return super().forward(x=x, task_labels=task_labels)

    @property
    def known_task_ids(self) -> List[Any]:
        return list(self.classifier.classifiers.keys())


class MTSimpleMLP(_MTSimpleMLP, PatchedMultiTaskModule):
    def __init__(self, input_size: int = 28 * 28, hidden_size: int = 512):
        """
        Multi-task MLP with multi-head classifier.
        """
        super().__init__(input_size=input_size, hidden_size=hidden_size)
        self.classifier = MultiHeadClassifier(in_features=hidden_size)

    def forward(self, x: Tensor, task_labels: Optional[Tensor] = None) -> Tensor:
        if task_labels is None:
            if self.training:
                warnings.warn(
                    RuntimeWarning("Using task inference in the forward pass while training?")
                )
            return self.task_inference_forward_pass(x=x)
        return super().forward(x=x, task_labels=task_labels)

    @property
    def known_task_ids(self) -> List[Any]:
        return list(self.classifier.classifiers.keys())


================================================
FILE: sequoia/methods/avalanche_methods/plugins.py
================================================
""" WIP: @lebrice: Plugins that I was using while trying to get the BaseStrategy and
plugins from Avalanche to work directly with the Sequoia environments.
"""
from typing import List

import numpy as np
import torch
from avalanche.training.plugins import StrategyPlugin
from avalanche.training.strategies import BaseStrategy
from torch import Tensor
from torch.utils.data import TensorDataset


class GatherDataset(StrategyPlugin):
    """IDEA: A Plugin that accumulates the tensors from the env to create a "proper"
    Dataset to be used by the plugins.
    """

    def __init__(self):
        self.train_xs: List[Tensor] = []
        self.train_ys: List[Tensor] = []
        self.train_ts: List[Tensor] = []
        self.train_dataset: TensorDataset
        self.train_datasets: List[TensorDataset] = []
        self.eval_xs: List[Tensor] = []
        self.eval_ys: List[Tensor] = []
        self.eval_ts: List[Tensor] = []
        self.eval_dataset: TensorDataset
        self.eval_datasets: List[TensorDataset] = []

    def after_forward(self, strategy, **kwargs):
        x, y, t = strategy.mb_x, strategy.mb_task_id, strategy.mb_y
        self.train_xs.append(x)
        self.train_ys.append(y)
        self.train_ts.append(t)
        return super().after_forward(strategy, **kwargs)

    def after_training_epoch(self, strategy, **kwargs):
        self.train_dataset = TensorDataset(
            torch.cat(self.train_xs), torch.cat(self.train_ys), torch.cat(self.train_ts)
        )
        self.train_xs.clear()
        self.train_ys.clear()
        self.train_ts.clear()
        return super().after_training_epoch(strategy, **kwargs)

    def after_eval_forward(self, strategy, **kwargs):
        x, y, t = strategy.mb_x, strategy.mb_task_id, strategy.mb_y
        self.eval_xs.append(x)
        self.eval_ys.append(y)
        self.eval_ts.append(t)
        return super().after_eval_forward(strategy, **kwargs)

    def after_eval_exp(self, strategy, **kwargs):
        self.eval_dataset = TensorDataset(
            torch.cat(self.eval_xs), torch.cat(self.eval_ys), torch.cat(self.eval_ts)
        )
        self.eval_xs.clear()
        self.eval_ys.clear()
        self.eval_ts.clear()
        if strategy.setting:
            strategy.experience.dataset = self.eval_dataset
        self.eval_datasets.append(self.eval_dataset)
        return super().after_eval_exp(strategy, **kwargs)

    def train(self):
        return self.train_dataset

    def eval(self):
        return self.eval_dataset

    def after_training_exp(self, strategy: "BaseStrategy", **kwargs):
        """
        Compute importances of parameters after each experience.
        """
        if strategy.setting:
            strategy.experience.dataset = self.train_dataset
        self.train_datasets.append(self.train_dataset)
        return super().after_training_exp(strategy, **kwargs)

    # def after_eval_exp(self, strategy: "BaseStrategy", **kwargs):
    #     """
    #     Compute importances of parameters after each experience.
    #     """
    #     return super().after_eval_exp(strategy, **kwargs)


class OnlineAccuracyPlugin(StrategyPlugin):
    def __init__(self):
        self.current_task_accuracies: List[float] = []
        self.all_task_accuracies: List[List[float]] = []
        self.enabled: bool = True

    def _calc_accuracy(self, strategy: "BaseStrategy") -> float:
        y_pred = strategy.logits.argmax(-1)
        y = strategy.mb_y
        acc = ((y_pred == y).sum() / len(y_pred)).item()
        return acc

    def after_forward(self, strategy: "BaseStrategy", **kwargs):
        if not self.enabled:
            return
        acc = self._calc_accuracy(strategy)
        self.current_task_accuracies.append(acc)
        return super().after_forward(strategy, **kwargs)

    def after_training_epoch(self, strategy, **kwargs):
        # Turn off at the end of the first epoch.
        self.all_task_accuracies.append(np.mean(self.current_task_accuracies))
        self.current_task_accuracies.clear()
        self.enabled = False
        return super().after_training_epoch(strategy, **kwargs)


================================================
FILE: sequoia/methods/avalanche_methods/replay.py
================================================
""" Method based on Replay from [Avalanche](https://github.com/ContinualAI/avalanche).

See `avalanche.training.plugins.replay.ReplayPlugin` or
`avalanche.training.strategies.strategy_wrappers.Replay` for more info.
"""
import warnings
from dataclasses import dataclass
from typing import ClassVar, Optional, Type

from avalanche.training.plugins.replay import (
    ExperienceBalancedStoragePolicy as ExperienceBalancedStoragePolicy_,
)
from avalanche.training.plugins.replay import ReplayPlugin as ReplayPlugin_
from avalanche.training.plugins.replay import StoragePolicy
from avalanche.training.strategies import BaseStrategy, Replay
from simple_parsing.helpers.hparams import uniform

from sequoia.methods import register_method
from sequoia.settings.sl import SLSetting, TaskIncrementalSLSetting

from .base import AvalancheMethod


class ReplayPlugin(ReplayPlugin_):
    def __init__(self, mem_size: int = 200, storage_policy: Optional["StoragePolicy"] = None):
        super().__init__(mem_size=mem_size, storage_policy=storage_policy)
        # "patch" the ExperienceBalanchedStoragePolicy:
        if type(self.storage_policy) is ExperienceBalancedStoragePolicy_:
            self.storage_policy = ExperienceBalancedStoragePolicy(
                ext_mem=self.storage_policy.ext_mem,
                mem_size=self.storage_policy.mem_size,
                adaptive_size=self.storage_policy.adaptive_size,
                num_experiences=self.storage_policy.num_experiences,
            )


class ExperienceBalancedStoragePolicy(ExperienceBalancedStoragePolicy_):
    def __call__(self, strategy: BaseStrategy, **kwargs):
        num_exps = strategy.training_exp_counter + 1
        num_exps = num_exps if self.adaptive_size else self.num_experiences
        curr_data = strategy.experience.dataset

        # new group may be bigger because of the remainder.
        group_size = self.mem_size // num_exps
        new_group_size = group_size + (self.mem_size % num_exps)

        self.subsample_all_groups(group_size * (num_exps - 1))
        curr_data = self.subsample_single(curr_data, new_group_size)
        self.ext_mem[strategy.training_exp_counter + 1] = curr_data

        # buffer size should always equal self.mem_size
        len_tot = sum(len(el) for el in self.ext_mem.values())

        # TODO: Just disabling the failing assert check for now. Should check if this
        # makes any difference in the performance of the plugin:
        # assert len_tot == self.mem_size
        warnings.warn(
            RuntimeWarning(
                f"Ignoring a failing assert in Avalanche's Replay plugin: "
                f"len_tot ({len_tot}) != self.mem_size ({self.mem_size})"
            )
        )

        # NOTE: Could also avoid copying the code from their method here by suppressing
        # AssertionErrors:
        # import contextlib
        # with contextlib.suppress(AssertionError):
        #     return super().__call__(strategy=strategy, **kwargs)


@register_method
@dataclass
class ReplayMethod(AvalancheMethod[Replay]):
    """Replay strategy from Avalanche.
    See Replay plugin for details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # Replay buffer size.
    mem_size: int = uniform(100, 2_000, default=200)

    strategy_class: ClassVar[Type[BaseStrategy]] = Replay

    def create_cl_strategy(self, setting: SLSetting) -> Replay:
        strategy = super().create_cl_strategy(setting)

        # Find and replace the original plugin with our "patched" version:
        plugin_index: Optional[int] = None
        for i, plugin in enumerate(strategy.plugins):
            if type(plugin) is ReplayPlugin_:
                plugin_index = i
                break
        assert plugin_index is not None, "strategy should have the Plugin, no?"
        assert isinstance(plugin_index, int)

        old_plugin: ReplayPlugin_ = strategy.plugins[plugin_index]
        new_plugin = ReplayPlugin(
            mem_size=old_plugin.mem_size,
            storage_policy=old_plugin.storage_policy,
        )
        strategy.plugins[plugin_index] = new_plugin
        return strategy


if __name__ == "__main__":
    from simple_parsing import ArgumentParser

    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    parser = ArgumentParser(__doc__)
    parser.add_arguments(ReplayMethod, "method")
    args = parser.parse_args()
    method: ReplayMethod = args.method

    results = setting.apply(method)


================================================
FILE: sequoia/methods/avalanche_methods/replay_test.py
================================================
""" WIP: Tests for the Replay Method.

For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type

from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .replay import ReplayMethod


class TestReplayMethod(_TestAvalancheMethod):
    Method: ClassVar[Type[AvalancheMethod]] = ReplayMethod


================================================
FILE: sequoia/methods/avalanche_methods/synaptic_intelligence.py
================================================
""" Method based on SynapticIntelligence from [Avalanche](https://github.com/ContinualAI/avalanche).

See `avalanche.training.plugins.synaptic_intelligence.SynapticIntelligencePlugin` or
`avalanche.training.strategies.strategy_wrappers.SynapticIntelligence` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Optional, Set, Type

import numpy as np
import torch
from avalanche.training.plugins.synaptic_intelligence import EwcDataType, ParamDict
from avalanche.training.plugins.synaptic_intelligence import (
    SynapticIntelligencePlugin as SynapticIntelligencePlugin_,
)
from avalanche.training.plugins.synaptic_intelligence import SynDataType
from avalanche.training.strategies import BaseStrategy, SynapticIntelligence
from simple_parsing import ArgumentParser
from simple_parsing.helpers.hparams import uniform
from torch import Tensor
from torch.nn import Module

from sequoia.methods import register_method
from sequoia.settings.sl import SLSetting, TaskIncrementalSLSetting

from .base import AvalancheMethod


class SynapticIntelligencePlugin(SynapticIntelligencePlugin_):
    # TODO: Why do they have everything as a static method rather than as a classmethod?
    # Makes it almost impossible to extend this SynapticIntelligencePlugin!
    @staticmethod
    @torch.no_grad()
    def extract_weights(model: Module, target: ParamDict, excluded_parameters: Set[str]):
        params = SynapticIntelligencePlugin_.allowed_parameters(model, excluded_parameters)
        # Getting this error:
        # RuntimeError: The expanded size of the tensor (128) must match the existing
        # size (256) at non-singleton dimension 0.  Target sizes: [128].
        # Tensor sizes: [256]
        # TODO: @lebrice For now I'll just replace the entries in that 'target' dict if
        # the shapes don't match, and hope it still works.
        for name, param in params:
            # target[name][...] = param.detach().cpu().flatten()
            if param.shape == target[name].shape:
                target[name][...] = param.detach().cpu().flatten()
            else:
                # Replace the entries with a different shape, rather than replacing their data
                # as done above?
                target[name].data = param.detach().cpu().flatten()

    @staticmethod
    @torch.no_grad()
    def extract_grad(model, target: ParamDict, excluded_parameters: Set[str]):
        params = SynapticIntelligencePlugin_.allowed_parameters(model, excluded_parameters)

        # Store the gradients into target
        for name, param in params:
            # BUG: Getting AttributeError: 'NoneType' object has no attribute 'detach'
            if param.grad is not None:
                target[name][...] = param.grad.detach().cpu().flatten()

    @staticmethod
    def compute_ewc_loss(
        model, ewc_data: EwcDataType, excluded_parameters: Set[str], device, lambd=0.0
    ):
        params = SynapticIntelligencePlugin_.allowed_parameters(model, excluded_parameters)

        loss = None
        for name, param in params:
            weights = param.to(device).flatten()  # Flat, not detached
            param_ewc_data_0 = ewc_data[0][name].to(device)  # Flat, detached
            param_ewc_data_1 = ewc_data[1][name].to(device)  # Flat, detached

            # BUG: Getting RuntimeError: inconsistent tensor size, expected tensor [128]
            # and src [256] to have the same number of elements, but got 128 and 256
            # elements respectively
            if param_ewc_data_1.shape == param_ewc_data_0.shape == weights.shape:
                syn_loss: Tensor = torch.dot(
                    param_ewc_data_1, (weights - param_ewc_data_0) ** 2
                ) * (lambd / 2)
            else:
                # FIXME: For now, I'll just consider the 'common' elements?
                param_0_cols = param_ewc_data_0.shape[-1]
                param_1_cols = param_ewc_data_1.shape[-1]
                # Weird: why does param_0 have *more* columns than param_1?
                assert param_0_cols > param_1_cols
                # Assuming that the first indices are the common weights between tasks:
                param_ewc_data_0 = param_ewc_data_0[..., :param_1_cols]
                weights = weights[..., :param_1_cols]

                syn_loss: Tensor = torch.dot(
                    param_ewc_data_1, (weights - param_ewc_data_0) ** 2
                ) * (lambd / 2)

            if loss is None:
                loss = syn_loss
            else:
                loss += syn_loss

        return loss

    @staticmethod
    @torch.no_grad()
    def post_update(model, syn_data: SynDataType, excluded_parameters: Set[str]):
        SynapticIntelligencePlugin_.extract_weights(
            model, syn_data["new_theta"], excluded_parameters
        )
        SynapticIntelligencePlugin_.extract_grad(model, syn_data["grad"], excluded_parameters)

        for param_name in syn_data["trajectory"]:
            # BUG: Getting RuntimeError: The size of tensor a (128) must match the size
            # of tensor b (256) at non-singleton dimension 0
            # syn_data['trajectory'][param_name] += \
            #     syn_data['grad'][param_name] * (
            #             syn_data['new_theta'][param_name] -
            #             syn_data['old_theta'][param_name])
            destination: Tensor = syn_data["trajectory"][param_name]
            grad: Tensor = syn_data["grad"][param_name]
            new_theta: Tensor = syn_data["new_theta"][param_name]
            old_theta: Tensor = syn_data["old_theta"][param_name]
            if not (destination.shape == grad.shape == new_theta.shape == old_theta.shape):
                destination_cols = destination.shape[-1]
                grad_cols = grad.shape[-1]
                new_theta_cols = new_theta.shape[-1]
                old_theta_cols = old_theta.shape[-1]
                assert grad_cols < new_theta_cols and new_theta_cols == old_theta_cols
                # FIXME: @lebrice Chop the last two? or extend the grad? Extending the
                # grad with zeros for now (no idea what that implies though!)
                grad_extension = grad.new_zeros(size=[*grad.shape[:-1], new_theta_cols - grad_cols])
                grad = torch.cat([grad, grad_extension], -1)

                destination_extension = destination.new_zeros(
                    size=[*destination.shape[:-1], new_theta_cols - destination_cols]
                )
                destination = torch.cat([destination, destination_extension], -1)

            assert destination.shape == grad.shape == new_theta.shape == old_theta.shape
            destination += grad * (new_theta - old_theta)
            # Replace the entry (in case we replaced the `destination` variable above).
            syn_data["trajectory"][param_name] = destination

    @staticmethod
    @torch.no_grad()
    def update_ewc_data(
        net,
        ewc_data: EwcDataType,
        syn_data: SynDataType,
        clip_to: float,
        excluded_parameters: Set[str],
        c=0.0015,
    ):
        SynapticIntelligencePlugin.extract_weights(net, syn_data["new_theta"], excluded_parameters)
        eps = 0.0000001  # 0.001 in few task - 0.1 used in a more complex setup

        for param_name in syn_data["cum_trajectory"]:
            # BUG: Getting RuntimeError: The size of tensor a (128) must match the size
            # of tensor b (256) at non-singleton dimension 0
            # syn_data['cum_trajectory'][param_name] += \
            #     c * syn_data['trajectory'][param_name] / (
            #             np.square(syn_data['new_theta'][param_name] -
            #                       ewc_data[0][param_name]) + eps)
            cum_trajectory = syn_data["cum_trajectory"][param_name]
            trajectory = syn_data["trajectory"][param_name]
            new_theta = syn_data["new_theta"][param_name]
            ewc_data_0 = ewc_data[0][param_name]

            if not (
                cum_trajectory.shape == trajectory.shape == new_theta.shape == ewc_data_0.shape
            ):
                cum_trajectory_cols = cum_trajectory.shape[-1]
                trajectory_cols = trajectory.shape[-1]
                new_theta_cols = new_theta.shape[-1]
                ewc_data_0_cols = ewc_data_0.shape[-1]
                assert cum_trajectory_cols < trajectory_cols == new_theta_cols == ewc_data_0_cols

                # FIXME: @lebrice Extending the cum_trajectory with zeros for now (no
                # idea what that implies though!)
                cum_trajectory_extension = cum_trajectory.new_zeros(
                    size=[
                        *cum_trajectory.shape[:-1],
                        trajectory_cols - cum_trajectory_cols,
                    ]
                )
                cum_trajectory = torch.cat([cum_trajectory, cum_trajectory_extension], -1)

            cum_trajectory += c * trajectory / (np.square(new_theta - ewc_data_0) + eps)
            # Reset the cum_trajectory variable in the dict, just in case we replaced
            # the variable above.
            syn_data["cum_trajectory"][param_name] = cum_trajectory

        for param_name in syn_data["cum_trajectory"]:
            ewc_data[1][param_name] = torch.empty_like(
                syn_data["cum_trajectory"][param_name]
            ).copy_(-syn_data["cum_trajectory"][param_name])

        # change sign here because the Ewc regularization
        # in Caffe (theta - thetaold) is inverted w.r.t. syn equation [4]
        # (thetaold - theta)
        for param_name in ewc_data[1]:
            ewc_data[1][param_name] = torch.clamp(ewc_data[1][param_name], max=clip_to)
            ewc_data[0][param_name] = syn_data["new_theta"][param_name].clone()


# TODO: Why do they have everything as a static method rather than as a classmethod?
# Makes it almost impossible to extend this SynapticIntelligencePlugin!
SynapticIntelligencePlugin_.extract_weights = SynapticIntelligencePlugin.extract_weights
SynapticIntelligencePlugin_.extract_grad = SynapticIntelligencePlugin.extract_grad
SynapticIntelligencePlugin_.compute_ewc_loss = SynapticIntelligencePlugin.compute_ewc_loss
SynapticIntelligencePlugin_.post_update = SynapticIntelligencePlugin.post_update
SynapticIntelligencePlugin_.update_ewc_data = SynapticIntelligencePlugin.update_ewc_data


@register_method
@dataclass
class SynapticIntelligenceMethod(AvalancheMethod[SynapticIntelligence]):
    """The Synaptic Intelligence strategy from Avalanche.

    This is the Synaptic Intelligence PyTorch implementation of the
    algorithm described in the paper
    "Continuous Learning in Single-Incremental-Task Scenarios"
    (https://arxiv.org/abs/1806.08568)

    The original implementation has been proposed in the paper
    "Continual Learning Through Synaptic Intelligence"
    (https://arxiv.org/abs/1703.04200).

    The Synaptic Intelligence regularization can also be used in a different
    strategy by applying the :class:`SynapticIntelligencePlugin` plugin.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # Synaptic Intelligence lambda term.
    si_lambda: float = uniform(1e-2, 1.0, default=0.5)  # TODO: Check the range.

    strategy_class: ClassVar[Type[BaseStrategy]] = SynapticIntelligence

    def create_cl_strategy(self, setting: SLSetting) -> SynapticIntelligence:
        strategy = super().create_cl_strategy(setting)

        # Find and replace the original plugin with our "patched" version:
        plugin_index: Optional[int] = None
        for i, plugin in enumerate(strategy.plugins):
            if type(plugin) is SynapticIntelligencePlugin_:
                plugin_index = i
                break
        assert plugin_index is not None, "strategy should have the Plugin, no?"
        assert isinstance(plugin_index, int)

        old_plugin: SynapticIntelligencePlugin_ = strategy.plugins[plugin_index]
        new_plugin = SynapticIntelligencePlugin(
            si_lambda=old_plugin.si_lambda,
            excluded_parameters=old_plugin.excluded_parameters,
            # device=old_plugin.device,
        )
        new_plugin.ewc_data = old_plugin.ewc_data
        new_plugin.syn_data = old_plugin.syn_data
        new_plugin._device = old_plugin._device

        strategy.plugins[plugin_index] = new_plugin
        return strategy


if __name__ == "__main__":

    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    parser = ArgumentParser(__doc__)
    parser.add_arguments(SynapticIntelligenceMethod, "method")
    args = parser.parse_args()
    method: SynapticIntelligenceMethod = args.method

    results = setting.apply(method)


================================================
FILE: sequoia/methods/avalanche_methods/synaptic_intelligence_test.py
================================================
""" WIP: Tests for the SynapticIntelligence Method.

For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type

from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .synaptic_intelligence import SynapticIntelligenceMethod


class TestSynapticIntelligenceMethod(_TestAvalancheMethod):
    Method: ClassVar[Type[AvalancheMethod]] = SynapticIntelligenceMethod


================================================
FILE: sequoia/methods/base_method.py
================================================
""" Defines a Method, which is a "solution" for a given "problem" (a Setting).

The Method could be whatever you want, really. For the 'baselines' we have here,
we use pytorch-lightning, and a few little utility classes such as `Metrics` and
`Loss`, which are basically just like dicts/objects, with some cool other
methods.

TODO: Add a wrapper to limit the 'epoch' length in RL, and then use an early-stopping
callback to also perform validation like in SL.
"""
import warnings
from dataclasses import dataclass, fields, is_dataclass
from pathlib import Path
from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Type, Union

import gym
import torch
from pytorch_lightning import Callback, Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from simple_parsing import mutable_field
from wandb.wandb_run import Run

from sequoia.common import Config
from sequoia.common.spaces import Image
from sequoia.methods import register_method
from sequoia.settings import RLSetting, SLSetting
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.settings.base import Method
from sequoia.settings.base.environment import Environment
from sequoia.settings.base.objects import Actions, Observations, Rewards
from sequoia.settings.base.results import Results
from sequoia.settings.base.setting import Setting, SettingType
from sequoia.settings.rl.continual import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.parseable import Parseable
from sequoia.utils.serialization import Serializable
from sequoia.utils.utils import compute_identity

from .models import BaseModel
from .trainer import Trainer, TrainerConfig

logger = get_logger(__name__)

# TODO: Set the target setting back to Setting once we fix the PL + RL issues.
@register_method
@dataclass
class BaseMethod(Method, Serializable, Parseable, target_setting=SLSetting):
    """Versatile Base method which targets all settings.

    Uses pytorch-lightning's Trainer for training and LightningModule as model.

    Uses a [BaseModel](methods/models/base_model/base_model.py), which
    can be used for:
    - Self-Supervised training with modular auxiliary tasks;
    - Semi-Supervised training on partially labeled batches;
    - Multi-Head prediction (e.g. in task-incremental scenario);
    """

    # NOTE: these two fields are also used to create the command-line arguments.
    # HyperParameters of the method.
    hparams: BaseModel.HParams = mutable_field(BaseModel.HParams)
    # Configuration options.
    config: Config = mutable_field(Config)
    # Options for the Trainer object.
    trainer_options: TrainerConfig = mutable_field(TrainerConfig)

    def __init__(
        self,
        hparams: BaseModel.HParams = None,
        config: Config = None,
        trainer_options: TrainerConfig = None,
        **kwargs,
    ):
        """Creates a new BaseMethod, using the provided configuration options.

        Parameters
        ----------
        hparams : BaseModel.HParams, optional
            Hyper-parameters of the BaseModel used by this Method. Defaults to None.

        config : Config, optional
            Configuration dataclass with options like log_dir, device, etc. Defaults to
            None.

        trainer_options : TrainerConfig, optional
            Dataclass which holds all the options for creating the `pl.Trainer` which
            will be used for training. Defaults to None.

        **kwargs :
            If any of the above arguments are left as `None`, then they will be created
            using any appropriate value from `kwargs`, if present.

        ## Examples:
        ```
        method = BaseMethod(hparams=BaseModel.HParams(learning_rate=0.01))
        method = BaseMethod(learning_rate=0.01) # Same as above

        method = BaseMethod(config=Config(debug=True))
        method = BaseMethod(debug=True) # Same as above

        method = BaseMethod(hparams=BaseModel.HParams(learning_rate=0.01),
                                config=Config(debug=True))
        method = BaseMethod(learning_rate=0.01, debug=True) # Same as above
        ```
        """
        # TODO: When creating a Method from a script, like `BaseMethod()`,
        # should we expect the hparams to be passed? Should we create them from
        # the **kwargs? Should we parse them from the command-line?

        # Get the type of hparams to use from the field's type annotation.
        hparam_field = [f for f in fields(self) if f.name == "hparams"][0]
        hparam_type = hparam_field.type

        # Option 2: Try to use the keyword arguments to create the hparams,
        # config and trainer options.
        if kwargs:
            logger.info(
                f"using keyword arguments {kwargs} to populate the corresponding "
                f"values in the hparams, config and trainer_options."
            )
            self.hparams = hparams or hparam_type.from_dict(kwargs, drop_extra_fields=True)
            self.config = config or Config.from_dict(kwargs, drop_extra_fields=True)
            self.trainer_options = trainer_options or TrainerConfig.from_dict(
                kwargs, drop_extra_fields=True
            )

        elif self._argv:
            # Since the method was parsed from the command-line, parse those as
            # well from the argv that were used to create the Method.
            # Option 3: Parse them from the command-line.
            # assert not kwargs, "Don't pass any extra kwargs to the constructor!"
            self.hparams = hparams or hparam_type.from_args(self._argv, strict=False)
            self.config = config or Config.from_args(self._argv, strict=False)
            self.trainer_options = trainer_options or TrainerConfig.from_args(
                self._argv, strict=False
            )

        else:
            # Option 1: Use the default values:
            self.hparams = hparams or hparam_type()
            self.config = config or Config()
            self.trainer_options = trainer_options or TrainerConfig()
        assert self.hparams
        assert self.config
        assert self.trainer_options

        if self.config.debug:
            # Disable wandb logging if debug is True.
            self.trainer_options.no_wandb = True

        # The model and Trainer objects will be created in `self.configure`.
        # NOTE: This right here doesn't create the fields, it just gives some
        # type information for static type checking.
        self.trainer: Trainer
        self.model: BaseModel

        self.additional_train_wrappers: List[Callable] = []
        self.additional_valid_wrappers: List[Callable] = []

        self.setting: Setting

    def configure(self, setting: SettingType) -> None:
        """Configures the method for the given Setting.

        Concretely, this creates the model and Trainer objects which will be
        used to train and test a model for the given `setting`.

        Args:
            setting (SettingType): The setting the method will be evaluated on.
        """
        # Note: this here is temporary, just tinkering with wandb atm.
        method_name: str = self.get_name()

        # Set the default batch size to use, depending on the kind of Setting.
        if self.hparams.batch_size is None:
            if isinstance(setting, RLSetting):
                # Default batch size of 1 in RL
                self.hparams.batch_size = 1
            elif isinstance(setting, SLSetting):
                self.hparams.batch_size = 32
            else:
                warnings.warn(
                    UserWarning(
                        f"Dont know what batch size to use by default for setting "
                        f"{setting}, will try 16."
                    )
                )
                self.hparams.batch_size = 16
        # Set the batch size on the setting.
        setting.batch_size = self.hparams.batch_size

        # TODO: Should we set the 'config' on the setting from here?
        if setting.config and setting.config == self.config:
            pass
        elif self.config != Config():
            assert (
                setting.config is None or setting.config == Config()
            ), "method.config has been modified, and so has setting.config!"
            setting.config = self.config
        elif setting.config:
            assert setting.config != Config(), "Weird, both configs have default values.."
            self.config = setting.config

        setting_name: str = setting.get_name()
        dataset = setting.dataset

        if isinstance(setting, IncrementalAssumption):
            if self.hparams.multihead is None:
                # Use a multi-head model by default if the task labels are
                # available at training time and has more than one task.
                if setting.task_labels_at_test_time:
                    assert setting.task_labels_at_train_time
                self.hparams.multihead = setting.nb_tasks > 1

        if not setting.known_task_boundaries_at_train_time:
            # If we won't have access to the task boundaries, so we can only do one
            # epoch.
            self.trainer_options.max_epochs = 1

        if isinstance(setting, ContinualRLSetting):
            setting.add_done_to_observations = True
            setting.prefer_tensors = True
            if isinstance(setting.observation_space.x, Image):
                if self.hparams.encoder is None:
                    self.hparams.encoder = "simple_convnet"
                # TODO: Add 'proper' transforms for cartpole, specifically?
                from sequoia.common.transforms import Transforms

                transforms = [
                    Transforms.three_channels,
                    Transforms.to_tensor,
                    Transforms.resize_64x64,
                ]
                setting.transforms = transforms
                setting.train_transforms = transforms
                setting.val_transforms = transforms
                setting.test_transforms = transforms

            # Configure the baseline specifically for an RL setting.
            # TODO: Select which output head to use from the command-line?
            # Limit the number of epochs so we never iterate on a closed env.
            # TODO: Would multiple "epochs" be possible?
            if setting.train_max_steps is not None:
                self.trainer_options.max_epochs = 1
                self.trainer_options.limit_train_batches = setting.train_max_steps // (
                    setting.batch_size or 1
                )
                self.trainer_options.limit_val_batches = min(
                    setting.train_max_steps // (setting.batch_size or 1), 1000
                )
                # TODO: Test batch size is limited to 1 for now.
                # NOTE: This isn't used, since we don't call `trainer.test()`.
                self.trainer_options.limit_test_batches = setting.train_max_steps

        # TODO: Debug the multi-GPU setup with DP accelerator and pytorch lightning.
        self.model = self.create_model(setting).to(self.config.device)

        # The PolicyHead actually does its own backward pass, so we disable
        # automatic optimization when using it.
        from .models.output_heads import PolicyHead

        if isinstance(self.model.output_head, PolicyHead):
            # Doing the backward pass manually, since there might not be a loss
            # at each step.
            self.trainer_options.automatic_optimization = False

        self.trainer = self.create_trainer(setting)
        self.setting = setting

    def fit(
        self,
        train_env: Environment[Observations, Actions, Rewards],
        valid_env: Environment[Observations, Actions, Rewards],
    ):
        """Called by the Setting to train the method.
        Could be called more than once before training is 'over', for instance
        when training on a series of tasks.
        Overwrite this to customize training.
        """
        assert self.model is not None, (
            "Setting should have been called method.configure(setting=self) "
            "before calling `fit`!"
        )
        # TODO: Figure out if there is a smarter way to reset the state of the Trainer,
        # rather than just creating a new one every time.
        self.trainer = self.create_trainer(self.setting)

        # NOTE: It doesn't seem sufficient to just do this, since for instance the
        # early-stopping callback would prevent training on future tasks, since they
        # have higher validation loss:
        # self.trainer.current_epoch = 0

        success = self.trainer.fit(
            model=self.model,
            train_dataloader=train_env,
            val_dataloaders=valid_env,
        )
        # BUG: After `fit`, it seems like the output head of the model is on the CPU?
        self.model.to(self.config.device)

        return success

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        """Get a batch of predictions (actions) for a batch of observations.

        This gets called by the Setting during the test loop.

        TODO: There is a mismatch here between the type of the output of this
        method (`Actions`) and the type of `action_space`: we should either have
        a `Discrete` action space, and this method should return ints, or this
        method should return `Actions`, and the `action_space` should be a
        `TypedDictSpace` or something similar.
        Either way, `get_actions(obs, action_space) in action_space` should
        always be `True`.
        """
        self.model.eval()
        with torch.no_grad():
            forward_pass = self.model.forward(observations)
        actions: Actions = forward_pass.actions
        action_numpy = actions.actions_np
        assert action_numpy in action_space, (action_numpy, action_space)
        return actions

    def create_model(self, setting: SettingType) -> BaseModel[SettingType]:
        """Creates the BaseModel (a LightningModule) for the given Setting.

        You could extend this to customize which model is used depending on the
        setting.

        TODO: As @oleksost pointed out, this might allow the creation of weird
        'frankenstein' methods that are super-specific to each setting, without
        really having anything in common.

        Args:
            setting (SettingType): An experimental setting.

        Returns:
            BaseModel[SettingType]: The BaseModel that is to be applied
            to that setting.
        """
        # Create the model, passing the setting, hparams and config.
        return BaseModel(setting=setting, hparams=self.hparams, config=self.config)

    def create_trainer(self, setting: SettingType) -> Trainer:
        """Creates a Trainer object from pytorch-lightning for the given setting.

        NOTE: At the moment, uses the KNN and VAE callbacks.
        To use different callbacks, overwrite this method.

        Args:

        Returns:
            Trainer: the Trainer object.
        """
        # We use this here to create loggers!
        # No need to use this, we can use
        callbacks = self.configure_callbacks(setting)
        loggers = []
        if setting.wandb and setting.wandb.project:
            wandb_logger = setting.wandb.make_logger()
            loggers.append(wandb_logger)
        trainer = self.trainer_options.make_trainer(
            config=self.config,
            callbacks=callbacks,
            loggers=loggers,
        )
        return trainer

    def get_experiment_name(self, setting: Setting, experiment_id: str = None) -> str:
        """Gets a unique name for the experiment where `self` is applied to `setting`.

        This experiment name will be passed to `orion` when performing a run of
        Hyper-Parameter Optimization.

        Parameters
        ----------
        - setting : Setting

            The `Setting` onto which this method will be applied. This method will be used when

        - experiment_id: str, optional

            A custom hash to append to the experiment name. When `None` (default), a
            unique hash will be created based on the values of the Setting's fields.

        Returns
        -------
        str
            The name for the experiment.
        """
        if not experiment_id:
            setting_dict = setting.to_dict()
            # BUG: Some settings have non-string keys/value or something?
            from sequoia.utils.utils import flatten_dict

            d = flatten_dict(setting_dict)
            experiment_id = compute_identity(size=5, **d)
        assert isinstance(setting.dataset, str), "assuming that dataset is a str for now."
        return f"{self.get_name()}-{setting.get_name()}_{setting.dataset}_{experiment_id}"

    def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        return {
            "hparams": self.hparams.get_orion_space(),
            "trainer_options": self.trainer_options.get_orion_space(),
        }

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        # Here we overwrite the corresponding attributes with the new suggested values
        # leaving other fields unchanged.
        self.hparams = self.hparams.replace(**new_hparams["hparams"])
        # BUG with the `replace` function and Union[int, float] type, it doesn't
        # preserve the type of the field when serializing/deserializing!
        self.trainer_options.max_epochs = new_hparams["trainer_options"]["max_epochs"]

    def hparam_sweep(
        self,
        setting: Setting,
        search_space: Dict[str, Union[str, Dict]] = None,
        experiment_id: str = None,
        database_path: Union[str, Path] = None,
        max_runs: int = None,
        hpo_algorithm: Union[str, Dict] = "BayesianOptimizer",
        debug: bool = False,
    ) -> Tuple[BaseModel.HParams, float]:
        # Setting max epochs to 1, just to keep runs somewhat short.
        # NOTE: Now we're actually going to have the max_epochs as a tunable
        # hyper-parameter, so we're not hard-setting this value anymore.
        # self.trainer_options.max_epochs = 1

        # Call 'configure', so that we create `self.model` at least once, which will
        # update the hparams.output_head field to be of the right type. This is
        # necessary in order for the `get_orion_space` to retrieve all the hparams
        # of the output head.
        self.configure(setting)

        return super().hparam_sweep(
            setting=setting,
            search_space=search_space,
            experiment_id=experiment_id,
            database_path=database_path,
            max_runs=max_runs,
            debug=debug or self.config.debug,
            hpo_algorithm=hpo_algorithm,
        )

    def receive_results(self, setting: Setting, results: Results):
        """Receives the results of an experiment, where `self` was applied to Setting
        `setting`, which produced results `results`.
        """
        super().receive_results(setting, results=results)

    def configure_callbacks(self, setting: SettingType = None) -> List[Callback]:
        """Create the PytorchLightning Callbacks for this Setting.

        These callbacks will get added to the Trainer in `create_trainer`.

        Parameters
        ----------
        setting : SettingType
            The `Setting` on which this Method is going to be applied.

        Returns
        -------
        List[Callback]
            A List of `Callaback` objects to use during training.
        """
        setting = setting or self.setting
        # TODO: Move this to something like a `configure_callbacks` method in the model,
        # once PL adds it.
        # from sequoia.common.callbacks.vae_callback import SaveVaeSamplesCallback
        return [
            EarlyStopping(monitor="val/loss"),
            # self.hparams.knn_callback,
            # SaveVaeSamplesCallback(),
        ]

    def apply_all(self, argv: Union[str, List[str]] = None) -> Dict[Type[Setting], Results]:
        """(WIP): Runs this Method on all its applicable settings.

        Returns
        -------

            Dict mapping from setting type to the Results produced by this method.
        """
        applicable_settings = self.get_applicable_settings()

        all_results: Dict[Type[Setting], Results] = {}
        for setting_type in applicable_settings:
            setting = setting_type.from_args(argv)
            results = setting.apply(self)
            all_results[setting_type] = results
        print(f"All results for method of type {type(self)}:")
        print(
            {
                method.get_name(): (results.get_metric() if results else "crashed")
                for method, results in all_results.items()
            }
        )
        return all_results

    def __init_subclass__(cls, target_setting: Type[SettingType] = Setting, **kwargs) -> None:
        """Called when creating a new subclass of Method.

        Args:
            target_setting (Type[Setting], optional): The target setting.
                Defaults to None, in which case the method will inherit the
                target setting of it's parent class.
        """
        if not is_dataclass(cls):
            logger.critical(
                UserWarning(
                    f"The BaseMethod subclass {cls} should be decorated with "
                    f"@dataclass!\n"
                    f"While this isn't strictly necessary for things to work, it is"
                    f"highly recommended, as any dataclass-style class attributes "
                    f"won't have the corresponding command-line arguments "
                    f"generated, which can cause a lot of subtle bugs."
                )
            )
        super().__init_subclass__(target_setting=target_setting, **kwargs)

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching between tasks.

        Args:
            task_id (int, optional): the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.
        """
        self.model.on_task_switch(task_id)

    def setup_wandb(self, run: Run) -> None:
        """Called by the Setting when using Weights & Biases, after `wandb.init`.

        This method is here to provide Methods with the opportunity to log some of their
        configuration options or hyper-parameters to wandb.

        NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
        this point.

        Parameters
        ----------
        run : wandb.Run
            Current wandb Run.
        """
        # TODO: (@lebrice) I think these will probably be set by the wandb logger,
        # run.config["config"] = self.config.to_dict()
        # Need to check wether this causes any issues.
        # run.config["hparams"] = self.hparams.to_dict()
        # run.config["trainer_config"] = self.trainer_options


================================================
FILE: sequoia/methods/base_method_test.py
================================================
from typing import ClassVar, Dict, Type

import pytest
import torch

from sequoia.common.config import Config
from sequoia.conftest import slow
from sequoia.methods.trainer import TrainerConfig
from sequoia.settings import (
    ClassIncrementalSetting,
    IncrementalRLSetting,
    Setting,
    TraditionalRLSetting,
)
from sequoia.settings.rl.continual.results import ContinualRLResults

from .base_method import BaseMethod
from .method_test import MethodTests


class TestBaseMethod(MethodTests):
    Method: ClassVar[Type[BaseMethod]] = BaseMethod
    method_debug_kwargs: ClassVar[Dict] = {"max_epochs": 1}

    @classmethod
    @pytest.fixture(scope="module")
    def trainer_options(cls, tmp_path_factory) -> TrainerConfig:
        tmp_path = tmp_path_factory.mktemp("log_dir")
        return TrainerConfig(
            # logger=False,
            max_epochs=1,
            checkpoint_callback=False,
            default_root_dir=tmp_path,
        )

    @classmethod
    @pytest.fixture
    def method(cls, config: Config, trainer_options: TrainerConfig) -> BaseMethod:
        """Fixture that returns the Method instance to use when testing/debugging."""
        trainer_options.max_epochs = 1
        return cls.Method(trainer_options=trainer_options, config=config)

    def validate_results(
        self,
        setting: Setting,
        method: BaseMethod,
        results: Setting.Results,
    ) -> None:
        assert results
        assert results.objective
        # TODO: Set some 'reasonable' bounds on the performance here, depending on the
        # setting/dataset.

    @pytest.mark.xfail(reason="TODO: Re-enable once we fix the bugs for BaseMethod in RL.")
    @slow
    @pytest.mark.timeout(120)
    def test_cartpole_state(self, config: Config, trainer_options: TrainerConfig):
        """Test that the baseline method can learn cartpole (state input)"""
        # TODO: Actually remove the trainer_config class from the BaseMethod?
        trainer_options.max_epochs = 1
        method = self.Method(config=config, trainer_options=trainer_options)
        method.hparams.learning_rate = 0.01

        setting = TraditionalRLSetting(
            dataset="CartPole-v0",
            train_max_steps=5000,
            nb_tasks=1,
            test_max_steps=2_000,
            config=config,
        )
        results: ContinualRLResults = setting.apply(method)

        print(results.to_log_dict())
        # The method should normally get the maximum length (200), but checking with
        # 100 just to account for randomness.
        assert results.average_metrics.mean_episode_length > 100.0

    @pytest.mark.xfail(reason="TODO: Re-enable once we fix the bugs for BaseMethod in RL.")
    @slow
    @pytest.mark.timeout(120)
    def test_incremental_cartpole_state(self, config: Config, trainer_options: TrainerConfig):
        """Test that the baseline method can learn cartpole (state input)"""
        # TODO: Actually remove the trainer_config class from the BaseMethod?
        trainer_options.max_epochs = 1
        method = self.Method(config=config, trainer_options=trainer_options)
        method.hparams.learning_rate = 0.01

        setting = IncrementalRLSetting(
            dataset="cartpole", train_max_steps=5000, nb_tasks=2, test_max_steps=1000
        )
        results: ContinualRLResults = setting.apply(method)

        print(results.to_log_dict())
        # The method should normally get the maximum length (200), but checking with
        # 100 just to account for randomness.
        assert results.mean_episode_length > 100.0

    @pytest.mark.xfail(reason="TODO: Unreliable test.")
    @pytest.mark.timeout(30)
    @pytest.mark.skipif(not torch.cuda.is_available(), reason="Cuda is required.")
    def test_device_of_output_head_is_correct(
        self,
        short_class_incremental_setting: ClassIncrementalSetting,
        trainer_options: TrainerConfig,
        config: Config,
    ):
        """There is a bug happening where the output head is on CPU while the rest of the
        model is on GPU.
        """
        trainer_options.max_epochs = 1
        method = self.Method(trainer_options=trainer_options, config=config)
        results = short_class_incremental_setting.apply(method)
        assert 0.20 <= results.objective


def test_weird_pl_bug():
    replica_device = None

    def find_tensor_with_device(tensor: torch.Tensor) -> torch.Tensor:
        nonlocal replica_device
        if replica_device is None and tensor.device != torch.device("cpu"):
            replica_device = tensor.device
        return tensor

    from pytorch_lightning.utilities.apply_func import apply_to_collection

    from sequoia.settings.sl.incremental.objects import (
        IncrementalSLObservations,
        IncrementalSLRewards,
    )

    # TODO: Not quite sure why there is also a `0` in there.
    input_device = "cuda"
    inputs = (
        (
            IncrementalSLObservations(
                x=torch.rand([32, 3, 28, 28], device=input_device),
                task_labels=torch.zeros([32], device=input_device),
            ),
            IncrementalSLRewards(y=torch.randint(10, [32], device=input_device)),
        ),
        0,
    )

    # from collections.abc import Mapping, Sequence
    apply_to_collection(inputs, dtype=torch.Tensor, function=find_tensor_with_device)

    assert replica_device is not None


BaseMethodTests = TestBaseMethod


================================================
FILE: sequoia/methods/conftest.py
================================================
import pytest

from sequoia.client import SettingProxy
from sequoia.common.config import Config
from sequoia.settings.sl import (
    ClassIncrementalSetting,
    ContinualSLSetting,
    DiscreteTaskAgnosticSLSetting,
    TaskIncrementalSLSetting,
)
from sequoia.settings.sl.continual.setting import random_subset


@pytest.fixture(scope="session")
def short_class_incremental_setting(session_config: Config):
    setting = ClassIncrementalSetting(
        dataset="mnist",
        nb_tasks=5,
        monitor_training_performance=True,
    )
    setting.config = session_config
    setting.prepare_data()
    setting.setup()

    # Testing this out: Shortening the train datasets:
    setting.train_datasets = [
        random_subset(task_dataset, 100) for task_dataset in setting.train_datasets
    ]
    setting.val_datasets = [
        random_subset(task_dataset, 100) for task_dataset in setting.val_datasets
    ]
    setting.test_datasets = [
        random_subset(task_dataset, 100) for task_dataset in setting.test_datasets
    ]
    assert len(setting.train_datasets) == 5
    assert len(setting.val_datasets) == 5
    assert len(setting.test_datasets) == 5
    assert all(len(dataset) == 100 for dataset in setting.train_datasets)
    assert all(len(dataset) == 100 for dataset in setting.val_datasets)
    assert all(len(dataset) == 100 for dataset in setting.test_datasets)

    # Assert that calling setup doesn't overwrite the datasets.
    setting.setup()
    assert len(setting.train_datasets) == 5
    assert len(setting.val_datasets) == 5
    assert len(setting.test_datasets) == 5
    assert all(len(dataset) == 100 for dataset in setting.train_datasets)
    assert all(len(dataset) == 100 for dataset in setting.val_datasets)
    assert all(len(dataset) == 100 for dataset in setting.test_datasets)
    return setting


@pytest.fixture(scope="session")
def short_continual_sl_setting(session_config: Config):
    setting = ContinualSLSetting(
        dataset="mnist",
        monitor_training_performance=True,
    )
    setting.config = session_config
    setting.prepare_data()
    setting.setup()

    # Testing this out: Shortening the train datasets:
    setting.train_datasets = [
        random_subset(task_dataset, 100) for task_dataset in setting.train_datasets
    ]
    setting.val_datasets = [
        random_subset(task_dataset, 100) for task_dataset in setting.val_datasets
    ]
    setting.test_datasets = [
        random_subset(task_dataset, 100) for task_dataset in setting.test_datasets
    ]
    assert len(setting.train_datasets) == 5
    assert len(setting.val_datasets) == 5
    assert len(setting.test_datasets) == 5
    assert all(len(dataset) == 100 for dataset in setting.train_datasets)
    assert all(len(dataset) == 100 for dataset in setting.val_datasets)
    assert all(len(dataset) == 100 for dataset in setting.test_datasets)

    # Assert that calling setup doesn't overwrite the datasets.
    setting.setup()
    assert len(setting.train_datasets) == 5
    assert len(setting.val_datasets) == 5
    assert len(setting.test_datasets) == 5
    assert all(len(dataset) == 100 for dataset in setting.train_datasets)
    assert all(len(dataset) == 100 for dataset in setting.val_datasets)
    assert all(len(dataset) == 100 for dataset in setting.test_datasets)
    return setting


@pytest.fixture(scope="session")
def short_discrete_task_agnostic_sl_setting(session_config: Config):
    setting = DiscreteTaskAgnosticSLSetting(
        dataset="mnist",
        monitor_training_performance=True,
    )
    setting.config = session_config
    setting.prepare_data()
    setting.setup()

    # Testing this out: Shortening the train datasets:
    setting.train_datasets = [
        random_subset(task_dataset, 100) for task_dataset in setting.train_datasets
    ]
    setting.val_datasets = [
        random_subset(task_dataset, 100) for task_dataset in setting.val_datasets
    ]
    setting.test_datasets = [
        random_subset(task_dataset, 100) for task_dataset in setting.test_datasets
    ]
    assert len(setting.train_datasets) == 5
    assert len(setting.val_datasets) == 5
    assert len(setting.test_datasets) == 5
    assert all(len(dataset) == 100 for dataset in setting.train_datasets)
    assert all(len(dataset) == 100 for dataset in setting.val_datasets)
    assert all(len(dataset) == 100 for dataset in setting.test_datasets)

    # Assert that calling setup doesn't overwrite the datasets.
    setting.setup()
    assert len(setting.train_datasets) == 5
    assert len(setting.val_datasets) == 5
    assert len(setting.test_datasets) == 5
    assert all(len(dataset) == 100 for dataset in setting.train_datasets)
    assert all(len(dataset) == 100 for dataset in setting.val_datasets)
    assert all(len(dataset) == 100 for dataset in setting.test_datasets)
    return setting


@pytest.fixture(scope="session")
def short_task_incremental_setting(session_config: Config):
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=5,
        monitor_training_performance=True,
    )
    setting.config = session_config
    setting.prepare_data()

    setting.setup()
    # Testing this out: Shortening the train datasets:
    setting.train_datasets = [
        random_subset(task_dataset, 100) for task_dataset in setting.train_datasets
    ]
    setting.val_datasets = [
        random_subset(task_dataset, 100) for task_dataset in setting.val_datasets
    ]
    setting.test_datasets = [
        random_subset(task_dataset, 100) for task_dataset in setting.test_datasets
    ]
    assert len(setting.train_datasets) == 5
    assert len(setting.val_datasets) == 5
    assert len(setting.test_datasets) == 5
    assert all(len(dataset) == 100 for dataset in setting.train_datasets)
    assert all(len(dataset) == 100 for dataset in setting.val_datasets)
    assert all(len(dataset) == 100 for dataset in setting.test_datasets)

    # Assert that calling setup doesn't overwrite the datasets.
    setting.setup()
    assert len(setting.train_datasets) == 5
    assert len(setting.val_datasets) == 5
    assert len(setting.test_datasets) == 5
    assert all(len(dataset) == 100 for dataset in setting.train_datasets)
    assert all(len(dataset) == 100 for dataset in setting.val_datasets)
    assert all(len(dataset) == 100 for dataset in setting.test_datasets)

    return setting


@pytest.fixture(scope="session")
def short_sl_track_setting(session_config: Config):
    setting = SettingProxy(
        ClassIncrementalSetting,
        "sl_track",
        # dataset="synbols",
        # nb_tasks=12,
        # class_order=class_order,
        # monitor_training_performance=True,
    )
    setting.config = session_config
    # TODO: This could be a bit more convenient.
    setting.data_dir = session_config.data_dir
    assert setting.config == session_config
    assert setting.data_dir == session_config.data_dir
    assert setting.nb_tasks == 12

    # For now we'll just shorten the tests by shortening the datasets.
    samples_per_task = 100
    setting.batch_size = 10

    setting.setup()
    # Testing this out: Shortening the train datasets:
    setting.train_datasets = [
        random_subset(task_dataset, samples_per_task) for task_dataset in setting.train_datasets
    ]
    setting.val_datasets = [
        random_subset(task_dataset, samples_per_task) for task_dataset in setting.val_datasets
    ]
    setting.test_datasets = [
        random_subset(task_dataset, samples_per_task) for task_dataset in setting.test_datasets
    ]
    assert len(setting.train_datasets) == setting.nb_tasks
    assert len(setting.val_datasets) == setting.nb_tasks
    assert len(setting.test_datasets) == setting.nb_tasks
    assert all(len(dataset) == samples_per_task for dataset in setting.train_datasets)
    assert all(len(dataset) == samples_per_task for dataset in setting.val_datasets)
    assert all(len(dataset) == samples_per_task for dataset in setting.test_datasets)

    # Assert that calling setup doesn't overwrite the datasets.
    setting.setup()

    assert len(setting.train_datasets) == setting.nb_tasks
    assert len(setting.val_datasets) == setting.nb_tasks
    assert len(setting.test_datasets) == setting.nb_tasks
    assert all(len(dataset) == samples_per_task for dataset in setting.train_datasets)
    assert all(len(dataset) == samples_per_task for dataset in setting.val_datasets)
    assert all(len(dataset) == samples_per_task for dataset in setting.test_datasets)

    return setting


================================================
FILE: sequoia/methods/d3rlpy_methods/__init__.py
================================================


================================================
FILE: sequoia/methods/d3rlpy_methods/base.py
================================================
from typing import ClassVar, Type, Union

import gym
import numpy as np

try:
    from d3rlpy.algos import *
    from d3rlpy.dataset import MDPDataset
except ImportError as err:
    raise RuntimeError(f"You need to have `d3rlpy` installed to use these methods.") from err

from gym import Space
from gym.wrappers.record_episode_statistics import RecordEpisodeStatistics

from sequoia import Actions, Environment, Method, Observations, Rewards
from sequoia.settings.offline_rl.setting import OfflineRLSetting


class OfflineRLWrapper(gym.Wrapper):
    def __init__(self, env):
        super().__init__(env)
        self.observation_space = env.observation_space.x

    def reset(self):
        observation = super().reset()
        return observation.x

    def step(self, action):
        observation, reward, done, info = super().step(action)
        return observation.x, reward.y, done, info


class BaseOfflineRLMethod(Method, target_setting=OfflineRLSetting):
    Algo: ClassVar[Type[AlgoBase]] = AlgoBase

    def __init__(
        self,
        train_steps: int = 1_000_000,
        train_steps_per_epoch=1_000_000,
        test_steps=1_000,
        scorers: dict = None,
        use_gpu: bool = False,
        **kwargs,
    ):
        super().__init__()
        self.train_steps = train_steps
        self.train_steps_per_epoch = train_steps_per_epoch
        self.test_steps = test_steps
        self.scorers = scorers
        self.offline_metrics = None
        self.use_gpu = use_gpu
        self.kwargs = kwargs
        self.algo = None

    def configure(self, setting: OfflineRLSetting) -> None:
        super().configure(setting)
        self.setting = setting
        self.algo = type(self).Algo(use_gpu=self.use_gpu, **self.kwargs)

    def fit(
        self,
        train_env: Union[Environment[Observations, Actions, Rewards], MDPDataset],
        valid_env: Union[Environment[Observations, Actions, Rewards], MDPDataset],
    ) -> None:
        """
        Fit self.algo on training and evaluation environment
        Works for both gym environments and d3rlpy datasets
        """
        if isinstance(self.setting, OfflineRLSetting):
            self.offline_metrics = self.algo.fit(
                train_env,
                eval_episodes=valid_env,
                n_steps=self.train_steps,
                n_steps_per_epoch=self.train_steps_per_epoch,
                scorers=self.scorers,
            )
        else:
            train_env = RecordEpisodeStatistics(OfflineRLWrapper(train_env))
            valid_env = RecordEpisodeStatistics(OfflineRLWrapper(valid_env))
            self.algo.fit_online(env=train_env, eval_env=valid_env, n_steps=self.train_steps)

    def get_actions(self, obs: Union[np.ndarray, Observations], action_space: Space) -> np.ndarray:
        """
        Return actions predicted by self.algo for given observation and action space
        """
        if isinstance(obs, Observations):
            obs = obs.x
        obs = np.expand_dims(obs, axis=0)
        action = np.asarray(self.algo.predict(obs)).squeeze(axis=0)
        return action


"""
D3RLPY Methods: target OfflineRL and TraditionalRL assumptions
"""


class DQNMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = DQN


class DoubleDQNMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = DoubleDQN


class DDPGMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = DDPG


class TD3Method(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = TD3


class SACMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = SAC


class DiscreteSACMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = DiscreteSAC


class CQLMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = CQL


class DiscreteCQLMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = DiscreteCQL


class BEARMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = BEAR


class AWRMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = AWR


class DiscreteAWRMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = DiscreteAWR


class BCMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = BC


class DiscreteBCMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = DiscreteBC


class BCQMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = BCQ


class DiscreteBCQMethod(BaseOfflineRLMethod):
    Algo: ClassVar[Type[AlgoBase]] = DiscreteBCQ


================================================
FILE: sequoia/methods/d3rlpy_methods/base_test.py
================================================
import pytest
from d3rlpy.constants import ActionSpace

from sequoia import TraditionalRLSetting
from sequoia.methods.d3rlpy_methods.base import *
from sequoia.settings.offline_rl.setting import OfflineRLSetting


class BaseOfflineRLMethodTests:
    Method: ClassVar[Type[BaseOfflineRLMethod]]

    @pytest.fixture
    def method(self):
        return self.Method(train_steps=1, train_steps_per_epoch=1)

    @pytest.mark.parametrize("dataset", OfflineRLSetting.available_datasets)
    def test_offlinerl(self, method, dataset: str):

        setting_offline = OfflineRLSetting(dataset=dataset)

        #
        # Check for mismatch
        if isinstance(setting_offline.env.action_space, gym.spaces.Box):
            if method.algo.get_action_type() not in {ActionSpace.CONTINUOUS, ActionSpace.BOTH}:
                pytest.skip("This setting requires continuous action space algorithm")

        elif isinstance(setting_offline.env.action_space, gym.spaces.discrete.Discrete):
            if method.algo.get_action_type() not in {ActionSpace.DISCRETE, ActionSpace.BOTH}:
                pytest.skip("This setting requires discrete action space algorithm")
        else:
            pytest.skip("Invalid setting action space")

        results = setting_offline.apply(method)

        # Difficult to set a meaningful threshold for 1 step fit
        assert isinstance(results.objective, float)

    @pytest.mark.parametrize("dataset", TraditionalRLSetting.available_datasets)
    def test_traditionalrl(self, method, dataset):

        # BC is a strictly offline method
        if isinstance(method, (BCMethod, BCQMethod, DiscreteBCMethod, DiscreteBCQMethod)):
            pytest.skip("This method only works on OfflineRLSetting")

        setting_online = TraditionalRLSetting(dataset=dataset, test_max_steps=10)

        #
        # Check for mismatch
        if isinstance(setting_online.action_space, gym.spaces.Box):
            if method.algo.get_action_type() != ActionSpace.CONTINUOUS:
                pytest.skip("This setting requires continuous action space algorithm")

        elif isinstance(setting_online.action_space, gym.spaces.discrete.Discrete):
            if method.algo.get_action_type() != ActionSpace.DISCRETE:
                pytest.skip("This setting requires discrete action space algorithm")
        else:
            pytest.skip("Invalid setting action space")

        results = setting_online.apply(method)

        # Difficult to set a meaningful threshold for 1 step fit
        assert isinstance(results.objective, (int, float))


class TestDQNMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DQNMethod


class TestDoubleDQNMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DoubleDQNMethod


class TestDDPGMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DDPGMethod


class TestTD3Method(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = TD3Method


class TestSACMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = SACMethod


class TestDiscreteSACMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteSACMethod


class TestCQLMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = CQLMethod


class TestDiscreteCQLMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteCQLMethod


class TestBEARMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = BEARMethod


class TestAWRMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = AWRMethod


class TestDiscreteAWRMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteAWRMethod


class TestBCMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = BCMethod


class TestDiscreteBCMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteBCMethod


class TestBCQMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = BCQMethod


class TestDiscreteBCQMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteBCQMethod


================================================
FILE: sequoia/methods/ewc_method.py
================================================
"""Defines the EWC method, as a subclass of the BaseMethod.

Likewise, defines the `EwcModel`, which is a very simple subclass of the
`BaseModel`, adding in the Ewc auxiliary task (`EWCTask`).

For a more detailed view of exactly how the EwcTask calculates its loss, see
the `sequoia.methods.aux_tasks.ewc.EwcTask`.
"""
import warnings
from dataclasses import dataclass
from typing import Optional

from gym.utils import colorize
from simple_parsing import ArgumentParser, mutable_field

from sequoia.common.config import Config
from sequoia.methods import register_method
from sequoia.methods.aux_tasks.ewc import EWCTask
from sequoia.methods.base_method import BaseMethod, BaseModel
from sequoia.methods.trainer import TrainerConfig
from sequoia.settings import Setting, TaskIncrementalRLSetting, IncrementalSLSetting
from sequoia.settings.assumptions.incremental import IncrementalAssumption


class EwcModel(BaseModel):
    """Modified version of the BaseModel, which adds the EWC auxiliary task."""

    @dataclass
    class HParams(BaseModel.HParams):
        """Hyper-parameters of the `EwcModel`."""

        # Hyper-parameters related to the EWC auxiliary task.
        ewc: EWCTask.Options = mutable_field(EWCTask.Options)

    def __init__(self, setting: Setting, hparams: "EwcModel.HParams", config: Config):
        super().__init__(setting=setting, hparams=hparams, config=config)
        self.hp: EwcModel.HParams
        self.add_auxiliary_task(EWCTask(options=self.hp.ewc))

    def get_loss(self, forward_pass, rewards=None, loss_name=""):
        return super().get_loss(forward_pass, rewards=rewards, loss_name=loss_name)


@register_method
@dataclass
class EwcMethod(BaseMethod, target_setting=IncrementalSLSetting):
    """Subclass of the BaseMethod, which adds the EWCTask to the `BaseModel`.

    This Method is applicable to any CL setting (RL or SL) where there are clear task
    boundaries, regardless of if the task labels are given or not.
    """

    hparams: EwcModel.HParams = mutable_field(EwcModel.HParams)

    def __init__(
        self,
        hparams: EwcModel.HParams = None,
        config: Config = None,
        trainer_options: TrainerConfig = None,
        **kwargs,
    ):
        super().__init__(hparams=hparams, config=config, trainer_options=trainer_options, **kwargs)

    def configure(self, setting: IncrementalAssumption):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        super().configure(setting)

        if setting.phases == 1:
            warnings.warn(
                RuntimeWarning(
                    colorize(
                        "Disabling the EWC portion of this Method entirely, as there "
                        "is only one phase of training in this setting (i.e. `fit` is "
                        "only called once).",
                        "red",
                    )
                )
            )
            # We could also just disable the ewc task (after super().configure(setting))
            self.model.tasks["ewc"].disable()

    def on_task_switch(self, task_id: Optional[int]):
        super().on_task_switch(task_id)

    def create_model(self, setting: Setting) -> EwcModel:
        """Create the Model to use for the given Setting.

        In this case, we want to return an `EwcModel` (our customized version of the
        BaseModel).

        Parameters
        ----------
        setting : Setting
            The experimental Setting this Method will be applied to.

        Returns
        -------
        EwcModel
            The Model that will be trained and used for evaluation.
        """
        return EwcModel(setting=setting, hparams=self.hparams, config=self.config)


def demo():
    """Runs the EwcMethod on a simple setting, just to check that it works fine."""

    # Adding arguments for each group directly:
    parser = ArgumentParser(description=__doc__)

    EwcMethod.add_argparse_args(parser, dest="method")
    parser.add_arguments(Config, "config")

    args = parser.parse_args()

    method = EwcMethod.from_argparse_args(args, dest="method")
    config: Config = args.config
    task_schedule = {
        0: {"gravity": 10, "length": 0.2},
        1000: {"gravity": 100, "length": 1.2},
        # 2000:   {"gravity": 10, "length": 0.2},
    }
    setting = TaskIncrementalRLSetting(
        dataset="cartpole",
        train_task_schedule=task_schedule,
        test_task_schedule=task_schedule,
        # max_steps=1000,
    )

    # from sequoia.settings import TaskIncrementalSLSetting, ClassIncrementalSetting
    # setting = ClassIncrementalSetting(dataset="mnist", nb_tasks=5)
    # setting = TaskIncrementalSLSetting(dataset="mnist", nb_tasks=5)
    results = setting.apply(method, config=config)
    print(results.summary())


if __name__ == "__main__":
    demo()


================================================
FILE: sequoia/methods/ewc_method_test.py
================================================
""" TODO: Tests for the EWC Method. """

from functools import partial
from typing import ClassVar, Type

import numpy as np
import pytest
from torch import Tensor

from sequoia.common import Loss
from sequoia.common.config import Config
from sequoia.conftest import slow
from sequoia.methods.trainer import TrainerConfig
from sequoia.settings.rl import IncrementalRLSetting, TaskIncrementalRLSetting, TraditionalRLSetting
from sequoia.settings.sl import (
    ClassIncrementalSetting,
    MultiTaskSLSetting,
    TaskIncrementalSLSetting,
    TraditionalSLSetting,
)

from .base_method_test import TestBaseMethod as BaseMethodTests
from .ewc_method import EwcMethod, EwcModel


class TestEWCMethod(BaseMethodTests):
    Method: ClassVar[Type[Method]] = EwcMethod

    @classmethod
    @pytest.fixture
    def method(cls, config: Config, trainer_options: TrainerConfig) -> EwcMethod:
        """Fixture that returns the Method instance to use when testing/debugging."""
        trainer_options.max_epochs = 1
        return cls.Method(trainer_options=trainer_options, config=config)

    @slow
    @pytest.mark.timeout(300)
    def test_task_incremental_mnist(self, monkeypatch):
        # TODO: Change this to use the 'short task incremental setting'.
        setting = TaskIncrementalSLSetting(dataset="mnist", monitor_training_performance=True)
        total_ewc_losses_per_task = np.zeros(setting.nb_tasks)

        _training_step = EwcModel.training_step

        def wrapped_training_step(self: EwcModel, batch, batch_idx: int, *args, **kwargs):
            step_results = _training_step(self, batch, batch_idx=batch_idx, *args, **kwargs)
            loss_object: Loss = step_results["loss_object"]
            if "ewc" in loss_object.losses:
                ewc_loss_obj = loss_object.losses["ewc"]
                ewc_loss = ewc_loss_obj.total_loss
                if isinstance(ewc_loss, Tensor):
                    ewc_loss = ewc_loss.detach().cpu().numpy()
                total_ewc_losses_per_task[self.current_task] += ewc_loss
            return step_results

        monkeypatch.setattr(EwcModel, "training_step", wrapped_training_step)

        _fit = EwcMethod.fit

        at_all_points_in_time = []

        def wrapped_fit(self, train_env, valid_env):
            print(f"starting task {self.model.current_task}: {total_ewc_losses_per_task}")
            total_ewc_losses_per_task[:] = 0
            _fit(self, train_env, valid_env)
            at_all_points_in_time.append(total_ewc_losses_per_task.copy())

        monkeypatch.setattr(EwcMethod, "fit", wrapped_fit)

        # _on_epoch_end = EwcModel.on_epoch_end

        # def fake_on_epoch_end(self, *args, **kwargs):
        #     assert False, f"heyo: {total_ewc_losses_per_task}"
        #     return _on_epoch_end(self, *args, **kwargs)

        # # monkeypatch.setattr(EwcModel, "on_epoch_end", fake_on_epoch_end)
        method = EwcMethod(max_epochs=1)
        results = setting.apply(method)
        assert (at_all_points_in_time[0] == 0).all()
        assert at_all_points_in_time[1][1] != 0
        assert at_all_points_in_time[2][2] != 0
        assert at_all_points_in_time[3][3] != 0
        assert at_all_points_in_time[4][4] != 0

        assert 0.95 <= results.average_online_performance.objective
        # TODO: Fix this: Should be getting way better than this, even when just
        # debugging.
        assert 0.15 <= results.average_final_performance.objective

    @pytest.mark.parametrize(
        "non_cl_setting_fn",
        [
            partial(ClassIncrementalSetting, nb_tasks=1),
            MultiTaskSLSetting,
            TraditionalSLSetting,
            TraditionalRLSetting,
            partial(IncrementalRLSetting, nb_tasks=1),
            partial(TaskIncrementalRLSetting, nb_tasks=1),
        ],
    )
    def test_raises_warning_when_applied_to_non_cl_setting(self, non_cl_setting_fn):
        """When applied onto a non-CL setting like IID or Multi-Task SL (or RL), the
        EWCMethod should raise a warning, and disable the auxiliary task.
        """
        method = EwcMethod()
        setting = non_cl_setting_fn()

        with pytest.warns(RuntimeWarning):
            method.configure(setting)


================================================
FILE: sequoia/methods/experience_replay.py
================================================
""" Method that uses a replay buffer to prevent forgetting.

TODO: Refactor this to be based on the BaseMethod, possibly using an auxiliary task for
the Replay.
"""
from argparse import ArgumentParser, Namespace
from collections.abc import Iterable
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple, Type

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import tqdm
from gym import spaces
from torch import Tensor
from torchvision.models import ResNet
from wandb.wandb_run import Run

from sequoia.methods import register_method
from sequoia.settings import ClassIncrementalSetting
from sequoia.settings.base import Actions, Environment, Method, Observations
from sequoia.settings.sl.continual.setting import smart_class_prediction
from sequoia.utils import get_logger

logger = get_logger(__name__)


@register_method
@dataclass
class ExperienceReplayMethod(Method, target_setting=ClassIncrementalSetting):
    """Simple method that uses a replay buffer to reduce forgetting."""

    def __init__(
        self,
        learning_rate: float = 1e-3,
        buffer_capacity: int = 200,
        max_epochs_per_task: int = 10,
        weight_decay: float = 1e-6,
        seed: int = None,
    ):
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.buffer_capacity = buffer_capacity

        self.net: ResNet
        self.buffer: Optional[Buffer] = None
        self.optim: torch.optim.Optimizer
        self.task: int = 0
        self.rng = np.random.default_rng(seed)
        self.seed = seed
        if seed:
            torch.manual_seed(seed)
            torch.set_deterministic(True)

        self.epochs_per_task: int = max_epochs_per_task
        self.early_stop_patience: int = 2

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def configure(self, setting: ClassIncrementalSetting):
        self.setting = setting
        # create the model
        self.net = models.resnet18(pretrained=False)
        self.net.fc = nn.Linear(512, setting.action_space.n)
        if torch.cuda.is_available():
            self.net = self.net.to(device=self.device)
        # Set drop_last to True, to avoid getting a batch of size 1, which makes
        # batchnorm raise an error.
        setting.drop_last = True
        image_space: spaces.Box = setting.observation_space["x"]
        # Create the buffer.
        if self.buffer_capacity:
            self.buffer = Buffer(
                capacity=self.buffer_capacity,
                input_shape=image_space.shape,
                extra_buffers={"t": torch.LongTensor},
                rng=self.rng,
            ).to(device=self.device)
        # Create the optimizer.
        self.optim = torch.optim.Adam(
            self.net.parameters(),
            lr=self.learning_rate,
            weight_decay=self.weight_decay,
        )

    def fit(self, train_env: Environment, valid_env: Environment):
        self.net.train()
        # Simple example training loop, not using the validation loader.
        best_val_loss = np.inf
        best_epoch = 0

        for epoch in range(self.epochs_per_task):
            train_pbar = tqdm.tqdm(train_env, desc=f"Training Epoch {epoch}")
            postfix = {}

            obs: ClassIncrementalSetting.Observations
            rew: ClassIncrementalSetting.Rewards
            for i, (obs, rew) in enumerate(train_pbar):
                self.optim.zero_grad()

                obs = obs.to(device=self.device)
                x = obs.x

                # FIXME: Batch norm will cause a crash if we pass x with batch_size==1!
                fake_batch = False
                if x.shape[0] == 1:
                    # Pretend like this has batch_size of 2 rather than just 1.
                    x = x.tile([2, *(1 for _ in x.shape[1:])])
                    x[1] += 1  # Just so the two samples aren't identical, otherwise
                    # maybe the batch norm std would be nan or something.
                    fake_batch = True
                logits = self.net(x)
                if fake_batch:
                    logits = logits[:1]  # Drop the 'fake' second item.

                if rew is None:
                    # If our online training performance is being measured, we might
                    # need to provide actions before we can get the corresponding
                    # rewards (image labels in this case).
                    y_pred = logits.argmax(1)
                    rew = train_env.send(y_pred)

                rew = rew.to(device=self.device)
                y = rew.y
                loss = F.cross_entropy(logits, y)

                postfix["loss"] = loss.detach().item()
                if self.task > 0 and self.buffer:
                    b_samples = self.buffer.sample(x.size(0))
                    b_logits = self.net(b_samples["x"])
                    loss_replay = F.cross_entropy(b_logits, b_samples["y"])
                    loss += loss_replay
                    postfix["replay loss"] = loss_replay.detach().item()

                loss.backward()
                self.optim.step()

                train_pbar.set_postfix(postfix)

                # Only add new samples to the buffer (only during first epoch).
                if self.buffer and epoch == 0:
                    self.buffer.add_reservoir({"x": x, "y": y, "t": self.task})

            # Validation loop:
            self.net.eval()
            torch.set_grad_enabled(False)
            val_pbar = tqdm.tqdm(valid_env)
            val_pbar.set_description(f"Validation Epoch {epoch}")
            epoch_val_loss = 0.0
            epoch_val_loss_list: List[float] = []

            for i, (obs, rew) in enumerate(val_pbar):
                obs = obs.to(device=self.device)
                x = obs.x
                logits = self.net(x)

                if rew is None:
                    y_pred = logits.argmax(-1)
                    rew = valid_env.send(y_pred)

                assert rew is not None
                rew = rew.to(device=self.device)
                y = rew.y
                val_loss = F.cross_entropy(logits, y).item()

                epoch_val_loss_list.append(val_loss)
                postfix["validation loss"] = val_loss
                val_pbar.set_postfix(postfix)
            torch.set_grad_enabled(True)
            epoch_val_loss_mean = np.mean(epoch_val_loss_list)

            if epoch_val_loss_mean < best_val_loss:
                best_val_loss = epoch_val_loss_mean
                best_epoch = epoch
            if epoch - best_epoch > self.early_stop_patience:
                print(f"Early stopping at epoch {epoch}.")
                # TODO: Reload the weights from the best epoch.
                break

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        observations = observations.to(device=self.device)
        task_labels = observations.task_labels

        logits = self.net(observations.x)

        if task_labels is not None:
            y_pred = smart_class_prediction(
                logits=logits,
                task_labels=task_labels,
                setting=self.setting,
                train=False,
            )
        else:
            y_pred = logits.argmax(1)
        return self.setting.Actions(y_pred=y_pred)

    def on_task_switch(self, task_id: Optional[int]):
        print(f"Switching from task {self.task} to task {task_id}")
        if self.training:
            self.task = task_id

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser) -> None:
        """Add the command-line arguments for this Method to the given parser.

        Parameters
        ----------
        parser : ArgumentParser
            The ArgumentParser.
        """
        parser.add_argument("--learning_rate", type=float, default=1e-3)
        parser.add_argument("--weight_decay", type=float, default=1e-6)
        parser.add_argument("--buffer_capacity", type=int, default=200)
        parser.add_argument("--max_epochs_per_task", type=int, default=10)
        parser.add_argument("--seed", type=int, default=None, help="Random seed")

    @classmethod
    def from_argparse_args(cls, args: Namespace, dest: str = None):
        """Extract the parsed command-line arguments from the namespace and
        return an instance of class `cls`.

        Parameters
        ----------
        args : Namespace
            The namespace containing all the parsed command-line arguments.
        dest : str, optional
            The , by default None

        Returns
        -------
        cls
            An instance of the class `cls`.
        """
        args = args if not dest else getattr(args, dest)
        return cls(
            learning_rate=args.learning_rate,
            buffer_capacity=args.buffer_capacity,
            max_epochs_per_task=args.max_epochs_per_task,
            weight_decay=args.weight_decay,
            seed=args.seed,
        )

    def get_search_space(self, setting: ClassIncrementalSetting) -> Dict:
        return {
            "learning_rate": "loguniform(1e-4, 5e-1, default_value=1e-3)",
            "buffer_capacity": "uniform(1000, 100_000, default_value=10_000, discrete=True)",
            "weight_decay": "loguniform(1e-12, 1e-3, default_value=1e-6)",
            "early_stop_patience": "uniform(0, 2, default_value=1, discrete=True)",
        }

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        NOTE: It is very strongly recommended that you always re-create your model and
        any modules / components that depend on these hyper-parameters inside the
        `configure` method! (Otherwise these new hyper-parameters will not be used in
        the next run)

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        # Here we overwrite the corresponding attributes with the new suggested values
        # leaving other fields unchanged.
        # NOTE: These new hyper-paramers will be used in the next run in the sweep,
        # since each call to `configure` will create a new Model.
        self.learning_rate = new_hparams["learning_rate"]
        self.weight_decay = new_hparams["weight_decay"]
        self.buffer_capacity = new_hparams["buffer_capacity"]

    def setup_wandb(self, run: Run) -> None:
        """Called by the Setting when using Weights & Biases, after `wandb.init`.

        This method is here to provide Methods with the opportunity to log some of their
        configuration options or hyper-parameters to wandb.

        NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
        this point.

        Parameters
        ----------
        run : wandb.Run
            Current wandb Run.
        """
        run.config.update(
            dict(
                learning_rate=self.learning_rate,
                weight_decay=self.weight_decay,
                buffer_capacity=self.buffer_capacity,
                epochs_per_task=self.epochs_per_task,
                seed=self.seed,
            )
        )


class Buffer(nn.Module):
    def __init__(
        self,
        capacity: int,
        input_shape: Tuple[int, ...],
        extra_buffers: Dict[str, Type[torch.Tensor]] = None,
        rng: np.random.RandomState = None,
    ):
        super().__init__()
        self.rng = rng or np.random.RandomState()

        bx = torch.zeros([capacity, *input_shape], dtype=torch.float)
        by = torch.zeros([capacity], dtype=torch.long)

        self.register_buffer("bx", bx)
        self.register_buffer("by", by)
        self.buffers = ["bx", "by"]

        extra_buffers = extra_buffers or {}
        for name, dtype in extra_buffers.items():
            tmp = dtype(capacity).fill_(0)
            self.register_buffer(f"b{name}", tmp)
            self.buffers += [f"b{name}"]

        self.current_index = 0
        self.n_seen_so_far = 0
        self.is_full = 0
        # (@lebrice) args isn't defined here:
        # self.to_one_hot  = lambda x : x.new(x.size(0), args.n_classes).fill_(0).scatter_(1, x.unsqueeze(1), 1)
        self.arange_like = lambda x: torch.arange(x.size(0)).to(x.device)
        self.shuffle = lambda x: x[torch.randperm(x.size(0))]

    @property
    def x(self):
        return self.bx[: self.current_index]

    @property
    def y(self):
        raise NotImplementedError("Can't make y one-hot, dont have n_classes.")
        return self.to_one_hot(self.by[: self.current_index])

    def add_reservoir(self, batch: Dict[str, Tensor]) -> None:
        n_elem = batch["x"].size(0)

        # add whatever still fits in the buffer
        place_left = max(0, self.bx.size(0) - self.current_index)

        if place_left:
            offset = min(place_left, n_elem)

            for name, data in batch.items():
                buffer = getattr(self, f"b{name}")
                if isinstance(data, Iterable):
                    buffer[self.current_index : self.current_index + offset].data.copy_(
                        data[:offset]
                    )
                else:
                    buffer[self.current_index : self.current_index + offset].fill_(data)

            self.current_index += offset
            self.n_seen_so_far += offset

            # everything was added
            if offset == batch["x"].size(0):
                return

        x = batch["x"]
        self.place_left = False

        indices = (
            torch.FloatTensor(x.size(0) - place_left)
            .to(x.device)
            .uniform_(0, self.n_seen_so_far)
            .long()
        )
        valid_indices: Tensor = (indices < self.bx.size(0)).long()

        idx_new_data = valid_indices.nonzero(as_tuple=False).squeeze(-1)
        idx_buffer = indices[idx_new_data]

        self.n_seen_so_far += x.size(0)

        if idx_buffer.numel() == 0:
            return

        # perform overwrite op
        for name, data in batch.items():
            buffer = getattr(self, f"b{name}")
            if isinstance(data, Iterable):
                data = data[place_left:]
                buffer[idx_buffer] = data[idx_new_data]
            else:
                buffer[idx_buffer] = data

    def sample(self, n_samples: int, exclude_task: int = None) -> Dict[str, Tensor]:
        buffers = {}
        if exclude_task is not None:
            assert hasattr(self, "bt")
            valid_indices = (self.bt != exclude_task).nonzero().squeeze()
            for buffer_name in self.buffers:
                buffers[buffer_name] = getattr(self, buffer_name)[valid_indices]
        else:
            for buffer_name in self.buffers:
                buffers[buffer_name] = getattr(self, buffer_name)[: self.current_index]

        bx = buffers["bx"]
        if bx.size(0) < n_samples:
            return buffers
        else:
            indices_np = self.rng.choice(bx.size(0), n_samples, replace=False)
            indices = torch.from_numpy(indices_np).to(self.bx.device)
            return {k[1:]: v[indices] for (k, v) in buffers.items()}


if __name__ == "__main__":
    ExperienceReplayMethod.main()


================================================
FILE: sequoia/methods/experience_replay_test.py
================================================
from typing import ClassVar, Dict, Type

import pytest

from sequoia.common.config import Config
from sequoia.conftest import slow
from sequoia.methods.method_test import MethodTests
from sequoia.settings.sl import ClassIncrementalSetting, SLSetting

from .experience_replay import ExperienceReplayMethod


class TestExperienceReplay(MethodTests):
    Method: ClassVar[Type[ExperienceReplayMethod]] = ExperienceReplayMethod
    method_debug_kwargs: ClassVar[Dict] = {"buffer_capacity": 100, "max_epochs_per_task": 1}

    @classmethod
    @pytest.fixture
    def method(cls, config: Config) -> ExperienceReplayMethod:
        """Fixture that returns the Method instance to use when testing/debugging."""
        return cls.Method(**cls.method_debug_kwargs)

    def validate_results(
        self,
        setting: SLSetting,
        method: ExperienceReplayMethod,
        results: SLSetting.Results,
    ) -> None:
        assert results
        assert results.objective

    @slow
    @pytest.mark.timeout(300)
    def test_class_incremental_mnist(self, config: Config):
        method = ExperienceReplayMethod(buffer_capacity=200, max_epochs_per_task=1)
        setting = ClassIncrementalSetting(
            dataset="mnist",
            monitor_training_performance=True,
        )
        results = setting.apply(method, config=config)
        assert 0.90 <= results.average_online_performance.objective

        assert 0.70 <= results.final_performance_metrics[0].objective
        assert 0.70 <= results.final_performance_metrics[1].objective
        assert 0.70 <= results.final_performance_metrics[2].objective
        assert 0.70 <= results.final_performance_metrics[3].objective
        assert 0.70 <= results.final_performance_metrics[4].objective

        assert 0.80 <= results.average_final_performance.objective


================================================
FILE: sequoia/methods/hat.py
================================================
""" Hard Attention to the Task

```
@inproceedings{serra2018overcoming,
    title={Overcoming Catastrophic Forgetting with Hard Attention to the Task},
    author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros},
    booktitle={International Conference on Machine Learning},
    pages={4548--4557},
    year={2018}
}
```
"""

from argparse import Namespace
from dataclasses import dataclass
from typing import Any, Dict, Mapping, NamedTuple, Optional, Tuple, Union

import gym
import numpy as np
import torch
import tqdm
from numpy import inf
from simple_parsing import ArgumentParser
from torch import Tensor
from wandb.wandb_run import Run

from sequoia.common import Config
from sequoia.common.hparams import HyperParameters, categorical, log_uniform, uniform
from sequoia.common.spaces import Image
from sequoia.methods import register_method
from sequoia.settings import Environment, Method, Setting
from sequoia.settings.sl import TaskIncrementalSLSetting
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.settings.sl.incremental.objects import Actions, Observations, Rewards


class Masks(NamedTuple):
    """Named tuple for the masked tensors created in the HATNet."""

    gc1: Tensor
    gc2: Tensor
    gc3: Tensor
    gfc1: Tensor
    gfc2: Tensor


class HatNet(torch.nn.Module):
    """
    @inproceedings{serra2018overcoming,
      title={Overcoming Catastrophic Forgetting with Hard Attention to the Task},
      author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros},
      booktitle={International Conference on Machine Learning},
      pages={4548--4557},
      year={2018}
    }

    The model is where the model weights are initialized.
    Just like a classic PyTorch, here the different layers and components of the model
    are defined.
    """

    def __init__(self, image_space: Image, n_classes_per_task: Dict[int, int], s_hat: int = 50):
        super().__init__()

        ncha = image_space.channels
        size = image_space.width
        self.n_classes_per_task = n_classes_per_task
        self.s_hat = s_hat

        self.c1 = torch.nn.Conv2d(ncha, 64, kernel_size=size // 8)
        s = compute_conv_output_size(size, size // 8)
        s //= 2
        self.c2 = torch.nn.Conv2d(64, 128, kernel_size=size // 10)
        s = compute_conv_output_size(s, size // 10)
        s //= 2
        self.c3 = torch.nn.Conv2d(128, 256, kernel_size=2)
        s = compute_conv_output_size(s, 2)
        s //= 2
        self.smid = s
        self.maxpool = torch.nn.MaxPool2d(2)
        self.relu = torch.nn.ReLU()

        self.drop1 = torch.nn.Dropout(0.2)
        self.drop2 = torch.nn.Dropout(0.5)
        self.fc1 = torch.nn.Linear(256 * self.smid * self.smid, 2048)
        self.fc2 = torch.nn.Linear(2048, 2048)
        self.output_layers = torch.nn.ModuleList()

        n_tasks = len(self.n_classes_per_task)
        # TODO: (@lebrice) Here I'm 'fixing' this, by making it so each output head has
        # as many outputs as there are classes in total. It's not super efficient, but
        # it should work.
        total_classes = sum(self.n_classes_per_task.values())
        for task_index, n_classes_in_task in self.n_classes_per_task.items():
            self.output_layers.append(torch.nn.Linear(2048, total_classes))

        self.gate = torch.nn.Sigmoid()
        # All embedding stuff should start with 'e'
        self.ec1 = torch.nn.Embedding(n_tasks, 64)
        self.ec2 = torch.nn.Embedding(n_tasks, 128)
        self.ec3 = torch.nn.Embedding(n_tasks, 256)
        self.efc1 = torch.nn.Embedding(n_tasks, 2048)
        self.efc2 = torch.nn.Embedding(n_tasks, 2048)

        self.flatten = torch.nn.Flatten()

        self.loss = torch.nn.CrossEntropyLoss()
        self.current_task: Optional[int] = 0

    def forward(self, observations: TaskIncrementalSLSetting.Observations) -> Tuple[Tensor, Masks]:
        observations.as_list_of_tuples()
        x = observations.x
        t = observations.task_labels
        # BUG: This won't work if task_labels is None (which is the case at
        # test-time in the ClassIncrementalSetting)
        masks = self.mask(t, s_hat=self.s_hat)
        gc1, gc2, gc3, gfc1, gfc2 = masks
        # Gated
        h = self.maxpool(self.drop1(self.relu(self.c1(x))))
        h = h * gc1.unsqueeze(2).unsqueeze(3)
        h = self.maxpool(self.drop1(self.relu(self.c2(h))))
        h = h * gc2.unsqueeze(2).unsqueeze(3)
        h = self.maxpool(self.drop2(self.relu(self.c3(h))))
        h = h * gc3.unsqueeze(2).unsqueeze(3)
        h = self.flatten(h)
        h = self.drop2(self.relu(self.fc1(h)))
        h = h * gfc1.expand_as(h)
        h = self.drop2(self.relu(self.fc2(h)))
        h = h * gfc2.expand_as(h)

        # Each batch can have elements of more than one Task (in test)
        # In Task Incremental Learning, each task have it own classification head.
        y: Optional[Tensor] = None
        task_masks = {}
        for task_id in set(t.tolist()):
            task_mask = t == task_id
            task_masks[task_id] = task_mask

            y_pred_t = self.output_layers[task_id](h.clone())
            if y is None:
                y = y_pred_t
            else:
                y[task_mask] = y_pred_t[task_mask]
        assert y is not None
        return y, masks

    def mask(self, t: Tensor, s_hat: float) -> Masks:
        gc1 = self.gate(s_hat * self.ec1(t))
        gc2 = self.gate(s_hat * self.ec2(t))
        gc3 = self.gate(s_hat * self.ec3(t))
        gfc1 = self.gate(s_hat * self.efc1(t))
        gfc2 = self.gate(s_hat * self.efc2(t))
        return Masks(gc1, gc2, gc3, gfc1, gfc2)

    def shared_step(
        self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment
    ) -> Tuple[Tensor, Dict]:
        """Shared step used for both training and validation.

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            Batch containing Observations, and optional Rewards. When the Rewards are
            None, it means that we'll need to provide the Environment with actions
            before we can get the Rewards (e.g. image labels) back.

            This happens for example when being applied in a Setting which cares about
            sample efficiency or training performance, for example.

        environment : Environment
            The environment we're currently interacting with. Used to provide the
            rewards when they aren't already part of the batch, for example when our
            performance is being monitored during training.

        Returns
        -------
        Tuple[Tensor, Dict]
            The Loss tensor, and a dict of metrics to be logged.
        """
        # Since we're training on a Passive environment, we will get both observations
        # and rewards, unless we're being evaluated based on our training performance,
        # in which case we will need to send actions to the environments before we can
        # get the corresponding rewards (image labels) back.
        observations: Observations = batch[0]
        rewards: Optional[Rewards] = batch[1]

        # Get the predictions:
        logits, _ = self(observations)
        y_pred = logits.argmax(-1)

        if rewards is None:
            # If the rewards in the batch were None, it means we're expected to give
            # actions before we can get rewards back from the environment.
            # This happens when the Setting is monitoring our training performance.
            rewards = environment.send(Actions(y_pred))

        assert rewards is not None
        image_labels = rewards.y

        loss = self.loss(logits, image_labels)

        accuracy = (y_pred == image_labels).sum().float() / len(image_labels)
        metrics_dict = {"accuracy": accuracy}
        return loss, metrics_dict


def compute_conv_output_size(
    Lin: int, kernel_size: int, stride: int = 1, padding: int = 0, dilation: int = 1
) -> int:
    return int(np.floor((Lin + 2 * padding - dilation * (kernel_size - 1) - 1) / float(stride) + 1))


@register_method
class HatMethod(Method, target_setting=TaskIncrementalSLSetting):
    """Hard Attention to the Task

    ```
    @inproceedings{serra2018overcoming,
        title={Overcoming Catastrophic Forgetting with Hard Attention to the Task},
        author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros},
        booktitle={International Conference on Machine Learning},
        pages={4548--4557},
        year={2018}
    }
    ```
    """

    @dataclass
    class HParams(HyperParameters):
        """Hyper-parameters of the Settings."""

        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001)
        # Batch size
        batch_size: int = categorical(16, 32, 64, 128, default=128)
        # weight/importance of the task embedding to the gate function
        s_hat: float = uniform(1.0, 100.0, default=50.0)
        # Maximum number of training epochs per task
        max_epochs_per_task: int = uniform(1, 20, default=10, discrete=True)

    def __init__(self, hparams: HParams = None):
        self.hparams: HatMethod.HParams = hparams or self.HParams()
        self.early_stopping_patience = 2
        # We will create those when `configure` will be called, before training.
        self.model: HatNet
        self.optimizer: torch.optim.Optimizer

    def configure(self, setting: TaskIncrementalSLSetting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        setting.batch_size = self.hparams.batch_size
        assert (
            setting.increment == setting.test_increment
        ), "Assuming same number of classes per task for training and testing."
        n_classes_per_task = {
            i: setting.num_classes_in_task(i, train=True) for i in range(setting.nb_tasks)
        }
        image_space: Image = setting.observation_space["x"]
        self.model = HatNet(
            image_space=image_space,
            n_classes_per_task=n_classes_per_task,
            s_hat=self.hparams.s_hat,
        )
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=self.hparams.learning_rate,
        )

    def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
        """
        Train loop

        Different Settings can return elements from tasks in an other  way,
        be it class incremental, task incremental, etc.

        Batch can have information about en environment, rewards, input, task labels, etc.
        And we call the forward training function of our method, independent of the settings
        """

        # configure() will have been called by the setting before we get here,

        best_val_loss = inf
        best_epoch = 0
        for epoch in range(self.hparams.max_epochs_per_task):
            self.model.train()
            print(f"Starting epoch {epoch}")
            # Training loop:
            with tqdm.tqdm(train_env) as train_pbar:
                postfix = {}
                train_pbar.set_description(f"Training Epoch {epoch}")
                for i, batch in enumerate(train_pbar):
                    loss, metrics_dict = self.model.shared_step(
                        batch,
                        environment=train_env,
                    )
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    postfix.update(metrics_dict)
                    train_pbar.set_postfix(postfix)

            # Validation loop:
            self.model.eval()
            torch.set_grad_enabled(False)
            with tqdm.tqdm(valid_env) as val_pbar:
                postfix = {}
                val_pbar.set_description(f"Validation Epoch {epoch}")
                epoch_val_loss = 0.0

                for i, batch in enumerate(val_pbar):
                    batch_val_loss, metrics_dict = self.model.shared_step(
                        batch,
                        environment=valid_env,
                    )
                    epoch_val_loss += batch_val_loss
                    postfix.update(metrics_dict, val_loss=epoch_val_loss)
                    val_pbar.set_postfix(postfix)
            torch.set_grad_enabled(True)

            if epoch_val_loss < best_val_loss:
                best_val_loss = epoch_val_loss
                best_epoch = i
            elif epoch - best_epoch > self.early_stopping_patience:
                print(f"Early stopping at epoch {epoch}")
                break

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        """Get a batch of predictions (aka actions) for these observations."""
        with torch.no_grad():
            logits, _ = self.model(observations)
        # Get the predicted classes
        y_pred = logits.argmax(dim=-1)
        return self.target_setting.Actions(y_pred)

    def on_task_switch(self, task_id: Optional[int]):
        # This method gets called if task boundaries are known in the current
        # setting. Furthermore, if task labels are available, task_id will be
        # the index of the new task. If not, task_id will be None.
        # TODO: Does this method actually work when task_id is None?
        self.model.current_task = task_id

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser) -> None:
        parser.add_arguments(cls.HParams, dest="hparams")
        # You can also add arguments as usual:
        # parser.add_argument("--foo", default=123)

    @classmethod
    def from_argparse_args(cls, args: Namespace) -> "HatMethod":
        hparams: HatMethod.HParams = args.hparams
        # foo: int = args.foo
        method = cls(hparams=hparams)
        return method

    def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        return self.hparams.get_orion_space()

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        # Here we overwrite the corresponding attributes with the new suggested values
        # leaving other fields unchanged.
        # NOTE: These new hyper-paramers will be used in the next run in the sweep,
        # since each call to `configure` will create a new Model.
        self.hparams = self.hparams.replace(**new_hparams)

    def setup_wandb(self, run: Run) -> None:
        """Called by the Setting when using Weights & Biases, after `wandb.init`.

        This method is here to provide Methods with the opportunity to log some of their
        configuration options or hyper-parameters to wandb.

        NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
        this point.

        Parameters
        ----------
        run : wandb.Run
            Current wandb Run.
        """
        run.config["hparams"] = self.hparams.to_dict()


if __name__ == "__main__":
    # Example: Evaluate a Method on a single CL setting:
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    """
    We must define 3 main components:
     1.- Setting: It is the continual learning scenario that we are working, SL or RL, TI or CI
                  Each settings has it own parameters that can be customized.
     2.- Model: Is the parameters and layers of the model, just like in PyTorch.
                We can use a predefined model or create your own
     3.- Method: It is how we are going to use what the settings give us to train our model.
                 Same as before, we can define our own or use pre-defined Methods.
    """
    # Add arguments for the Method, the Setting, and the Config.
    # (Config contains options like the log_dir, the data_dir, etc.)
    HatMethod.add_argparse_args(parser, dest="method")
    parser.add_arguments(TaskIncrementalSLSetting, dest="setting")
    parser.add_arguments(Config, "config")

    args = parser.parse_args()

    # Create the Method from the args, and extract the Setting, and the Config:
    method: HatMethod = HatMethod.from_argparse_args(args, dest="method")
    setting: TaskIncrementalSLSetting = args.setting
    config: Config = args.config

    # Apply the method to the setting, optionally passing in a Config,
    # producing Results.
    results = setting.apply(method, config=config)
    print(results.summary())
    print(f"objective: {results.objective}")


================================================
FILE: sequoia/methods/method_test.py
================================================
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import ClassVar, Dict, Type, TypeVar

import pytest

from sequoia.common.config import Config
from sequoia.conftest import config, session_config
from sequoia.settings import RLSetting, Setting, SLSetting
from sequoia.settings.base import Method
from sequoia.settings.sl.continual.setting import random_subset


def key_fn(setting_class: Type[Setting]):
    # order tests in terms of their 'depth' in the tree, and break ties arbitrarily
    # based on the name.
    return (len(setting_class.parents()), setting_class.__name__)


def make_setting_type_fixture(method_type: Type[Method]) -> pytest.fixture:
    """Create a parametrized fixture that will go through all the applicable settings
    for a given method.
    """

    def setting_type(self, request):
        setting_type = request.param
        return setting_type

    setting_types = set(method_type.get_applicable_settings())
    settings_to_remove = set([Setting, SLSetting, RLSetting])
    # NOTE: Need to make a deterministic ordering of settings, otherwise we can't
    # parallelize tests with pytest-xdist
    setting_types = sorted(list(setting_types - settings_to_remove), key=key_fn)
    return pytest.fixture(
        params=setting_types,
        scope="module",
    )(setting_type)


MethodType = TypeVar("MethodType", bound=Method)


class MethodTests(ABC):
    """Base class that can be extended to generate tests for a method.

    The main test of interest is `test_debug`.
    """

    Method: ClassVar[Type[MethodType]]
    setting_type: pytest.fixture
    # Kwargs to pass when contructing the Settings.
    setting_kwargs: ClassVar[Dict] = {}
    method_debug_kwargs: ClassVar[Dict] = {}

    def __init_subclass__(cls, method: Type[MethodType] = None):
        """Dynamically generates a `setting_type` fixture on the subclass, which will
        be parametrized by the settings that the Method is applicable to.
        """
        super().__init_subclass__()
        if not method and not hasattr(cls, "Method"):
            raise RuntimeError(
                "Need to either pass `method` when subclassing or set "
                "a 'Method' class attribute."
            )
        cls.Method = cls.Method or method
        cls.setting_type: pytest.fixture = make_setting_type_fixture(cls.Method)

    @classmethod
    @abstractmethod
    @pytest.fixture
    def method(cls, config: Config) -> MethodType:
        """Fixture that returns the Method instance to use when testing/debugging.

        Needs to be implemented when creating a new test class (to generate tests for a
        new method).
        """
        return cls.Method(**cls.method_debug_kwargs)

    @abstractmethod
    def validate_results(
        self,
        setting: Setting,
        method: MethodType,
        results: Setting.Results,
    ) -> None:
        assert results
        assert results.objective
        assert results.objective is not None
        print(results.summary())

    # NOTE: Need to re-define these here, just so external packages, which maybe aren't
    # in the "scope" of `sequoia/conftest.py` can also use them:
    # Dropping the `self` argument by making those static methods on the class.
    session_config: pytest.fixture = staticmethod(session_config)
    config: pytest.fixture = staticmethod(config)

    @pytest.fixture(scope="module")
    def setting(self, setting_type: Type[Setting], session_config: Config):
        # TODO: Fix this test setup, nb_tasks should be something low like 2, and
        # perhaps use max_episode_steps to limit episode length
        if issubclass(setting_type, SLSetting):
            setting_kwargs = dict(
                nb_tasks=5,
                config=session_config,
            )
            setting_kwargs.setdefault("monitor_training_performance", True)
            # TODO: Do we also want to parameterize the dataset? or is it too much?
            setting_kwargs.update(self.setting_kwargs)
            setting = setting_type(
                **setting_kwargs,
            )
            assert setting.dataset, setting_kwargs
            setting.config = session_config
            setting.batch_size = 10
            setting.prepare_data()
            setting.setup()
            nb_tasks = 5
            samples_per_task = 50
            # Testing this out: Shortening the train datasets:
            setting.train_datasets = [
                random_subset(task_dataset, samples_per_task)
                for task_dataset in setting.train_datasets
            ]
            setting.val_datasets = [
                random_subset(task_dataset, samples_per_task)
                for task_dataset in setting.val_datasets
            ]
            setting.test_datasets = [
                random_subset(task_dataset, samples_per_task)
                for task_dataset in setting.test_datasets
            ]
            assert len(setting.train_datasets) == nb_tasks
            assert len(setting.val_datasets) == nb_tasks
            assert len(setting.test_datasets) == nb_tasks
            assert all(len(dataset) == samples_per_task for dataset in setting.train_datasets)
            assert all(len(dataset) == samples_per_task for dataset in setting.val_datasets)
            assert all(len(dataset) == samples_per_task for dataset in setting.test_datasets)

            # Assert that calling setup doesn't overwrite the datasets.
            setting.setup()
            assert len(setting.train_datasets) == nb_tasks
            assert len(setting.val_datasets) == nb_tasks
            assert len(setting.test_datasets) == nb_tasks
            assert all(len(dataset) == samples_per_task for dataset in setting.train_datasets)
            assert all(len(dataset) == samples_per_task for dataset in setting.val_datasets)
            assert all(len(dataset) == samples_per_task for dataset in setting.test_datasets)
        else:
            # RL setting:
            setting_kwargs = dict(
                nb_tasks=2,
                train_max_steps=1_000,
                test_max_steps=1_000,
                # train_steps_per_task=2_000,
                # test_steps_per_task=1_000,
                config=session_config,
            )
            # TODO: Do we also want to parameterize the dataset? or is it too much?
            setting_kwargs.update(self.setting_kwargs)
            setting = setting_type(
                **setting_kwargs,
            )

        yield setting

    def test_debug(self, method: MethodType, setting: Setting, config: Config):
        """Apply the Method onto a setting, and validate the results."""
        results: Setting.Results = setting.apply(method, config=config)
        self.validate_results(setting=setting, method=method, results=results)


@dataclass
class NewSetting(Setting):
    pass


@dataclass
class NewMethod(Method, target_setting=NewSetting):
    def fit(self, train_env, valid_env):
        pass

    def get_actions(self, observations, action_space):
        return action_space.sample()


def test_passing_arg_to_class_constructor_works():
    assert NewMethod.target_setting is NewSetting
    assert NewMethod().target_setting is NewSetting


@pytest.mark.xfail(reason="Not sure this is necessary.")
def test_cant_change_target_setting():
    with pytest.raises(AttributeError):
        NewMethod.target_setting = NewSetting
    with pytest.raises(AttributeError):
        NewMethod().target_setting = NewSetting


def test_target_setting_is_inherited():
    @dataclass
    class NewMethod2(NewMethod):
        pass

    assert NewMethod2.target_setting is NewSetting


@dataclass
class SettingA(Setting):
    pass


@dataclass
class SettingA1(SettingA):
    pass


@dataclass
class SettingA2(SettingA):
    pass


@dataclass
class SettingB(Setting):
    pass


class MethodA(Method, target_setting=SettingA):
    def fit(self, train_env, valid_env):
        pass

    def get_actions(self, observations, action_space):
        return action_space.sample()


class MethodB(Method, target_setting=SettingB):
    def fit(self, train_env, valid_env):
        pass

    def get_actions(self, observations, action_space):
        return action_space.sample()


class CoolGeneralMethod(Method, target_setting=Setting):
    def fit(self, train_env, valid_env):
        pass

    def get_actions(self, observations, action_space):
        return action_space.sample()


def test_method_is_applicable_to_setting():
    """Test the mechanism for determining if a method is applicable for a given
    setting.

    Uses the mock hierarchy created above:
    - Setting
        - SettingA
            - SettingA1
            - SettingA2
        - SettingB

    - Method
        - MethodA (target_setting: SettingA)
        - MethodB (target_setting: SettingA)

    TODO: if we ever end up registering the method classes when declaring them,
    then we will need to check that this dummy test hierarchy doesn't actually
    show up in the real setting options.
    """
    # A Method designed for `SettingA` ISN'T applicable on the root node
    # `Setting`:
    assert not MethodA.is_applicable(Setting)

    # A Method designed for `SettingA` IS applicable on the target node, and all
    # nodes below it in the tree:
    assert MethodA.is_applicable(SettingA)
    assert MethodA.is_applicable(SettingA1)
    assert MethodA.is_applicable(SettingA2)
    # A Method designed for `SettingA` ISN'T applicable on some other branch in
    # the tree:
    assert not MethodA.is_applicable(SettingB)

    # Same for Method designed for `SettingB`
    assert MethodB.is_applicable(SettingB)
    assert not MethodB.is_applicable(Setting)
    assert not MethodB.is_applicable(SettingA)
    assert not MethodB.is_applicable(SettingA1)
    assert not MethodB.is_applicable(SettingA2)


def test_is_applicable_also_works_on_instances():
    assert MethodA().is_applicable(SettingA)
    assert MethodA.is_applicable(SettingA())
    assert MethodA().is_applicable(SettingA())

    assert not MethodA().is_applicable(SettingB)
    assert not MethodA.is_applicable(SettingB())
    assert not MethodA().is_applicable(SettingB())


================================================
FILE: sequoia/methods/models/__init__.py
================================================
# from .actor_critic_agent import ActorCritic
# from .agent import Agent
from .base_model import BaseModel, Model, available_encoders, available_optimizers
from .forward_pass import ForwardPass
from .output_heads import ClassificationHead, OutputHead, RegressionHead


================================================
FILE: sequoia/methods/models/base_model/__init__.py
================================================
""" This module defines the `BaseModel` used by the `BaseMethod`.

Output heads are available for both Supervised and Reinforcement Learning, and can be
found in `sequoia.methods.models.output_heads`.

Instead of defining the `Model` in one large file, it is instead split into a base
class (`Model`, defined in `model.py`) on top of which a few "mixins" are added, each
of which adds additional functionality:

- [SemiSupervisedModel](self_supervised_model.py):
    Adds support for semi-supervised (partially labeled or un-labeled) training, by
    splitting up partially labeled batches into a fully labeled sub-batch and a fully
    unlabeled sub-batch.

- [MultiHeadModel](multihead_model.py):
    Adds support for:
    - multi-head prediction: Using a dedicated output head for each task when
      task labels are available
    - Mixed batches (data coming from more than one task within the same batch)
    - TODO: Task inference: When task labels aren't available, perform
      some task inference in order to choose which output head to use.

- [SelfSupervisedModel](self_supervised_model.py):
    Adds methods for adding self-supervised losses to the model using different
    Auxiliary Tasks.
    
The `BaseModel` is then formed by inheriting from each of these mixins.
"""
from .base_model import BaseModel

# TODO: Maybe the naming of these could be a bit better: Model seems more 'general' than BaseModel.
from .model import Model, available_encoders, available_optimizers
from .multihead_model import MultiHeadModel
from .self_supervised_model import SelfSupervisedModel
from .semi_supervised_model import SemiSupervisedModel


================================================
FILE: sequoia/methods/models/base_model/base_model.py
================================================
""" Example/Template of a Model to be used as part of a Method.

You can use this as a base class when creating your own models, or you can
start from scratch, whatever you like best.
"""
from dataclasses import dataclass
from typing import ClassVar, Dict, Generic, Optional, Tuple, Type, TypeVar

import numpy as np
import torch
from simple_parsing import choice, mutable_field
from torch import Tensor, nn, optim
from torch.optim.optimizer import Optimizer
from torchvision import models as tv_models

from sequoia.common.config import Config
from sequoia.common.hparams import categorical, log_uniform
from sequoia.methods.aux_tasks.auxiliary_task import AuxiliaryTask
from sequoia.methods.models.output_heads import OutputHead, PolicyHead
from sequoia.methods.models.simple_convnet import SimpleConvNet
from sequoia.settings import Environment, Observations, Rewards, Setting
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.utils.logging_utils import get_logger

from .model import ForwardPass
from .multihead_model import MultiHeadModel
from .self_supervised_model import SelfSupervisedModel
from .semi_supervised_model import SemiSupervisedModel

torch.autograd.set_detect_anomaly(True)

logger = get_logger(__name__)
SettingType = TypeVar("SettingType", bound=IncrementalAssumption)


class BaseModel(SemiSupervisedModel, MultiHeadModel, SelfSupervisedModel, Generic[SettingType]):
    """Base model LightningModule (nn.Module extended by pytorch-lightning)

    This model splits the learning task into a representation-learning problem
    and a downstream task (output head) applied on top of it.

    The most important method to understand is the `get_loss` method, which
    is used by the [train/val/test]_step methods which are called by
    pytorch-lightning.
    """

    @dataclass
    class HParams(SemiSupervisedModel.HParams, SelfSupervisedModel.HParams, MultiHeadModel.HParams):
        """HParams of the Model."""

        # NOTE: All the fields below were just copied from the BaseHParams class, just
        # to improve visibility a bit.

        # Class variables that hold the available optimizers and encoders.
        # NOTE: These don't get parsed from the command-line.
        available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = {
            "sgd": optim.SGD,
            "adam": optim.Adam,
            "rmsprop": optim.RMSprop,
        }

        # Which optimizer to use.
        optimizer: Type[Optimizer] = categorical(available_optimizers, default=optim.Adam)

        available_encoders: ClassVar[Dict[str, Type[nn.Module]]] = {
            "vgg16": tv_models.vgg16,
            "resnet18": tv_models.resnet18,
            "resnet34": tv_models.resnet34,
            "resnet50": tv_models.resnet50,
            "resnet101": tv_models.resnet101,
            "resnet152": tv_models.resnet152,
            "alexnet": tv_models.alexnet,
            "densenet": tv_models.densenet161,
            # TODO: Add the self-supervised pl modules here!
            "simple_convnet": SimpleConvNet,
        }
        # Which encoder to use.
        encoder: Type[nn.Module] = choice(
            available_encoders,
            default=SimpleConvNet,
            # # TODO: Only considering these two for now when performing an HPO sweep.
            # probabilities={"resnet18": 0., "simple_convnet": 1.0},
        )

        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
        # L2 regularization term for the model weights.
        weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)

        # Batch size to use during training and evaluation.
        batch_size: Optional[int] = None

        # Number of hidden units (before the output head).
        # When left to None (default), the hidden size from the pretrained
        # encoder model will be used. When set to an integer value, an
        # additional Linear layer will be placed between the outputs of the
        # encoder in order to map from the encoder's output size H_e
        # to this new hidden size `new_hidden_size`.
        new_hidden_size: Optional[int] = None
        # Retrain the encoder from scratch or start from pretrained weights.
        train_from_scratch: bool = False
        # Wether we should keep the weights of the encoder frozen.
        freeze_pretrained_encoder_weights: bool = False

        # Hyper-parameters of the output head.
        output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)

        # Wether the output head should be detached from the representations.
        # In other words, if the gradients from the downstream task should be
        # allowed to affect the representations.
        detach_output_head: bool = False

    def __init__(self, setting: SettingType, hparams: HParams, config: Config):
        super().__init__(setting=setting, hparams=hparams, config=config)

        self.save_hyperparameters({"hparams": self.hp.to_dict(), "config": self.config.to_dict()})

        logger.debug(f"setting of type {type(self.setting)}")
        logger.debug(f"Observation space: {self.observation_space}")
        logger.debug(f"Action/Output space: {self.action_space}")
        logger.debug(f"Reward/Label space: {self.reward_space}")

        if self.config.debug and self.config.verbose:
            logger.debug("Config:")
            logger.debug(self.config.dumps(indent="\t"))
            logger.debug("Hparams:")
            logger.debug(self.hp.dumps(indent="\t"))

        for task_name, task in self.tasks.items():
            logger.debug("Auxiliary tasks:")
            assert isinstance(
                task, AuxiliaryTask
            ), f"Task {task} should be a subclass of {AuxiliaryTask}."
            if task.coefficient != 0:
                logger.debug(f"\t {task_name}: {task.coefficient}")
                logger.info(
                    f"Enabling the '{task_name}' auxiliary task (coefficient of "
                    f"{task.coefficient})"
                )
                task.enable()
        from pytorch_lightning.loggers import WandbLogger

        self.logger: WandbLogger

    def on_fit_start(self):
        super().on_fit_start()
        # NOTE: We could use this to log stuff to wandb.
        # NOTE: The Setting already logs itself in the `wandb.config` dict.

    def forward(self, observations: Setting.Observations) -> ForwardPass:  # type: ignore
        """Forward pass of the model.

        For the given observations, creates a `ForwardPass`, a dict-like object which
        will hold the observations, the representations and the output head predictions.

        NOTE: Base implementation is in `model.py`.

        Parameters
        ----------
        observations : Setting.Observations
            Observations from one of the environments of a Setting.

        Returns
        -------
        ForwardPass
            A dict-like object which holds the observations, representations, and output
            head predictions (actions). See the `ForwardPass` class for more info.
        """
        # The observations should come from a batched environment. If they are not, we
        # add a batch dimension, which we will then remove.
        assert isinstance(observations.x, (Tensor, np.ndarray))
        # Check if the observations are batched or not.
        not_batched = not self._are_batched(observations)
        if not_batched:
            observations = observations.with_batch_dimension()

        forward_pass = super().forward(observations)
        # Simplified this for now, but we could add more flexibility later.
        assert isinstance(forward_pass, ForwardPass)

        # If the original observations didn't have a batch dimension,
        # Remove the batch dimension from the results.
        if not_batched:
            forward_pass = forward_pass.remove_batch_dimension()
        return forward_pass

    def create_output_head(self, task_id: Optional[int]) -> OutputHead:
        """Create an output head for the current action and reward spaces.

        NOTE: This assumes that the input, action and reward spaces don't change
        between tasks.

        Parameters
        ----------
        task_id : Optional[int]
            ID of the task associated with this new output head. Can be `None`, which is
            interpreted as saying that either that task labels aren't available, or that
            this output head will be used for all tasks.

        Returns
        -------
        OutputHead
            The new output head for the given task.
        """
        # NOTE: Actual implementation is in `model.py`. This is added here just for
        # convenience when extending the baseline model.
        return super().create_output_head(task_id=task_id)

    def output_head_type(self, setting: SettingType) -> Type[OutputHead]:
        """Return the type of output head we should use in a given setting."""
        # NOTE: Implementation is in `model.py`.
        return super().output_head_type(setting)

    @property
    def automatic_optimization(self) -> bool:
        return not isinstance(self.output_head, PolicyHead)

    def training_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment = None,
        dataloader_idx: int = None,
        optimizer_idx: int = None,
    ) -> ForwardPass:
        return super().training_step(
            batch,
            batch_idx=batch_idx,
            environment=environment or self.setting.train_env,
            dataloader_idx=dataloader_idx,
            optimizer_idx=optimizer_idx,
        )

    def validation_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment = None,
        dataloader_idx: int = None,
    ) -> ForwardPass:
        return super().validation_step(
            batch,
            batch_idx=batch_idx,
            environment=environment or self.setting.val_env,
            dataloader_idx=dataloader_idx,
        )

    def test_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment = None,
        dataloader_idx: int = None,
    ) -> ForwardPass:
        return super().test_step(
            batch,
            batch_idx=batch_idx,
            environment=environment or self.setting.test_env,
            dataloader_idx=dataloader_idx,
        )

    def shared_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment,
        phase: str,
        dataloader_idx: int = None,
        optimizer_idx: int = None,
    ) -> ForwardPass:
        return super().shared_step(
            batch,
            batch_idx=batch_idx,
            environment=environment,
            phase=phase,
            dataloader_idx=dataloader_idx,
            optimizer_idx=optimizer_idx,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching between tasks.

        Args:
            task_id (int, optional): the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.
        """
        return super().on_task_switch(task_id)


================================================
FILE: sequoia/methods/models/base_model/model.py
================================================
"""Base for the model used by the `BaseMethod`.

This model is basically just an encoder and an output head. Both of these can be
switched out/customized as needed.
"""
import dataclasses
from dataclasses import dataclass
from typing import Any, ClassVar, Dict, Generic, List, Optional, Tuple, Type, TypeVar, Union

import gym
import numpy as np
import torch
import torchvision.models as tv_models
from gym import Space, spaces
from gym.spaces.utils import flatdim
from pytorch_lightning import LightningModule
from simple_parsing import choice, mutable_field
from simple_parsing.helpers.hparams import HyperParameters
from simple_parsing.helpers.serialization import register_decoding_fn
from torch import Tensor, nn, optim
from torch.optim.optimizer import Optimizer  # type: ignore

from sequoia.common.config import Config
from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support
from sequoia.common.hparams import HyperParameters, categorical, log_uniform
from sequoia.common.loss import Loss
from sequoia.common.spaces import Image
from sequoia.methods.models.output_heads import OutputHead
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.settings.base import Environment
from sequoia.settings.base.setting import Actions, Observations, Rewards
from sequoia.settings.rl import ContinualRLSetting, RLSetting
from sequoia.settings.sl import SLSetting
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.pretrained_utils import get_pretrained_encoder

from ..fcnet import FCNet
from ..forward_pass import ForwardPass
from ..output_heads import (
    ActorCriticHead,
    ClassificationHead,
    OutputHead,
    PolicyHead,
    RegressionHead,
)
from ..output_heads.rl.episodic_a2c import EpisodicA2C
from ..simple_convnet import SimpleConvNet

logger = get_logger(__name__)
SettingType = TypeVar("SettingType", bound=IncrementalAssumption)

available_optimizers: Dict[str, Type[Optimizer]] = {
    "sgd": optim.SGD,
    "adam": optim.Adam,
    "rmsprop": optim.RMSprop,
}
available_encoders: Dict[str, Type[nn.Module]] = {
    "vgg16": tv_models.vgg16,
    "resnet18": tv_models.resnet18,
    "resnet34": tv_models.resnet34,
    "resnet50": tv_models.resnet50,
    "resnet101": tv_models.resnet101,
    "resnet152": tv_models.resnet152,
    "alexnet": tv_models.alexnet,
    "densenet": tv_models.densenet161,
    # TODO: Add the self-supervised pl modules here!
    "simple_convnet": SimpleConvNet,
}


class Model(LightningModule, Generic[SettingType]):
    """Basic Model to be used by a Method.

    Based on the `LightningModule` (nn.Module extended by pytorch-lightning).
    This Model can be trained on either Supervised or Reinforcement Learning environments.

    This model splits the learning task into a representation-learning problem
    and a downstream task (output head) applied on top of it.

    The most important method to understand is the `get_loss` method, which
    is used by the [train/val/test]_step methods which are called by
    pytorch-lightning.
    """

    @dataclass
    class HParams(HyperParameters):
        """HParams of the Model."""

        # Class variable versions of the above dicts, for easier subclassing.
        # NOTE: These don't get parsed from the command-line.
        available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = available_optimizers.copy()
        available_encoders: ClassVar[Dict[str, Type[nn.Module]]] = available_encoders.copy()

        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
        # L2 regularization term for the model weights.
        weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)
        # Which optimizer to use.
        optimizer: Type[Optimizer] = categorical(available_optimizers, default=optim.Adam)
        # Use an encoder architecture from the torchvision.models package.
        encoder: Type[nn.Module] = categorical(
            available_encoders,
            default=tv_models.resnet18,
            # TODO: Only using these two by default when performing a sweep.
            probabilities={"resnet18": 0.5, "simple_convnet": 0.5},
        )

        # Batch size to use during training and evaluation.
        batch_size: Optional[int] = None

        # Number of hidden units (before the output head).
        # When left to None (default), the hidden size from the pretrained
        # encoder model will be used. When set to an integer value, an
        # additional Linear layer will be placed between the outputs of the
        # encoder in order to map from the pretrained encoder's output size H_e
        # to this new hidden size `new_hidden_size`.
        new_hidden_size: Optional[int] = None
        # Retrain the encoder from scratch.
        train_from_scratch: bool = False
        # Wether we should keep the weights of the pretrained encoder frozen.
        freeze_pretrained_encoder_weights: bool = False

        # Settings for the output head.
        # TODO: This could be overwritten in a subclass to do classification or
        # regression or RL, etc.
        output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)

        # Wether the output head should be detached from the representations.
        # In other words, if the gradients from the downstream task should be
        # allowed to affect the representations.
        detach_output_head: bool = False

        # Which algorithm to use for the output head when in an RL setting.
        # TODO: Run the PolicyHead in the following conditions:
        # - Compare the big backward pass vs many small ones
        # - Try to have it learn from pixel input, if possible
        # - Try to have it learn on a multi-task RL setting,
        # TODO: Finish the ActorCritic and EpisodicA2C heads.
        rl_output_head_algo: Type[OutputHead] = choice(
            {
                "reinforce": PolicyHead,
                "a2c_online": ActorCriticHead,
                "a2c_episodic": EpisodicA2C,
            },
            default=EpisodicA2C,
        )

    def __init__(self, setting: SettingType, hparams: HParams, config: Config):
        super().__init__()
        self.setting: SettingType = setting
        self.hp: Model.HParams = hparams

        self.Observations: Type[Observations] = setting.Observations
        self.Actions: Type[Actions] = setting.Actions
        self.Rewards: Type[Rewards] = setting.Rewards

        # Choose what type of output head to use depending on the kind of
        # Setting.
        self.OutputHead: Type[OutputHead] = self.output_head_type(setting)

        self.observation_space: gym.Space = setting.observation_space
        self.action_space: gym.Space = setting.action_space
        self.reward_space: gym.Space = setting.reward_space

        self.input_shape = self.observation_space.x.shape
        self.reward_shape = self.reward_space.shape

        self.config: Config = config
        # NOTE: do NOT set the `datamodule` property, otherwise the trainer will ignore
        # the passed train/val/test dataloader from the Setting.
        # self.datamodule: LightningDataModule = setting

        # (Testing) Setting this attribute is supposed to help with ddp/etc
        # training in pytorch-lightning. Not 100% sure.
        # self.example_input_array = torch.rand(self.batch_size, *self.input_shape)

        # Create the encoder and the output head.
        # Space of our encoder representations.
        self.representation_space: gym.Space
        observing_state = not isinstance(setting.observation_space.x, Image)
        if isinstance(setting, ContinualRLSetting) and observing_state:
            # ISSUE # 62: Need to add a dense network instead of no encoder, and
            # change the PolicyHead to have only one layer.
            # Only pass the image, not the task labels to the encoder (for now).
            input_dims = flatdim(self.observation_space["x"])
            output_dims = self.hp.new_hidden_size or 128

            self.encoder = FCNet(
                in_features=input_dims,
                out_features=output_dims,
                hidden_layers=3,
                hidden_neurons=[256, 128, output_dims],
                activation=nn.ReLU,
            )
            self.representation_space = add_tensor_support(
                spaces.Box(low=-np.inf, high=np.inf, shape=[output_dims])
            )
            self.hidden_size = output_dims
        else:
            self.encoder, self.hidden_size = self.make_encoder()
            # TODO: Check that the outputs of the encoders are actually
            # flattened. I'm not sure they all are, which case the samples
            # wouldn't match with this space.
            self.representation_space = spaces.Box(-np.inf, np.inf, (self.hidden_size,), np.float32)

        logger.info(f"Moving encoder to device {self.config.device}")
        self.encoder = self.encoder.to(self.config.device)

        self.representation_space = add_tensor_support(self.representation_space)

        # Upgrade the type of hparams for the output head based on the setting, if
        # needed.
        if not isinstance(self.hp.output_head, self.OutputHead.HParams):
            self.hp.output_head = self.hp.output_head.upgrade(target_type=self.OutputHead.HParams)
        # Then, create the 'default' output head.
        self.output_head: OutputHead = self.create_output_head(task_id=0)

    def make_encoder(self) -> Tuple[nn.Module, int]:
        """Creates an Encoder model and returns the number of output dimensions.

        Returns:
            Tuple[nn.Module, int]: the encoder and the hidden size.

        TODO: Could instead return its output space, in case we didn't necessarily want
        to flatten the representations (e.g. for image segmentation tasks).
        """
        # Get the chosen type of encoder
        encoder_type: Type[nn.Module] = self.hp.encoder
        # This does a few things:
        # 1. Instantiate the model (with pretrained weights if desired)
        # 2. Infer the output size of the model
        # 3. Remove the output fully-connected layer, if present.
        encoder, hidden_size = get_pretrained_encoder(
            encoder_model=encoder_type,
            pretrained=not self.hp.train_from_scratch,
            freeze_pretrained_weights=self.hp.freeze_pretrained_encoder_weights,
            new_hidden_size=self.hp.new_hidden_size,
        )
        return encoder, hidden_size

    def forward(self, observations: IncrementalAssumption.Observations) -> ForwardPass:
        """Forward pass of the Model.

        Returns a ForwardPass object (acts like a dict of Tensors.)
        """
        # If there's any additional 'input preprocessing' to do, do it here.
        # NOTE (@lebrice): This is currently done this way so that we don't have
        # to pass transforms to the settings from the method side.
        observations = self.preprocess_observations(observations)
        # Encode the observation to get representations.
        assert observations.x.device == self.device

        representations = self.encode(observations)
        # Pass the observations and representations to the output head to get
        # the 'action' (prediction).

        if self.hp.detach_output_head:
            representations = representations.detach()

        actions = self.output_head(observations=observations, representations=representations)
        # NOTE: Need to put a `rewards` field in this forward_pass, so we can pass it
        # to the training_step_end method, which will calculate and aggregate the loss
        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=actions,
            rewards=None,
        )
        return forward_pass

    def encode(self, observations: Observations) -> Tensor:
        """Encodes a batch of samples `x` into a hidden vector.

        Args:
            observations (Union[Tensor, Observation]): Tensor of Observation
            containing a batch of samples (before preprocess_observations).

        Returns:
            Tensor: The hidden vector / embedding for that sample, with size
                [B, `self.hidden_size`].
        """
        # Here in this base model the encoder only takes the 'x' from the
        # observations.
        x = torch.as_tensor(observations.x, device=self.device, dtype=self.dtype)
        assert x.device == self.device
        encoder_parameters = list(self.encoder.parameters())
        encoder_device = encoder_parameters[0].device if encoder_parameters else self.device
        # BUG: WHen using the EWCTask, there seems to be some issues related to which
        # device the model is stored on.

        if encoder_device != self.device:
            x = x.to(encoder_device)
            # self.encoder = self.encoder.to(self.device)

        h_x = self.encoder(x)

        if encoder_device != self.device:
            h_x = h_x.to(self.device)

        if isinstance(h_x, list) and len(h_x) == 1:
            # Some pretrained encoders sometimes give back a list with one tensor. (?)
            h_x = h_x[0]
        if not isinstance(h_x, Tensor):
            h_x = torch.as_tensor(h_x, device=self.device, dtype=self.dtype)
        return h_x

    def create_output_head(self, task_id: Optional[int]) -> OutputHead:
        """Create an output head for the current action and reward spaces.

        NOTE: This assumes that the input, action and reward spaces don't change
        between tasks.

        Parameters
        ----------
        task_id : Optional[int]
            ID of the task associated with this new output head. Can be `None`, which is
            interpreted as saying that either that task labels aren't available, or that
            this output head will be used for all tasks.

        Returns
        -------
        OutputHead
            The new output head for the given task.
        """
        # NOTE: This assumes that the input, action and reward spaces don't change
        # between tasks.
        # TODO: Maybe add something like `setting.get_action_space(task_id)`
        input_space: Space = self.representation_space
        action_space: Space = self.action_space
        reward_space: Space = self.reward_space
        hparams: OutputHead.HParams = self.hp.output_head
        # NOTE: self.OutputHead is the type of output head used for the current setting.
        # NOTE: Could also use a name for the output head using the task id, for example
        output_head_name = None  # Use the name defined on the output head.
        output_head = self.OutputHead(
            input_space=input_space,
            action_space=action_space,
            reward_space=reward_space,
            hparams=hparams,
            name=output_head_name,
        ).to(self.device)

        # Do not add the output head's parameters to the optimizer of the whole model,
        # if it already has an `optimizer` attribute of its own. (NOTE: this isn't the
        # case in practice so far)
        add_to_optimizer = not getattr(output_head, "optimizer", None)
        if add_to_optimizer:
            # Add the new parameters to the Optimizer, if it already exists.
            # If we don't yet have a Trainer, the Optimizer hasn't been created
            # yet. Once it is created though, it will get the parameters of this output
            # head from `self.parameters()` is passed to its constructor, since the
            # output head will be stored in `self.output_heads`.
            if self.trainer:
                optimizer: Optimizer = self.optimizers()
                assert isinstance(optimizer, Optimizer)
                optimizer.add_param_group({"params": output_head.parameters()})

        return output_head

    def output_head_type(self, setting: SettingType) -> Type[OutputHead]:
        """Return the type of output head we should use in a given setting."""
        if isinstance(setting, RLSetting):
            if not isinstance(setting.action_space, spaces.Discrete):
                raise NotImplementedError("Only support discrete actions for now.")
            assert issubclass(self.hp.rl_output_head_algo, OutputHead)
            return self.hp.rl_output_head_algo

        assert isinstance(setting, SLSetting)

        if isinstance(setting.action_space, spaces.Discrete):
            # Discrete actions: i.e. classification problem.
            if isinstance(setting.reward_space, spaces.Discrete):
                # Classification problem: Discrete action, Discrete rewards (labels).
                return ClassificationHead
            # Reinforcement learning problem: Discrete action, float rewards.
            # TODO: There might be some RL environments with discrete
            # rewards, right? For instance CartPole is, on-paper, a discrete
            # reward setting, since its always 1.
        if isinstance(setting.action_space, spaces.Box):
            # Regression problem: For now there is only RL that has such a
            # space.
            return RegressionHead

        raise NotImplementedError(f"Unsupported action space: {setting.action_space}")

    def training_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment = None,
        dataloader_idx: int = None,
        optimizer_idx: int = None,
    ) -> ForwardPass:
        return self.shared_step(
            batch,
            batch_idx=batch_idx,
            environment=environment or self.setting.train_env,
            phase="train",
            dataloader_idx=dataloader_idx,
            optimizer_idx=optimizer_idx,
        )

    def validation_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment = None,
        dataloader_idx: int = None,
    ) -> ForwardPass:
        return self.shared_step(
            batch,
            batch_idx=batch_idx,
            environment=environment or self.setting.val_env,
            phase="val",
            dataloader_idx=dataloader_idx,
        )

    def test_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment = None,
        dataloader_idx: int = None,
    ) -> ForwardPass:
        return self.shared_step(
            batch,
            batch_idx=batch_idx,
            environment=environment or self.setting.test_env,
            phase="test",
            dataloader_idx=dataloader_idx,
        )

    def shared_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment,
        phase: str,
        dataloader_idx: int = None,
        optimizer_idx: int = None,
    ) -> ForwardPass:
        """Main logic of the "forward pass".

        This is used as part of `training_step`, `validation_step` and `test_step`.
        See the PL docs for `training_step` for more info.

        NOTE: The prediction / environment interaction / loss calculation has been
        moved into the `shared_step_end` method for DP to also work.
        """

        # Split the batch into observations and (maybe) rewards.
        observations: Observations
        rewards: Optional[Rewards]
        if isinstance(batch, tuple) and len(batch) == 2:
            observations, rewards = batch
        else:
            assert isinstance(batch, self.Observations), batch
            observations, rewards = batch, None

        # Get the forward pass results, containing:
        # - "observation": the augmented/transformed/processed observation.
        # - "representations": the representations for the observations.
        # - "actions": The actions (predictions)
        forward_pass: ForwardPass = self(observations)
        if rewards is not None:
            forward_pass = dataclasses.replace(forward_pass, rewards=rewards)
        return forward_pass

    def training_step_end(self, step_outputs: Union[Loss, List[Loss]]) -> Loss:
        loss_object: Loss = self.shared_step_end(
            step_outputs=step_outputs, phase="train", environment=self.setting.train_env
        )
        loss = loss_object.loss
        if not isinstance(loss, Tensor) or not loss.requires_grad:
            # NOTE: There might be no loss at some steps, because for instance
            # we haven't reached the end of an episode in an RL setting.
            return None

        # NOTE In RL, we can only update the model's weights on steps where the output
        # head has as loss, because the output head has buffers of tensors whose grads
        # would become invalidated if we performed the optimizer step.
        if loss.requires_grad and not self.automatic_optimization:
            output_head_loss = loss_object.losses.get(self.output_head.name)
            update_model = output_head_loss is not None and output_head_loss.requires_grad
            optimizer = self.optimizers()

            self.manual_backward(loss, optimizer, retain_graph=not update_model)
            if update_model:
                optimizer.step()
                optimizer.zero_grad()
        # BUG: Need to return this dict, otherwise the optimizer closure in the DP
        # accelerator fails (it only expects to get `dict` or `Tensor` values for
        # `training_step_output` in `_process_training_step_output`)
        # return loss
        # NOTE: the 'hidden' key isn't currently used, but it could be in the future if
        # we added support for BBPT, i.e. recurrent policies or output heads, etc.
        return {"loss": loss, "hidden": loss_object.tensors.get("hidden")}

    def validation_step_end(self, step_outputs: Union[ForwardPass, List[ForwardPass]]) -> Loss:
        return self.shared_step_end(
            step_outputs=step_outputs, phase="val", environment=self.setting.val_env
        )

    def test_step_end(self, step_outputs: Union[ForwardPass, List[ForwardPass]]) -> Loss:
        return self.shared_step_end(
            step_outputs=step_outputs, phase="test", environment=self.setting.test_env
        )

    def shared_step_end(
        self,
        step_outputs: Union[ForwardPass, List[ForwardPass]],
        phase: str,
        environment: Environment,
    ) -> Loss:
        """Called with the outputs of each replica's `[train/validation/test]_step`:

        - Sends the Actions from each worker to the environment to obtain rewards, if
          necessary;
        - Calculates the loss, given the merged forward pass and the rewards/labels;
        - Aggregates the losses/metrics from each replica, logs the relevant values, and
          returns the aggregated losses and metrics (a single Loss object).
        """
        forward_pass: ForwardPass
        if isinstance(step_outputs, list):
            forward_pass = ForwardPass.concatenate(step_outputs)
        else:
            forward_pass = step_outputs

        # get the actions from the forward pass:
        actions = forward_pass.actions
        rewards: Optional[Rewards] = forward_pass.rewards

        if rewards is None:
            # Get the reward from the environment (the dataloader).
            if self.config.debug and self.config.render:
                environment.render("human")
                # import matplotlib.pyplot as plt
                # plt.waitforbuttonpress(10)
            assert isinstance(actions, Actions), actions
            rewards = environment.send(actions)
            assert rewards is not None

        # BUG: Rewards is array of [None]s in TraditionalSL and MultiTask SL!
        assert isinstance(rewards, Rewards), rewards
        # Now that we have the rewards, we calculate the loss.

        loss: Loss = self.get_loss(forward_pass, rewards, loss_name=phase)
        loss_tensor: Tensor = loss.loss
        if loss_tensor == 0.0:
            return loss
        loss_pbar_dict = loss.to_pbar_message()
        for key, value in loss_pbar_dict.items():
            assert not isinstance(value, dict), "shouldn't be nested at this point!"
            self.log(key, value, prog_bar=self.config.debug, logger=False)
            logger.debug(f"{key}: {value}")

        loss_log_dict = loss.to_log_dict(verbose=self.config.verbose)
        for key, value in loss_log_dict.items():
            assert not isinstance(value, dict), "shouldn't be nested at this point!"
            self.log(key, value, prog_bar=False, logger=True)
        return loss

    def split_batch(self, batch: Any) -> Tuple[Observations, Optional[Rewards]]:
        """Splits the batch into the observations and the rewards.

        Uses the types defined on the setting that this model is being applied
        on (which were copied to `self.Observations` and `self.Actions`) to
        figure out how many fields each type requires.

        TODO: This is slightly confusing, should probably get rid of this.
        """
        observations: Observations
        rewards: Optional[Rewards]
        if isinstance(batch, self.Observations):
            observations, rewards = batch, None
        else:
            assert isinstance(batch, (tuple, list)) and len(batch) == 2
            observations, rewards = batch

        assert isinstance(observations, self.Observations), (
            observations,
            type(observations),
            self.Observations,
        )
        # Move the observations to the right device, and convert numpy arrays to
        # tensors.
        observations = observations.torch(device=self.device)
        if rewards is not None:
            rewards = rewards.torch(device=self.device)
        return observations, rewards

    def get_loss(
        self, forward_pass: ForwardPass, rewards: Rewards = None, loss_name: str = ""
    ) -> Loss:
        """Gets a Loss given the results of the forward pass and the reward.

        Args:
            forward_pass (Dict[str, Tensor]): Results of the forward pass.
            reward (Tensor, optional): The reward that resulted from the action
                chosen in the forward pass. Defaults to None.
            loss_name (str, optional): The name for the resulting Loss.
                Defaults to "".

        Returns:
            Loss: a Loss object containing the loss tensor, associated metrics
            and sublosses.

        This could look a bit like this, for example:
        ```
        action = forward_pass["action"]
        predicted_reward = forward_pass["predicted_reward"]
        nce = self.loss_fn(predicted_reward, reward)
        loss = Loss(loss_name, loss=nce)
        return loss
        ```
        """
        assert loss_name
        # Create an 'empty' Loss object with the given name, so that we always
        # return a Loss object, even when `y` is None and we can't the loss from
        # the output_head.
        total_loss = Loss(name=loss_name)
        if rewards:
            assert rewards.y is not None
            # TODO: If we decide to re-organize the forward pass object to also
            # contain the predictions of the self-supervised tasks, (atm they
            # perform their 'forward pass' in their get_loss functions)
            # then we could change 'actions' to be a dict, and index the
            # dict with the 'name' of each output head, like so:
            # actions_of_head = forward_pass.actions[self.output_head.name]
            # rewards_of_head = forward_pass.rewards[self.output_head.name]

            # For now though, we only have one "prediction" in the actions:
            actions = forward_pass.actions
            # So far we only use 'y' from the rewards in the output head.
            supervised_loss = self.output_head_loss(forward_pass, actions=actions, rewards=rewards)
            total_loss += supervised_loss

        return total_loss

    def output_head_loss(
        self, forward_pass: ForwardPass, actions: Actions, rewards: Rewards
    ) -> Loss:
        """Gets the Loss of the output head."""
        # TODO: The rewards can still contain just numpy arrays, keeping it so for now.
        assert actions.device == self.device  # == rewards.device (would be None)
        return self.output_head.get_loss(
            forward_pass,
            actions=actions,
            rewards=rewards,
        )

    def preprocess_observations(self, observations: Observations) -> Observations:
        assert isinstance(observations, self.Observations)
        # TODO: Make sure this also works in the supervised setting.
        # Convert all numpy arrays to tensors if possible.
        # TODO: Make sure this still works in settings without task labels (
        # None in numpy arrays)
        observations = observations.torch(device=self.device)
        return observations

    def preprocess_rewards(self, reward: Rewards) -> Rewards:
        return reward

    def configure_optimizers(self):
        optimizer_class: Type[Optimzier] = self.hp.optimizer
        options = {
            "lr": self.hp.learning_rate,
            "weight_decay": self.hp.weight_decay,
        }
        return optimizer_class(
            self.parameters(),
            lr=self.hp.learning_rate,
            weight_decay=self.hp.weight_decay,
        )

    @property
    def batch_size(self) -> int:
        return self.hp.batch_size

    @batch_size.setter
    def batch_size(self, value: int) -> None:
        self.hp.batch_size = value

    @property
    def learning_rate(self) -> float:
        return self.hp.learning_rate

    @learning_rate.setter
    def learning_rate(self, value: float) -> None:
        self.hp.learning_rate = value

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching between tasks.

        Args:
            task_id (Optional[int]): the Id of the task.
        """

    def shared_modules(self) -> Dict[str, nn.Module]:
        """Returns any trainable modules in `self` that are shared across tasks.

        By giving this information, these weights can then be used in
        regularization-based auxiliary tasks like EWC, for example.

        Returns
        -------
        Dict[str, nn.Module]:
            Dictionary mapping from name to the shared modules, if any.
        """
        shared_modules: Dict[str, nn.Module] = nn.ModuleDict()

        if self.encoder:
            shared_modules["encoder"] = self.encoder
        if self.output_head:
            shared_modules["output_head"] = self.output_head
        return shared_modules

    # def summarize(self, mode: str = ModelSummary.MODE_DEFAULT) -> ModelSummary:
    #     model_summary = ModelSummary(self, mode=mode)
    #     log.debug("\n" + str(model_summary))
    #     return model_summary

    def _are_batched(self, observations: IncrementalAssumption.Observations) -> bool:
        """Returns wether these observations are batched."""
        assert isinstance(self.observation_space, spaces.Dict)

        # if observations.task_labels is not None:
        #     if isinstance(observations.task_labels, int):
        #         return True
        #     assert isinstance(observations.task_labels, (np.ndarray, Tensor))
        #     assert False, observations.shapes
        #     return observations.task_labels.shape and observations.task_labels.shape[0]

        x_space: spaces.Box = self.observation_space["x"]

        if isinstance(x_space, Image) or len(x_space.shape) == 4:
            return observations.x.ndim == 4

        if not isinstance(x_space, spaces.Box):
            raise NotImplementedError(
                f"Don't know how to tell if obs space {x_space} is batched, only "
                f"support Box spaces for the observation's 'x' for now."
            )

        # self.observation_space *should* usually reflect the shapes of individual
        # (non-batched) observations.
        return observations.x.ndim == len(x_space.shape) + 1


# Registering this handler for decoding the type of output head to use (a field in the
# hparams) from a dictionary.
register_decoding_fn(Type[OutputHead], lambda v: v)


================================================
FILE: sequoia/methods/models/base_model/multihead_model.py
================================================
from dataclasses import dataclass, replace
from typing import Dict, List, Optional, Sequence, Tuple, TypeVar, Union

import numpy as np
import torch
import torch.nn.functional as F
from torch import Tensor, nn

from sequoia.common import Batch, Config, Loss
from sequoia.settings import Actions, Environment, Observations, Rewards
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.utils.generic_functions import concatenate, get_slice, stack
from sequoia.utils.logging_utils import get_logger

from ..forward_pass import ForwardPass
from ..output_heads import OutputHead
from .model import Model, SettingType

logger = get_logger(__name__)


class MultiHeadModel(Model[SettingType]):
    """Mixin that adds multi-head prediction to the Model when task labels are
    available.
    """

    @dataclass
    class HParams(Model.HParams):
        """Hyperparameters specific to a multi-head model."""

        # Wether to create one output head per task.
        multihead: Optional[bool] = None

    def __init__(self, setting: SettingType, hparams: HParams, config: Config):
        super().__init__(setting=setting, hparams=hparams, config=config)

        # Dictionary of output heads!
        self.output_heads: Dict[str, OutputHead] = nn.ModuleDict()
        self.hp: MultiHeadModel.HParams
        self.setting: SettingType

        # TODO: Add an optional task inference mechanism
        # See https://github.com/lebrice/Sequoia/issues/49
        self.task_inference_module: Optional[nn.Module] = None

        self.previous_task: Optional[int] = None
        self.current_task: Optional[int] = None

        self.previous_task_labels: Optional[Sequence[int]] = None

        if setting.task_labels_at_train_time:
            # NOTE: Not sure if this could cause an issue when setting is a SettingProxy
            starting_task_id = 0  # setting.current_task_id
        else:
            starting_task_id = None
        self.output_heads[str(starting_task_id)] = self.output_head

    def output_head_loss(
        self, forward_pass: ForwardPass, actions: Actions, rewards: Rewards
    ) -> Loss:
        """TODO: Need to then re-split stuff (undo the work we did in forward) to get a
        loss per output head?
        """
        # Asks each output head for its contribution to the loss.
        observations: IncrementalAssumption.Observations = forward_pass.observations
        task_labels = observations.task_labels
        if isinstance(task_labels, Tensor):
            task_labels = task_labels.cpu().numpy()

        batch_size = forward_pass.batch_size
        assert batch_size is not None

        if task_labels is None:
            if self.task_inference_module:
                # TODO: Predict the task ids using some kind of task
                # inference mechanism.
                task_labels = self.task_inference_module(forward_pass)
            else:
                raise NotImplementedError(
                    "Multihead model doesn't have access to task labels and "
                    "doesn't have a task inference module!"
                )
                # TODO: Maybe use the last trained output head, by default?

        # TODO: Check if this is still necessary
        if self.previous_task_labels is None:
            self.previous_task_labels = task_labels

        # Default behaviour: use the (only) output head.
        if not self.hp.multihead:
            return self.output_head.get_loss(
                forward_pass,
                actions=actions,
                rewards=rewards,
            )

        # The sum of all the losses from all the output heads.
        total_loss = Loss(self.output_head.name)

        task_switched_in_env = task_labels != self.previous_task_labels
        # This `done` attribute isn't added in supervised settings.
        episode_ended = getattr(observations, "done", np.zeros(batch_size, dtype=bool))
        # TODO: Remove all this useless conversion from Tensors to ndarrays
        if isinstance(episode_ended, Tensor):
            episode_ended = episode_ended.cpu().numpy()

        # logger.debug(f"Task labels: {task_labels}, task switched in env: {task_switched_in_env}, episode ended: {episode_ended}")
        done_set_to_false_temporarily_indices = []

        if any(episode_ended & task_switched_in_env):
            # In the environments where there was a task switch to a different task and
            # where some episodes ended, we need to first get the corresponding output
            # head losses from these environments first.
            if self.batch_size in {None, 1}:
                # If the batch size is 1, this is a little bit simpler to deal with.
                previous_task: int = self.previous_task_labels[0].item()
                from sequoia.methods.models.output_heads.rl import PolicyHead

                previous_output_head = self.output_heads[str(previous_task)]
                assert isinstance(
                    previous_output_head, PolicyHead
                ), "todo: assuming that this only happends in RL currently."
                # We want the loss from that output head, but we don't want to
                # re-compute it below!
                env_index_in_previous_batch = 0
                # breakpoint()
                logger.debug(
                    f"Getting a loss from the output head for task {previous_task}, that was used for the last task."
                )
                env_episode_loss = previous_output_head.get_episode_loss(
                    env_index_in_previous_batch, done=True
                )
                # logger.debug(f"Loss from that output head: {env_episode_loss}")
                # Add this end-of-episode loss to the total loss.
                # breakpoint()
                # BUG: This can sometimes (rarely) be None! Need to better understand
                # why this is happening.
                if env_episode_loss is None:
                    logger.warning(
                        RuntimeWarning(
                            f"BUG: Env {env_index_in_previous_batch} gave back a loss "
                            f"of `None`, when we expected a loss from that output head "
                            f"for task id {previous_task}."
                        )
                    )
                else:
                    total_loss += env_episode_loss
                # We call on_episode_end so the output head can clear the relevant
                # buffers. Note that get_episode_loss(env_index, done=True) doesn't
                # clear the buffers, it just calculates a loss.
                previous_output_head.on_episode_end(env_index_in_previous_batch)

                # Set `done` to `False` for that env, to prevent the output head for the
                # new task from seeing the first observation in the episode as the last.
                observations.done[env_index_in_previous_batch] = False
                # FIXME: If we modify that entry in-place, then even after this method
                # returns, the change will persist.. Therefore we just save the indices
                # that we altered, and reset them before returning.
                done_set_to_false_temporarily_indices.append(env_index_in_previous_batch)
            else:
                raise NotImplementedError(
                    "TODO: The BaseModel doesn't yet support having multiple "
                    "different tasks within the same batch in RL. "
                )
                # IDEA: Need to somehow pass the indices of which env to take care of to
                # each output head, so they can create / clear buffers only when needed.

        assert task_labels is not None
        all_task_indices: Dict[int, Tensor] = get_task_indices(task_labels)

        # Get the loss from each output head:
        if len(all_task_indices) == 1:
            # If everything is in the same task (only one key), no need to split/merge
            # stuff, so it's a bit easier:
            task_id: int = task_labels[0].item()

            self.setup_for_task(task_id)
            # task_output_head = self.output_heads[str(task_id)]
            total_loss += super().output_head_loss(forward_pass, actions=actions, rewards=rewards)
            # total_loss += self.output_head.get_loss(
            #     forward_pass, actions=actions, rewards=rewards,
            # )
        else:
            # Split off the input batch, do a forward pass for each sub-task.
            # (could be done in parallel but whatever.)
            # TODO: Also, not sure if this will play well with DP, DDP, etc.
            for task_id, task_indices in all_task_indices.items():
                # Make a partial observation without the task labels, so that
                # super().forward will use the current output head.
                logger.debug(
                    f"Getting output head loss for "
                    f"{len(task_indices)/batch_size:.0%} of the batch which "
                    f"has task_id of '{task_id}'."
                )

                self.setup_for_task(task_id)
                task_loss = super().output_head_loss(
                    forward_pass=get_slice(forward_pass, task_indices),
                    actions=get_slice(actions, task_indices),
                    rewards=get_slice(rewards, task_indices),
                )
                # NOTE: useful for debugging, but shouldn't be enabled normally.
                # task_loss.name += f"(task {task_id})"
                logger.debug(f"Task {task_id} loss: {task_loss}")
                total_loss += task_loss

        self.previous_task_labels = task_labels
        # FIXME: Reset the 'done' to True, if we manually set it to False.
        for index in done_set_to_false_temporarily_indices:
            observations.done[index] = True

        return total_loss

    def on_before_zero_grad(self, optimizer):
        super().on_before_zero_grad(optimizer)
        from sequoia.methods.models.output_heads.rl import PolicyHead

        for task_id_string, output_head in self.output_heads.items():
            if isinstance(output_head, PolicyHead):
                output_head.detach_all_buffers()

    def shared_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment,
        phase: str,
        dataloader_idx: int = None,
        optimizer_idx: int = None,
    ) -> Dict:
        assert phase
        if dataloader_idx is not None:
            logger.debug(
                "TODO: We were indirectly given a task id with the "
                "dataloader_idx. Ignoring for now, as we're trying to avoid "
                "this (the task labels should be given for each example "
                "anyway). "
            )
            dataloader_idx = None

        return super().shared_step(
            batch=batch,
            batch_idx=batch_idx,
            environment=environment,
            phase=phase,
            dataloader_idx=dataloader_idx,
            optimizer_idx=optimizer_idx,
        )

    def on_task_switch(self, task_id: Optional[int]):
        """Called when switching between tasks.

        Args:
            task_id (int, optional): the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.

        NOTE: You can check wether this task switch is occuring at train or test time
        using `self.training`.
        """
        logger.info(f"Switching from task {self.current_task} -> {task_id}.")

        # TODO: Move these to the base model perhaps? (In case there is ever a
        # re-ordering of the mixins that make up the BaseModel)
        super().on_task_switch(task_id)

        self.previous_task = self.current_task
        self.current_task = task_id

        if task_id is not None and self.hp.multihead:
            # Switch the output head to use.
            self.output_head = self.get_or_create_output_head(task_id)

    def shared_modules(self) -> Dict[str, nn.Module]:
        """Returns any trainable modules in `self` that are shared across tasks.

        By giving this information, these weights can then be used in
        regularization-based auxiliary tasks like EWC, for example.

        This dict contains the encoder and output head, by default, as well as any
        shared modules in the auxiliary tasks.

        When using only multiple output heads (i.e. when `self.hp.multihead` is `True`),
        then we remove the output head from the dict before returning it.

        Returns
        -------
        Dict[str, nn.Module]:
            Dictionary mapping from name to the shared modules, if any.
        """
        shared_modules = super().shared_modules()
        if self.hp.multihead:
            shared_modules.pop("output_head")
        return shared_modules

    def load_state_dict(
        self,
        state_dict: Union[Dict[str, Tensor], Dict[str, Tensor]],
        strict: bool = True,
    ):
        if self.hp.multihead:
            # TODO: Figure out exactly where/when/how pytorch-lightning is
            # trying to load the model from, because there are some keys
            # missing (['output_heads.1.output.weight', 'output_heads.1.output.bias'])
            # For now, we're just gonna pretend it's not a problem, I guess?
            strict = False

        missing_keys, unexpected_keys = super().load_state_dict(state_dict=state_dict, strict=False)

        # TODO: Double-check that this makes sense and works properly.
        if self.hp.multihead and unexpected_keys:
            for i in range(self.setting.nb_tasks):
                # Try to load the output head weights
                logger.info(f"Creating a new output head for task {i}")
                new_output_head = self.create_output_head(self.setting, task_id=i)
                # FIXME: TODO: This is wrong. We should create all the
                # output heads if they aren't already created, and then try to
                # load the state_dict again.
                new_output_head.load_state_dict(
                    {k: state_dict[k] for k in unexpected_keys},
                    strict=False,
                )
                key = str(i)
                self.output_heads[key] = new_output_head.to(self.device)

        if missing_keys or unexpected_keys:
            logger.debug(f"Missing keys: {missing_keys}, unexpected keys: {unexpected_keys}")

        return missing_keys, unexpected_keys

    def get_or_create_output_head(self, task_id: int) -> nn.Module:
        """Retrieves or creates a new output head for the given task index.

        Also stores it in the `output_heads`, and adds its parameters to the
        optimizer.
        """
        task_output_head: nn.Module
        assert self.hp.multihead, "This should get called when model isnt multi-headed!"
        if str(task_id) in self.output_heads.keys():
            task_output_head = self.output_heads[str(task_id)]
        else:
            logger.info(f"Creating a new output head for task {task_id}.")
            # NOTE: This also takes care to add the output head's parameters to the
            # optimizer.
            task_output_head = self.create_output_head(task_id=task_id)
            self.output_heads[str(task_id)] = task_output_head
        return task_output_head

    def forward(self, observations: IncrementalAssumption.Observations) -> ForwardPass:
        """Smart forward pass with multi-head predictions and task inference.

        This forward pass can handle three different scenarios, depending on the
        contents of `observations.task_labels`:
        1.  Base case: task labels are present, and all examples are from the same task.
            - Perform the 'usual' forward pass (e.g. `super().forward(observations)`).
        2.  Task labels are present, and the batch contains a mix of samples from
            different tasks:
            - Create slices of the batch for each task, where all items in each
              'sub-batch' come from the same task.
            - Perform a forward pass for each task, by calling `forward` recursively
              with the sub-batch for each task as an argument (Case 1).
        3.  Task labels are *not* present. Perform some type of task inference, using
            the `task_inference_forward_pass` method. Check its docstring for more info.

        Parameters
        ----------
        observations : Observations
            Observations from an environment. As of right now, all Settings produce
            observations with (at least) the two following attributes:
            - x: Tensor (the images/inputs)
            - task_labels: Optional[Tensor] (The task labels, when available, else None)

        Returns
        -------
        Tensor
            The outputs, which in this case are the classification logits.
            All three cases above produce the same kind of outputs.
        """
        # TODO: Shouldn't have to do this here, since we have the @auto_move_data dec...
        # observations = observations.to(self.device)
        task_ids: Optional[Tensor] = observations.task_labels

        if isinstance(task_ids, np.ndarray) and task_ids.dtype == np.object:
            task_ids = task_ids.tolist()
            if len(task_ids) == 1:
                task_ids = task_ids[0]
        if task_ids is None:
            # Run the forward pass with task inference turned on.
            return self.task_inference_forward_pass(observations)
        task_ids = torch.as_tensor(task_ids, device=self.device, dtype=int)

        task_ids_present_in_batch = torch.unique(task_ids)
        if len(task_ids_present_in_batch) > 1:
            # Case 2: The batch contains data from more than one task.
            return self.split_forward_pass(observations)

        # Base case: "Normal" forward pass, where all items come from the same task.
        # - Setup the model for this task, however you want, and then do a forward pass,
        # as you normally would.
        # NOTE: If you want to reuse this cool multi-headed forward pass in your
        # own model, these lines here are what you'd want to change.
        task_id: int = task_ids_present_in_batch.item()

        if task_id != self.current_task and self.hp.multihead:
            # Setup the model for this task. For now we just switch the output head.
            self.output_head = self.get_or_create_output_head(task_id)

        return super().forward(observations)

    def setup_for_task(self, task_id: int) -> None:
        if task_id is not None and self.hp.multihead:
            # Setup the model for this task. For now we just switch the output head.
            self.output_head = self.get_or_create_output_head(task_id)

    def split_forward_pass(self, observations: Observations) -> ForwardPass:
        """Perform a forward pass for a batch of observations from different tasks.

        This is called in `forward` when there is more than one unique task label in the
        batch.
        This will call `forward` for each task id present in the batch, passing it a
        slice of the batch, in which all items are from that task.

        NOTE: This cannot cause recursion problems, because `forward`(d=2) will be
        called with a bach of items, all of which come from the same task. This makes it
        so `split_forward_pass` cannot then be called again.

        Parameters
        ----------
        observations : Observations
            Observations, in which the task labels might not all be the same.

        Returns
        -------
        Tensor
            The outputs/logits from each task, re-assembled into a single batch, with
            the task ordering from `observations` preserved.
        """
        assert observations.task_labels is not None
        assert self.hp.multihead, "Can only use split forward pass with multiple heads."
        # We have task labels.
        task_labels = observations.task_labels
        if isinstance(task_labels, Tensor):
            task_labels = task_labels.cpu().numpy()

        # Get the indices of the items from each task.
        all_task_indices_dict: Dict[int, np.ndarray] = get_task_indices(task_labels)

        if len(all_task_indices_dict) == 1:
            # No need to split the input, since everything is from the same task.
            task_id: int = task_labels[0].item()
            self.setup_for_task(task_id)
            return self.forward(observations)

        # Placeholder for the predicitons for each item in the batch.
        # NOTE: We put each item in the batch in this list and then stack the results.
        batch_size = len(task_labels)
        task_outputs: List[Batch] = [None for _ in range(batch_size)]

        for task_id, task_indices in all_task_indices_dict.items():
            # Take a slice of the observations, in which all items come from this task.
            task_observations = get_slice(observations, task_indices)
            # Perform a "normal" forward pass (Base case).
            task_output = self.forward(task_observations)

            # Store the outputs for the items from this task in the list.
            for i, index in enumerate(task_indices):
                task_outputs[index] = get_slice(task_output, i)

        # Stack the results.
        assert all(item is not None for item in task_outputs)
        merged_outputs = concatenate(task_outputs)
        return merged_outputs

    def task_inference_forward_pass(self, observations: Observations) -> Tensor:
        """Forward pass with a simple form of task inference."""
        # We don't have access to task labels (`task_labels` is None).
        # --> Perform a simple kind of task inference:
        # 1. Perform a forward pass with each task's output head;
        # 2. Merge these predictions into a single prediction somehow.
        assert observations.task_labels is None or all(observations.task_labels == None)
        # NOTE: This assumes that the observations are batched.
        # These are used below to indicate the shape of the different tensors.
        B = observations.x.shape[0]
        T = n_known_tasks = len(self.output_heads)
        N = self.action_space.n
        # Tasks encountered previously and for which we have an output head.
        known_task_ids: list[int] = list(range(n_known_tasks))
        assert known_task_ids
        # Placeholder for the predictions from each output head for each item in the
        # batch
        task_outputs = [None for _ in known_task_ids]  # [T, B, N]

        # Get the forward pass for each task.
        for task_id in known_task_ids:
            # Create 'fake' Observations for this forward pass, with 'fake' task labels.
            # NOTE: We do this so we can call `self.forward` and not get an infinite
            # recursion.
            task_labels = torch.full([B], task_id, device=self.device, dtype=int)
            task_observations = replace(observations, task_labels=task_labels)

            # Setup the model for task `task_id`, and then do a forward pass.
            task_forward_pass = self.forward(task_observations)

            task_outputs[task_id] = task_forward_pass

        # 'Merge' the predictions from each output head using some kind of task
        # inference.
        assert all(item is not None for item in task_outputs)
        # Stack the predictions (logits) from each output head.
        stacked_forward_pass: ForwardPass = stack(task_outputs, dim=1)
        logits_from_each_head = stacked_forward_pass.actions.logits
        assert logits_from_each_head.shape == (B, T, N), (logits_from_each_head.shape, (B, T, N))

        # Normalize the logits from each output head with softmax.
        # Example with batch size of 1, output heads = 2, and classes = 4:
        # logits from each head:  [[[123, 456, 123, 123], [1, 1, 2, 1]]]
        # 'probs' from each head: [[[0.1, 0.6, 0.1, 0.1], [0.2, 0.2, 0.4, 0.2]]]
        probs_from_each_head = torch.softmax(logits_from_each_head, dim=-1)
        assert probs_from_each_head.shape == (B, T, N)

        # Simple kind of task inference:
        # For each item in the batch, use the class that has the highest probability
        # accross all output heads.
        max_probs_across_heads, chosen_head_per_class = probs_from_each_head.max(dim=1)
        assert max_probs_across_heads.shape == (B, N)
        assert chosen_head_per_class.shape == (B, N)
        # Example (continued):
        # max probs across heads:        [[0.2, 0.6, 0.4, 0.2]]
        # chosen output heads per class: [[1, 0, 1, 1]]

        # Determine which output head has highest "confidence":
        max_prob_value, most_probable_class = max_probs_across_heads.max(dim=1)
        assert max_prob_value.shape == (B,)
        assert most_probable_class.shape == (B,)
        # Example (continued):
        # max_prob_value: [0.6]
        # max_prob_class: [1]

        # A bit of boolean trickery to get what we need, which is, for each item, the
        # index of the output head that gave the most confident prediction.
        mask = F.one_hot(most_probable_class, N).to(dtype=bool, device=self.device)
        chosen_output_head_per_item = chosen_head_per_class[mask]
        assert mask.shape == (B, N)
        assert chosen_output_head_per_item.shape == (B,)
        # Example (continued):
        # mask: [[False, True, False, True]]
        # chosen_output_head_per_item: [0]

        # Create a bool tensor to select items associated with the chosen output head.
        selected_mask = F.one_hot(chosen_output_head_per_item, T).to(dtype=bool, device=self.device)
        assert selected_mask.shape == (B, T)
        # Select the logits using the mask:
        selected_forward_pass = stacked_forward_pass[selected_mask]
        assert selected_forward_pass.actions.logits.shape == (B, N)
        return selected_forward_pass


from typing import Dict, Tuple, TypeVar

Dataclass = TypeVar("Dataclass", bound=Batch)


def get_task_indices(
    task_labels: Union[List[Optional[int]], np.ndarray, Tensor]
) -> Dict[Optional[int], Union[np.ndarray, Tensor]]:
    """Given an array-like of task labels, gives back a dictionary mapping from task id
    to an array-like of indices for the corresponding indices in the batch.

    Parameters
    ----------
    task_labels : Union[np.ndarray, Tensor]
        [description]

    Returns
    -------
    Dict[Optional[int], Union[np.ndarray, Tensor]]
        Dictionary mapping from task index (int or None) to an ndarray or Tensor
        (depending on the type of `task_labels`) of indices corresponding to the indices
        in `task_labels` that correspond to that task.
    """
    all_task_indices: Dict[Optional[int], Union[np.ndarray, Tensor]] = {}

    if task_labels is None:
        return {}

    output_type = np.asarray

    assert isinstance(task_labels, (np.ndarray, Tensor))

    if isinstance(task_labels, Tensor):
        assert task_labels.ndim == 1 or task_labels.size() == 1, task_labels
        task_labels = task_labels.reshape(-1)
    else:
        assert task_labels.ndim == 1 or task_labels.size == 1, task_labels
        task_labels = task_labels.reshape(-1)

    unique_task_labels = list(set(task_labels.tolist()))

    batch_size = len(task_labels)
    # Get the indices for each task.
    for task_id in unique_task_labels:
        if isinstance(task_labels, np.ndarray):
            task_indices = np.arange(batch_size)[task_labels == task_id]
        else:
            assert isinstance(task_labels, Tensor), task_labels
            task_indices = torch.arange(batch_size, device=task_labels.device)[
                task_labels == task_id
            ]
        all_task_indices[task_id] = task_indices
    return all_task_indices


# TODO: Remove this, currently unused.
def cleanup_task_labels(
    task_labels: Optional[Sequence[Optional[int]]],
) -> Optional[np.ndarray]:
    """'cleans up' the task labels, by returning either None or an integer numpy array.

    TODO: Not clear why we really have to do this in the first place. The point is, if
    we wanted to allow only a fraction of task labels for instance, then we have to deal
    with np.ndarrays with `object` dtypes.

    Parameters
    ----------
    task_labels : Optional[Sequence[Optional[int]]]
        Some sort of array of task ids, or None.

    Returns
    -------
    Optional[np.ndarray]
        None if there are no task ids, or an integer numpy array if there are.

    Raises
    ------
    NotImplementedError
        If only a portion of the task labels are available.
    """
    if isinstance(task_labels, np.ndarray):
        if task_labels.dtype == object:
            if all(task_labels == None):
                task_labels = None
            elif not any(task_labels == None):
                task_labels = torch.as_tensor(task_labels.astype(int))
            else:
                raise NotImplementedError(f"TODO: Only given a portion of task labels?")
                # IDEA: Maybe set task_id to -1 in those cases, and return an int
                # ndarray as well?
    if task_labels is None:
        return None
    assert isinstance(task_labels, (np.ndarray, Tensor)), task_labels
    if not task_labels.shape:
        task_labels = task_labels.reshape([1])
    if isinstance(task_labels, Tensor):
        task_labels = task_labels.cpu().numpy()
    if task_labels is not None:
        task_labels = task_labels.astype(int)
    assert task_labels is None or isinstance(task_labels, np.ndarray)
    return task_labels


================================================
FILE: sequoia/methods/models/base_model/multihead_model_test.py
================================================
"""Tests for the class-incremental version of the Model class.
"""
# from sequoia.conftest import config
from collections import defaultdict
from typing import Dict, List, Optional, Tuple, Type

import numpy as np
import pytest
import torch
from continuum import ClassIncremental
from continuum.datasets import MNIST
from continuum.tasks import TaskSet
from gym import spaces
from torch import Tensor, nn

from sequoia.common import Loss
from sequoia.common.config import Config
from sequoia.methods.base_method import BaseMethod
from sequoia.methods.models.forward_pass import ForwardPass
from sequoia.methods.models.output_heads.rl.episodic_a2c import EpisodicA2C
from sequoia.settings import ClassIncrementalSetting, RLSetting, TraditionalRLSetting
from sequoia.settings.rl import IncrementalRLSetting

from .base_model import BaseModel
from .multihead_model import MultiHeadModel, OutputHead, get_task_indices


@pytest.fixture()
def mixed_samples(config: Config):
    """Fixture that produces some samples from each task."""
    dataset = MNIST(config.data_dir, download=True, train=True)
    datasets: List[TaskSet] = ClassIncremental(dataset, nb_tasks=5)
    n_samples_per_task = 10
    indices = list(range(10))
    samples_per_task: Dict[int, Tensor] = {
        i: tuple(map(torch.as_tensor, taskset.get_samples(indices)))
        for i, taskset in enumerate(datasets)
    }
    return samples_per_task


class MockOutputHead(OutputHead):
    def __init__(self, *args, Actions: Type, task_id: int = -1, **kwargs):
        super().__init__(*args, **kwargs)
        self.task_id = task_id
        self.Actions = Actions
        self.name = f"task_{task_id}"

    def forward(self, observations, representations) -> Tensor:  # type: ignore
        """This mock forward just creates an action that is related to the observation
        and the task id for this output head.
        """
        x: Tensor = observations.x
        assert (observations.task_labels == self.task_id).all()
        h_x = representations
        # actions = torch.stack([h_i.mean() * self.task_id for h_i in h_z])
        # actions = torch.stack([x_i.mean() * self.task_id for x_i in x])
        actions = [x_i.mean() * self.task_id for x_i in x]
        actions = torch.stack(actions)
        fake_logits = torch.rand([actions.shape[0], self.action_space.n])
        from sequoia.methods.models.output_heads.classification_head import ClassificationOutput

        # assert issubclass(ClassificationOutput, self.Actions)
        # TODO: Ideally self.Actions would already be a subclass of ClassificationActions!
        # return self.Actions(y_pred=actions, logits=fake_logits)
        return ClassificationOutput(y_pred=actions, logits=fake_logits)

    def get_loss(self, forward_pass, actions, rewards):
        return Loss(self.name, 0.0)


# def mock_output_task(self: MultiHeadModel, x: Tensor, h_x: Tensor) -> Tensor:
#     return self.output_head(x)

# def mock_encoder(self: MultiHeadModel, x: Tensor) -> Tensor:
#     return x.new_ones(self.hp.hidden_size)


@pytest.mark.parametrize(
    "indices",
    [
        slice(0, 10),  # all the same task (0)
        slice(0, 20),  # 10 from task 0, 10 from task 1
        slice(0, 30),  # 10 from task 0, 10 from task 1, 10 from task 2
        slice(0, 50),  # 10 from each task.
    ],
)
def test_multiple_tasks_within_same_batch(
    mixed_samples: Dict[int, Tuple[Tensor, Tensor, Tensor]],
    indices: slice,
    monkeypatch,
    config: Config,
):
    """TODO: Write out a test that checks that when given a batch with data
    from different tasks, and when the model is multiheaded, it will use the
    right output head for each image.
    """
    # Get a mixed batch
    xs, ys, ts = map(torch.cat, zip(*mixed_samples.values()))
    xs = xs[indices]
    ys = ys[indices]
    ts = ts[indices].int()
    obs = ClassIncrementalSetting.Observations(x=xs, task_labels=ts)

    setting = ClassIncrementalSetting()
    model = MultiHeadModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(batch_size=30, multihead=True),
        config=config,
    )

    class MockEncoder(nn.Module):
        def forward(self, x: Tensor):
            return x.new_ones([x.shape[0], model.hidden_size])

    mock_encoder = MockEncoder()
    model.encoder = mock_encoder

    for i in range(5):
        model.output_heads[str(i)] = MockOutputHead(
            input_space=spaces.Box(0, 1, [model.hidden_size]),
            action_space=spaces.Discrete(2),
            Actions=setting.Actions,
            task_id=i,
        )
    model.output_head = model.output_heads["0"]

    forward_pass = model(obs)
    y_preds = forward_pass["y_pred"]

    assert y_preds.shape == ts.shape
    assert torch.all(y_preds == ts * xs.view([xs.shape[0], -1]).mean(1))


def test_multitask_rl_bug_without_PL(monkeypatch):
    """TODO: on_task_switch is called on the new observation, but we need to produce a
    loss for the output head that we were just using!
    """
    # NOTE: Tasks don't have anything to do with the task schedule. They are sampled at
    # each episode.
    max_episode_steps = 5
    setting = TraditionalRLSetting(
        dataset="cartpole",
        batch_size=1,
        nb_tasks=2,
        train_max_steps=100,
        max_episode_steps=max_episode_steps,
        add_done_to_observations=True,
    )
    assert setting.stationary_context

    # setting = RLSetting.load_benchmark("monsterkong")
    config = Config(debug=True, verbose=True, seed=123)
    config.seed_everything()
    model = BaseModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(
            multihead=True,
            output_head=EpisodicA2C.HParams(accumulate_losses_before_backward=True),
        ),
        config=config,
    )
    # TODO: Maybe add some kind of "hook" to check which losses get returned when?
    model.train()
    # from pytorch_lightning import Trainer
    # trainer = Trainer(fast_dev_run=True)
    # trainer.fit(model, train_dataloader=setting.train_dataloader())
    # trainer.setup(model, stage="fit")

    # from pytorch_lightning import Trainer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    episodes = 0
    max_episodes = 5

    # Dict mapping from step to loss at that step.
    losses: Dict[int, Loss] = {}

    with setting.train_dataloader() as env:
        env.seed(123)
        # env = TimeLimit(env, max_episode_steps=max_episode_steps)
        # Iterate over the environment, which yields one observation at a time:
        for step, obs in enumerate(env):
            assert isinstance(obs, RLSetting.Observations)

            if step == 0:
                assert not any(obs.done)
            start_task_label = obs["task_labels"][0]

            stored_steps_in_each_head_before = {
                task_key: output_head.num_stored_steps(0)
                for task_key, output_head in model.output_heads.items()
            }
            forward_pass: ForwardPass = model.forward(observations=obs)
            rewards = env.send(forward_pass.actions)

            loss: Loss = model.get_loss(
                forward_pass=forward_pass, rewards=rewards, loss_name="debug"
            )
            stored_steps_in_each_head_after = {
                task_key: output_head.num_stored_steps(0)
                for task_key, output_head in model.output_heads.items()
            }
            # if step == 5:
            #     assert False, (loss, stored_steps_in_each_head_before, stored_steps_in_each_head_after)

            if any(obs.done):
                assert loss.loss != 0.0, step
                assert loss.loss.requires_grad

                # Backpropagate the loss, update the models, etc etc.
                loss.loss.backward()
                model.on_after_backward()
                optimizer.step()
                model.on_before_zero_grad(optimizer)
                optimizer.zero_grad()

                # TODO: Need to let the model know than an update is happening so it can clear
                # buffers etc.

                episodes += sum(obs.done)
                losses[step] = loss
            else:
                assert loss.loss == 0.0
            # TODO:
            print(
                f"Step {step}, episode {episodes}: x={obs.x}, done={obs.done}, reward={rewards} task labels: {obs.task_labels}, loss: {loss.losses.keys()}: {loss.loss}"
            )

            if episodes > max_episodes:
                break
    # assert False, losses


@pytest.mark.xfail(reason=f"TODO: Re-enable this test once the BaseMethod works in RL again.")
def test_multitask_rl_bug_with_PL(monkeypatch, config: Config):
    """ """
    # NOTE: Tasks don't have anything to do with the task schedule. They are sampled at
    # each episode.

    cpu_config = config
    # cpu_config = Config(device="cpu", num_workers=0)

    setting = TraditionalRLSetting(
        dataset="cartpole",
        batch_size=1,
        num_workers=0,
        nb_tasks=2,
        train_max_steps=200,
        test_max_steps=200,
        max_episode_steps=5,
        add_done_to_observations=True,
        config=cpu_config,
    )
    assert setting.train_max_steps == 200
    assert setting.test_max_steps == 200
    assert setting.stationary_context

    # setting = RLSetting.load_benchmark("monsterkong")
    cpu_config.seed_everything()
    model = BaseModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(
            multihead=True,
            output_head=EpisodicA2C.HParams(accumulate_losses_before_backward=True),
        ),
        config=cpu_config,
    ).to(device=config.device)

    # TODO: Maybe add some kind of "hook" to check which losses get returned when?
    model.train()
    assert not model.automatic_optimization

    # Import this and use it to create the Trainer, rather than creating the Trainer
    # directly, so we don't get the same bug (due to with_is_last in PL) from the
    # DataConnector.
    from sequoia.methods.base_method import TrainerConfig

    # NOTE: We only do this so that the Model has a self.trainer attribute and so the
    # model.training_step below can be used:
    if config.device.type == "cuda":
        trainer_config = TrainerConfig(fast_dev_run=True)
    else:
        trainer_config = TrainerConfig(
            fast_dev_run=True,
            gpus=0,
            distributed_backend=None,
        )

    trainer = trainer_config.make_trainer(config=cpu_config)

    # Fit in 'fast_dev_run' mode, so just a single batch of train / valid / test data.
    with setting.train_dataloader() as temp_env:
        temp_env.seed(123)
        trainer.fit(model, train_dataloader=temp_env)

    # NOTE: If we don't clear the buffers, there is a bug because the things that get put
    # in buffers aren't on the same device as later.
    model.output_head.clear_all_buffers()

    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    episodes = 0
    max_episodes = 5

    # Dict mapping from step to loss at that step.
    losses: Dict[int, List[Loss]] = defaultdict(list)

    with setting.train_dataloader() as env:
        env.seed(123)

        # TODO: Interesting bug/problem: Since the VectorEnvs always want to reset the
        # env at the end of the episode, they also also so on the individual envs.
        # In order to solve that, we need to NOT put any 'ActionLimit' on the inside
        # envs, but only on the outer env.
        for step, obs in enumerate(env):
            assert isinstance(obs, RLSetting.Observations)

            print(step, env.is_closed())
            forward_pass = model.training_step(batch=obs, batch_idx=step)
            step_results: Optional[Loss] = model.training_step_end([forward_pass])
            loss_tensor: Optional[Tensor] = None

            if step > 0 and step % 5 == 0:
                # We should get a loss at each episode end:
                assert all(obs.done), step  # Since batch_size == 1 for now.
                assert step_results is not None, (step, obs.task_labels)
                loss_tensor = step_results["loss"]
                loss: Loss = step_results["loss_object"]
                print(f"Loss at step {step}: {loss}")
                losses[step].append(loss)

            else:
                assert step_results is None

            print(
                f"Step {step}, episode {episodes}: x={obs.x}, done={obs.done}, task labels: {obs.task_labels}, loss_tensor: {loss_tensor}"
            )

            if step >= setting.train_max_steps:
                assert False, "Shouldn't the environment have closed at this point?"

    for step, step_losses in losses.items():
        print(f"Losses at step {step}:")
        for loss in step_losses:
            print(f"\t{loss}")
    # assert False, losses


@pytest.mark.parametrize(
    "input, expected",
    [
        (np.array([0, 0, 0, 0]), {0: np.arange(4)}),
        (torch.as_tensor([0, 0, 0, 0]), {0: torch.arange(4)}),
        (
            torch.as_tensor([0, 0, 1, 0]),
            {0: torch.LongTensor([0, 1, 3]), 1: torch.LongTensor([2])},
        ),
        (
            np.array([0, 0, 1, None]),
            {0: np.array([0, 1]), 1: np.array([2]), None: np.array([3])},
        ),
    ],
)
def test_get_task_indices(input, expected):
    actual = get_task_indices(input)
    assert str(actual) == str(expected)


@pytest.mark.parametrize(
    "indices",
    [
        slice(0, 10),  # all the same task (0)
        slice(0, 20),  # 10 from task 0, 10 from task 1
        slice(0, 30),  # 10 from task 0, 10 from task 1, 10 from task 2
        slice(0, 50),  # 10 from each task.
    ],
)
def test_task_inference_sl(
    mixed_samples: Dict[int, Tuple[Tensor, Tensor, Tensor]],
    indices: slice,
    config: Config,
):
    """TODO: Write out a test that checks that when given a batch with data
    from different tasks, and when the model is multiheaded, it will use the
    right output head for each image.
    """
    # Get a mixed batch
    xs, ys, ts = map(torch.cat, zip(*mixed_samples.values()))
    xs = xs[indices]
    ys = ys[indices]
    ts = ts[indices].int()
    obs = ClassIncrementalSetting.Observations(x=xs, task_labels=None)

    setting = ClassIncrementalSetting()
    model = MultiHeadModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(batch_size=30, multihead=True),
        config=config,
    )

    class MockEncoder(nn.Module):
        def forward(self, x: Tensor):
            return x.new_ones([x.shape[0], model.hidden_size])

    mock_encoder = MockEncoder()
    model.encoder = mock_encoder

    for i in range(5):
        model.output_heads[str(i)] = MockOutputHead(
            input_space=spaces.Box(0, 1, [model.hidden_size]),
            action_space=spaces.Discrete(setting.action_space.n),
            Actions=setting.Actions,
            task_id=i,
        )
    model.output_head = model.output_heads["0"]

    forward_pass = model(obs)
    y_preds = forward_pass.actions.y_pred

    assert y_preds.shape == ts.shape
    # TODO: Check that the task inference works by changing the logits to be based on
    # the assigned task in the Mock output head.
    # assert torch.all(y_preds == ts * xs.view([xs.shape[0], -1]).mean(1))


@pytest.mark.skip(reason=f"TODO: Re-enable this test once the BaseMethod works in RL again.")
@pytest.mark.timeout(120)
def test_task_inference_rl_easy(config: Config):
    from sequoia.methods.base_method import BaseMethod

    method = BaseMethod(config=config)
    from sequoia.settings.rl import IncrementalRLSetting

    setting = IncrementalRLSetting(
        dataset="cartpole",
        nb_tasks=2,
        max_episode_steps=20,
        train_max_steps=200,
        test_max_steps=200,
        config=config,
    )
    results = setting.apply(method)
    assert results
    # assert False, results.to_log_dict()


@pytest.mark.skip(reason=f"TODO: Re-enable this test once the BaseMethod works in RL again.")
@pytest.mark.timeout(120)
def test_task_inference_rl_hard(config: Config):

    method = BaseMethod(config=config)

    setting = IncrementalRLSetting(
        dataset="cartpole",
        nb_tasks=2,
        train_max_steps=1000,
        test_max_steps=1000,
        config=config,
    )
    results = setting.apply(method)
    assert results
    # assert False, results.to_log_dict()


from sequoia.methods.base_method import BaseMethod
from sequoia.settings.sl import TraditionalSLSetting
from sequoia.settings.sl.continual.setting import subset


@pytest.mark.timeout(30)
def test_task_inference_multi_task_sl(config: Config):
    setting = TraditionalSLSetting(dataset="mnist", nb_tasks=2, config=config)
    # TODO: Maybe add this kind of 'max_steps_per_task' argument even in supervised
    # settings:
    dataset_length = 1000
    # TODO: Shorten the train/test datasets?
    method = BaseMethod(config=config, max_epochs=1)
    setting.setup()
    setting.train_datasets = [
        subset(dataset, list(range(dataset_length))) for dataset in setting.train_datasets
    ]
    setting.val_datasets = [
        subset(dataset, list(range(dataset_length))) for dataset in setting.val_datasets
    ]
    setting.test_datasets = [
        subset(dataset, list(range(dataset_length))) for dataset in setting.test_datasets
    ]

    results = setting.apply(method)
    assert 0.80 <= results.average_final_performance.objective


================================================
FILE: sequoia/methods/models/base_model/self_supervised_model.py
================================================
""" Base class for a Self-Supervised model.

This is meant to be a kind of 'Mixin' that you can use and extend in order
to add self-supervised losses to your model.
"""

import warnings
from dataclasses import dataclass
from typing import Dict, Optional, TypeVar

from torch import Tensor, nn

from sequoia.common.config import Config
from sequoia.common.loss import Loss
from sequoia.methods.aux_tasks.auxiliary_task import AuxiliaryTask
from sequoia.settings import Rewards, Setting, SettingType
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import flatten_dict

from .model import Model

# from sequoia.utils.module_dict import ModuleDict


logger = get_logger(__name__)
HParamsType = TypeVar("HParamsType", bound="SelfSupervisedModel.HParams")


class SelfSupervisedModel(Model[SettingType]):
    """
    Model 'mixin' that adds support for modular, configurable "auxiliary tasks".

    These auxiliary tasks are used to get a self-supervised loss to train on
    when labels aren't available.
    """

    @dataclass
    class HParams(Model.HParams):
        """Hyperparameters of a Self-Supervised method."""

        # vae: Optional[VAEReconstructionTask.Options] = None
        # ae: Optional[AEReconstructionTask.Options] = None

    def __init__(self, setting: Setting, hparams: HParams, config: Config):
        super().__init__(setting, hparams, config)
        self.hp: SelfSupervisedModel.HParams
        # Dictionary of auxiliary tasks.
        self.tasks: Dict[str, AuxiliaryTask] = self.create_auxiliary_tasks()

    def get_loss(
        self,
        forward_pass: Dict[str, Tensor],
        rewards: Rewards = None,
        loss_name: str = "",
    ) -> Loss:
        # Get the output task loss (the loss of the base model)
        loss: Loss = super().get_loss(forward_pass, rewards=rewards, loss_name=loss_name)

        # Add the self-supervised losses from all the enabled auxiliary tasks.
        for task_name, aux_task in self.tasks.items():
            assert task_name, "Auxiliary tasks should have a name!"
            if aux_task.enabled:
                # TODO: Auxiliary tasks all share the same 'y' for now, but it
                # might make more sense to organize this differently.
                y = rewards.y if rewards else None
                aux_loss: Loss = aux_task.get_loss(forward_pass, y=y)
                # Scale the loss by the corresponding coefficient before adding
                # it to the total loss.
                loss += aux_task.coefficient * aux_loss.to(self.device)
                if self.config.debug and self.config.verbose:
                    logger.debug(f"{task_name} loss: {aux_loss.total_loss}")

        return loss

    def add_auxiliary_task(
        self, aux_task: AuxiliaryTask, key: str = None, coefficient: float = None
    ) -> None:
        """Adds an auxiliary task to the self-supervised model."""
        key = aux_task.name if key is None else key
        if key in self.tasks:
            raise RuntimeError(f"There is already an auxiliary task with name {key} in the model!")
        self.tasks[key] = aux_task.to(self.device)
        if coefficient is not None:
            aux_task.coefficient = coefficient
        elif not aux_task.coefficient:
            warnings.warn(
                UserWarning(f"Adding auxiliary task with name {key}, but with coefficient of 0.!")
            )

        if aux_task.coefficient:
            aux_task.enable()

    def create_auxiliary_tasks(self) -> Dict[str, AuxiliaryTask]:
        # Share the relevant parameters with all the auxiliary tasks.
        # We do this by setting class attributes.
        # TODO: Make sure that we aren't duplicating all of the model's weights
        # by setting a class attribute.
        AuxiliaryTask._model = self
        AuxiliaryTask.hidden_size = self.hidden_size
        AuxiliaryTask.input_shape = self.input_shape
        AuxiliaryTask.encoder = self.encoder
        AuxiliaryTask.output_head = self.output_head
        # AuxiliaryTask.preprocessing = self.preprocess_batch

        tasks: Dict[str, AuxiliaryTask] = nn.ModuleDict()
        # TODO(@lebrice): Should we create the tasks even if they aren't used,
        # and then 'enable' them when they are needed? (I'm thinking that maybe
        # being enable/disable auxiliary tasks when needed might be useful
        # later?)
        # if self.hp.vae and self.hp.vae.coefficient:
        #     tasks[VAEReconstructionTask.name] = VAEReconstructionTask(options=self.hp.vae)
        # if self.hp.ae and self.hp.ae.coefficient:
        #     tasks[AEReconstructionTask.name] = AEReconstructionTask(options=self.hp.ae)
        # if self.hp.ewc and self.hp.ewc.coefficient:
        #     tasks[EWCTask.name] = EWCTask(options=self.hp.ewc)

        return tasks

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching between tasks.

        Args:
            task_id (int): the Id of the task.
        """
        for task_name, task in self.tasks.items():
            if task.enabled:
                task.on_task_switch(task_id=task_id)
        super().on_task_switch(task_id=task_id)

    def shared_modules(self) -> Dict[str, nn.Module]:
        """Returns any trainable modules in `self` that are shared across tasks.

        By giving this information, these weights can then be used in
        regularization-based auxiliary tasks like EWC, for example.

        For the base model, this returns a dictionary with the encoder, for example.
        When using auxiliaryt tasks, they also add their shared weights, if any.

        Returns
        -------
        Dict[str, nn.Module]:
            Dictionary mapping from name to the shared modules, if any.
        """
        shared_modules = super().shared_modules()
        for task_name, task in self.tasks.items():
            # TODO: What separator to use when dealing with nested dictionaries? I seem
            # to recall that ModuleDicts don't like some separators.
            sep = "."
            task_modules = task.shared_modules()
            flattened_task_modules = flatten_dict(task_modules, separator=sep)
            for module_name, module in flattened_task_modules.items():
                shared_modules[f"{task_name}{sep}{module_name}"] = module
        return shared_modules


================================================
FILE: sequoia/methods/models/base_model/self_supervised_model_test.py
================================================
from typing import Dict, List, Tuple, Type

import pytest

from sequoia.conftest import id_fn, parametrize, slow
from sequoia.methods.aux_tasks import AE, EWC, VAE
from sequoia.methods.base_method import BaseMethod
from sequoia.settings.base import Results, Setting
from sequoia.settings.sl import TaskIncrementalSLSetting, TraditionalSLSetting
from sequoia.settings.sl.incremental import ClassIncrementalSetting

Method = BaseMethod
# Use 'Method' as an alias for the actual Method subclass under test. (since at
# the moment quite a few tests share some code.
# List of datasets that are currently supported for this method.
supported_datasets: List[str] = [
    "mnist",
    "fashion_mnist",
    "cifar10",
    "cifar100",
    "kmnist",
]


def test_get_applicable_settings():
    settings = Method.get_applicable_settings()
    assert ClassIncrementalSetting in settings
    assert TaskIncrementalSLSetting in settings
    assert TraditionalSLSetting in settings


@pytest.fixture(
    scope="module",
    params=[
        {},
        {VAE: 1},
        {AE: 1},
        {EWC: 1},
    ],  # no aux task.
    ids=id_fn,
)
def method_and_coefficients(request, tmp_path_factory):
    """Fixture that creates a method to be reused for the tests below as well
    as return the coefficients for each auxiliary task.
    """
    # Reuse the Method accross all tests below
    log_dir = tmp_path_factory.mktemp("log_dir")

    aux_task_coefficients = request.param

    args = f"""
    --debug
    --log_dir_root {log_dir}
    --default_root_dir {log_dir}
    --knn_samples 0
    --seed 123
    --fast_dev_run
    """
    for aux_task_name, coef in aux_task_coefficients.items():
        args += f"--{aux_task_name}.coef {coef} "

    return Method.from_args(args, strict=False), aux_task_coefficients


# @parametrize("dataset", get_dataset_params(Method, supported_datasets))


from sequoia.methods.method_test import key_fn


@slow
@parametrize("setting_type", sorted(Method.get_applicable_settings(), key=key_fn))
def test_fast_dev_run(
    method_and_coefficients: Tuple[Method, Dict[str, float]],
    setting_type: Type[Setting],
    test_dataset: str,
):
    """Performs a quick run with only one batch of train / val / test data and
    check that the 'Results' objects are ok.
    """
    method, aux_task_coefficients = method_and_coefficients
    if test_dataset not in setting_type.available_datasets:
        pytest.skip(msg=f"dataset {test_dataset} isn't available for this setting.")
    # Instantiate the setting
    setting: Setting = setting_type(dataset=test_dataset, nb_tasks=2)
    results: Results = setting.apply(method)
    validate_results(results, aux_task_coefficients)


def validate_results(results: Results, aux_task_coefficients: Dict[str, float]):
    """Makes sure that the results make sense for the method being tested.

    Checks that the Loss object has losses for each 'enabled' auxiliary task.

    Args:
        results (Results): A given Results object.
    """
    assert results is not None
    assert results.hparams is not None
    assert results.test_loss is not None

    for loss in results.task_losses:
        for aux_task_name, coef in aux_task_coefficients.items():
            assert aux_task_name in loss.losses
            aux_task_loss = loss.losses[aux_task_name]
            assert aux_task_loss.loss >= 0.0
            assert aux_task_loss._coefficient == coef


================================================
FILE: sequoia/methods/models/base_model/semi_supervised_model.py
================================================
"""
Addon that enables training on semi-supervised batches.

NOTE: Not used at the moment, but should work just fine.
"""
from dataclasses import dataclass
from typing import Dict, Optional, Sequence, Union

import numpy as np
from torch import Tensor

# from sequoia.common.callbacks import KnnCallback
from sequoia.common.loss import Loss
from sequoia.settings import Rewards, SettingType
from sequoia.utils.logging_utils import get_logger

from .model import Model

logger = get_logger(__name__)


class SemiSupervisedModel(Model[SettingType]):
    @dataclass
    class HParams(Model.HParams):
        """Hyperparameters of a Self-Supervised method."""

        # Adds Options for a KNN classifier callback, which is used to evaluate
        # the quality of the representations on each task after each training
        # epoch.
        # TODO: Debug/test this callback to make sure it still works fine.
        # knn_callback: KnnCallback = mutable_field(KnnCallback)

    def get_loss(
        self,
        forward_pass: Dict[str, Tensor],
        rewards: Optional[Rewards] = None,
        loss_name: str = "",
    ) -> Loss:
        """Trains the model on a batch of (potentially partially labeled) data.

        Args:
            forward_pass (Dict[str, Tensor]): WIP: The results of the forward
                pass (processed input, predictions, etc.)
            rewards (Union[Optional[Tensor], List[Optional[Tensor]]]):
                Labels associated with the data. Can either be:
                - None: fully unlabeled batch
                - Tensor: fully labeled batch
                - List[Optional[Tensor]]: Partially labeled batch.
            loss_name (str, optional): Name of the resulting loss object. Defaults to
                "Train".

        Returns:
            Loss: a loss object made from both the unsupervised and
                supervised losses.
        """

        # TODO: We could also just use '-1' instead as the 'no-label' val: this
        # would make it a bit simpler than having both numpy arrays and tensors
        # in the batch

        y: Union[Optional[Tensor], Sequence[Optional[Tensor]]] = rewards.y
        if y is None or all(y_i is not None for y_i in y):
            # Fully labeled/unlabeled batch
            # NOTE: Tensors can't have None items, so if we get a Tensor that
            # means that we have all task labels.
            labeled_ratio = float(y is not None)
            return super().get_loss(forward_pass, rewards, loss_name=loss_name)

        is_labeled: np.ndarray = np.asarray([y_i is not None for y_i in y])

        # Batch is maybe a mix of labeled / unlabeled data.
        labeled_y = y[is_labeled]
        # TODO: Might have to somehow re-order the results based on the indices?
        # TODO: Join (merge) the metrics? or keep them separate?
        labeled_forward_pass = {k: v[is_labeled] for k, v in forward_pass.items()}
        unlabeled_forward_pass = {k: v[~is_labeled] for k, v in forward_pass.items()}

        labeled_ratio = len(labeled_y) / len(y)
        logger.debug(f"Labeled ratio: {labeled_ratio}")

        # Create the 'total' loss for the batch, with the required name.
        # We will then create two 'sublosses', one named 'unsupervised' and one
        # named 'supervised', each containing the respective losses and metrics.
        # TODO: Make sure that this doesn't make it harder to get the metrics
        # from the Loss object. If it does, then we could maybe just fuse the
        # labeled and unlabeled losses and metrics, but that might also cause
        # issues.
        loss = Loss(name=loss_name)
        if unlabeled_forward_pass:
            # TODO: Setting a different loss name for the for this is definitely going to cause trouble!
            unsupervised_loss = super().get_loss(
                unlabeled_forward_pass,
                rewards=None,
                loss_name="unsupervised",
            )
            loss += unsupervised_loss

        if labeled_forward_pass:
            supervised_loss = super().get_loss(
                labeled_forward_pass,
                rewards=labeled_y,
                loss_name="supervised",
            )
            loss += supervised_loss

        return loss


================================================
FILE: sequoia/methods/models/baseline_model.puml
================================================
@startuml base_model

' !include output_heads.puml

package base_model {

    package model {
        abstract class Model {
            + hparams: Model.HParams
            + encoder: nn.Module
            + output_head: OutputHead
            + forward(Observations): ForwardPass
            + get_loss(ForwardPass, Rewards): Loss
            + get_actions(observations: Observations, action_space: Space): Actions
        }
        ' class Model.HParams extends BaseHParams {}
        ' class BaseHParams {
        class Model.HParams {
            {static} + available_optimizers: Dict[str, Type[Optimizer]]
            {static} + available_encoders: Dict[str, Type[nn.Module]]

            + learning_rate: float = 0.001
            + weight_decay: float = 1e-6
            + optimizer: str = "adam"
            + encoder: str = "resnet18"
            + batch_size: Optional[int]
            + train_from_scratch: bool = False
            + freeze_pretrained_encoder_weights: bool = False
            + output_head: OutputHead.HParams
            + detach_output_head: bool = False
        }
        
    }

    together {
        package semi_supervised_model {
            abstract class SemiSupervisedModel extends Model {
                + forward(Observations): ForwardPass
                + get_loss(ForwardPass, Optional[Rewards]): Loss
            }
            abstract class SemiSupervisedModel.HParams extends Model.HParams {
                + knn_callback: KnnCallback note (todo: unused atm)
            }
        }
        package self_supervised_model {
            abstract class SelfSupervisedModel extends Model {
                + hparams: SelfSupervisedModel.HParams
                + tasks: dict[str, AuxiliaryTask]
                + add_auxiliary_task(task AuxiliaryTask)
            }
            abstract class SelfSupervisedModel.HParams extends Model.HParams {
                + simclr: Optional[SimCLRTask.Options]
                + vae: Optional[VAEReconstructionTask.Options]
                + ae: Optional[AEReconstructionTask.Options]
                + ewc: Optional[EWCTask.Options]
            }
        }

        package multihead_model {
            abstract class MultiHeadModel extends Model {
                + output_heads: dict[str, OutputHead]
                + forward(Observations): ForwardPass
                + on_task_switch(task_id: Optional[int])
            }

            abstract class MultiHeadModel.HParams extends Model.HParams {
                + multihead: Optional[bool]
            }
        }
    }
    package base_model as base_model.base_model {
        class BaseModel extends SemiSupervisedModel, SelfSupervisedModel, MultiHeadModel
        {
            + hparams: BaseModel.HParams
        }
        class BaseModel.HParams extends SelfSupervisedModel.HParams, MultiHeadModel.HParams, SemiSupervisedModel.HParams {
        }
    }

Model "1" *-- "1" OutputHead
' Model *-- Model.HParams
' BaseModel *-- BaseModel.HParams
' SemiSupervisedModel *-- SemiSupervisedModel.HParams
' SelfSupervisedModel *-- SelfSupervisedModel.HParams
' MultiHeadModel *-- MultiHeadModel.HParams
SelfSupervisedModel "1" o-- "many" aux_tasks.AuxiliaryTask
' BaseMethod "1" *--> "1" BaseModel : uses
MultiHeadModel "1" *-- "many" OutputHead
' MultiHeadModel "1" *-- "1" OutputHead

}
@enduml


================================================
FILE: sequoia/methods/models/fcnet.py
================================================
""" TODO: Take out the dense network from the OutputHead. """
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional, Type, Union, overload

from torch import nn

from sequoia.common.hparams import HyperParameters, categorical, uniform


class FCNet(nn.Sequential):
    """Fully-connected network."""

    @dataclass
    class HParams(HyperParameters):
        """Hyper-parameters of a fully-connected network."""

        available_activations: ClassVar[Dict[str, Type[nn.Module]]] = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "elu": nn.ELU,  # No idea what these do, but hey, they are available!
            "gelu": nn.GELU,
            "relu6": nn.ReLU6,
        }
        # Number of hidden layers in the output head.
        hidden_layers: int = uniform(0, 10, default=3)
        # Number of neurons in each hidden layer of the output head.
        # If a single value is given, than each of the `hidden_layers` layers
        # will have that number of neurons.
        # If `n > 1` values are given, then `hidden_layers` must either be 0 or
        # `n`, otherwise a RuntimeError will be raised.
        hidden_neurons: Union[int, List[int]] = uniform(16, 512, default=64)
        activation: Type[nn.Module] = categorical(available_activations, default=nn.Tanh)
        # Dropout probability. Dropout is applied after each layer.
        # Set to None or 0 for no dropout.
        # TODO: Not sure if this is how it's typically used. Need to check.
        dropout_prob: Optional[float] = uniform(0, 0.8, default=0.2)

        def __post_init__(self):
            super().__post_init__()
            if isinstance(self.activation, str):
                self.activation = self.available_activations[self.activation.lower()]

            if isinstance(self.hidden_neurons, int):
                self.hidden_neurons = [self.hidden_neurons]

            # no value passed to --hidden_layers
            if self.hidden_layers == 0:
                if len(self.hidden_neurons) == 1:
                    # Default Setting: No hidden layers.
                    self.hidden_neurons = []
                elif len(self.hidden_neurons) > 1:
                    # Set the number of hidden layers to the number of passed values.
                    self.hidden_layers = len(self.hidden_neurons)
            elif self.hidden_layers > 0 and len(self.hidden_neurons) == 1:
                # Duplicate that value for each of the `hidden_layers` layers.
                self.hidden_neurons *= self.hidden_layers
            elif self.hidden_layers == 1 and not self.hidden_neurons:
                self.hidden_layers = 0

            if self.hidden_layers != len(self.hidden_neurons):
                raise RuntimeError(
                    f"Invalid values: hidden_layers ({self.hidden_layers}) != "
                    f"len(hidden_neurons) ({len(self.hidden_neurons)})."
                )

    @overload
    def __init__(self, in_features: int, out_features: int, hparams: HParams = None):
        ...

    @overload
    def __init__(
        self,
        in_features: int,
        out_features: int,
        hidden_layers: int = 1,
        hidden_neurons: List[int] = None,
        activation: Type[nn.Module] = nn.Tanh,
    ):
        ...

    def __init__(self, in_features: int, out_features: int, hparams: HParams = None, **kwargs):
        self.in_features = in_features
        self.out_features = out_features
        self.hparams = hparams or self.HParams(**kwargs)
        hidden_layers: List[nn.Module] = []
        output_size = out_features
        assert isinstance(self.hparams.hidden_neurons, list)
        for i, neurons in enumerate(self.hparams.hidden_neurons):
            out_features = neurons
            if self.hparams.dropout_prob:
                hidden_layers.append(nn.Dropout(p=self.hparams.dropout_prob))
            hidden_layers.append(nn.Linear(in_features, out_features))
            hidden_layers.append(self.hparams.activation())
            in_features = out_features  # next input size is output size of prev.
        super().__init__(nn.Flatten(), *hidden_layers, nn.Linear(in_features, output_size))

    # TODO: IDEA: use @singledispatchmethod to add a `forward` implementation
    # for mapping input space to output space.
    # def forward(self, input: Any)


================================================
FILE: sequoia/methods/models/forward_pass.py
================================================
""" Typed object that represents the outputs of the forward pass of a model. """

from dataclasses import dataclass
from typing import Any, Optional

from simple_parsing.helpers.flatten import FlattenedAccess
from torch import Tensor

from sequoia.common import Batch
from sequoia.settings.base.objects import Actions, Observations, Rewards


@dataclass(frozen=True)
class ForwardPass(Batch, FlattenedAccess):
    """Typed version of the result of a forward pass through a model.

    FlattenedAccess is pretty cool, but potentially confusing. We can get
    any attributes in the children by getting them directly on the
    parent. So if the `observation` has an `x` attribute, we can get on this
    object directly with `self.x`, and it will fetch the attribute from the
    observation.
    """

    observations: Observations
    representations: Tensor
    actions: Actions
    rewards: Optional[Rewards] = None
    # Note: Might be annoying later if there is a need for subclasses of ForwardPass,
    # since dataclass fields without a default value can't follow fields that have one.

    @property
    def h_x(self) -> Any:
        return self.representations


================================================
FILE: sequoia/methods/models/output_heads/__init__.py
================================================
from .classification_head import ClassificationHead
from .output_head import OutputHead
from .regression_head import RegressionHead
from .rl import ActorCriticHead, PolicyHead


================================================
FILE: sequoia/methods/models/output_heads/classification_head.py
================================================
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional, Type, Union

import gym
import torch
from gym import spaces
from torch import LongTensor, Tensor, nn

from sequoia.common import ClassificationMetrics, Loss
from sequoia.common.hparams import categorical, uniform
from sequoia.settings import Actions, Observations, Rewards

from ..fcnet import FCNet
from ..forward_pass import ForwardPass
from .output_head import OutputHead

# TODO: This is based on 'Actions' which is currently basically the same for all settings
# However, there should probably have a different `Action` class on a
# IncrementalSLSetting("mnist") vs IncrementalSLSetting("some_regression_dataset")!
# IDEA: What if Settings were actually meta-classes, where the 'instances' were for a
# particular choice of dataset? (e.g. `IncrementalSLSetting("mnist")` -> <type SplitMnistSetting>)
# This would maybe look a bit like the 'fully compositional' approach as well?


@dataclass(frozen=True)
class ClassificationOutput(Actions):
    """Typed dict-like class that represents the 'forward pass'/output of a
    classification head, which correspond to the 'actions' to be sent to the
    environment, in the general formulation.
    """

    y_pred: Union[LongTensor, Tensor]
    logits: Tensor

    @property
    def action(self) -> LongTensor:
        return self.y_pred

    @property
    def y_pred_log_prob(self) -> Tensor:
        """returns the log probabilities for the chosen actions/predictions."""
        return self.logits[:, self.y_pred]

    @property
    def y_pred_prob(self) -> Tensor:
        """returns the log probabilities for the chosen actions/predictions."""
        return self.probabilities[self.y_pred]

    @property
    def probabilities(self) -> Tensor:
        """Returns the normalized probabilies for each class, i.e. the
        softmax-ed version of `self.logits`.
        """
        return self.logits.softmax(-1)


class ClassificationHead(OutputHead):
    @dataclass
    class HParams(FCNet.HParams, OutputHead.HParams):
        """Hyper-parameters of the OutputHead used for classification."""

        # NOTE: These hparams were basically copied over from FCNet.HParams, just so its a
        # bit more visible.

        available_activations: ClassVar[Dict[str, Type[nn.Module]]] = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "elu": nn.ELU,  # No idea what these do, but hey, they are available!
            "gelu": nn.GELU,
            "relu6": nn.ReLU6,
        }
        # Number of hidden layers in the output head.
        hidden_layers: int = uniform(0, 3, default=0)
        # Number of neurons in each hidden layer of the output head.
        # If a single value is given, than each of the `hidden_layers` layers
        # will have that number of neurons.
        # If `n > 1` values are given, then `hidden_layers` must either be 0 or
        # `n`, otherwise a RuntimeError will be raised.
        hidden_neurons: Union[int, List[int]] = uniform(16, 512, default=64)
        activation: Type[nn.Module] = categorical(available_activations, default=nn.Tanh)
        # Dropout probability. Dropout is applied after each layer.
        # Set to None or 0 for no dropout.
        # TODO: Not sure if this is how it's typically used. Need to check.
        dropout_prob: Optional[float] = uniform(0, 0.8, default=0.2)

    def __init__(
        self,
        input_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space = None,
        hparams: "ClassificationHead.HParams" = None,
        name: str = "classification",
    ):
        super().__init__(
            input_space=input_space,
            action_space=action_space,
            reward_space=reward_space,
            hparams=hparams,
            name=name,
        )
        self.hparams: ClassificationHead.HParams

        assert isinstance(action_space, spaces.Discrete)
        output_size = action_space.n
        self.dense = FCNet(
            in_features=self.input_size,
            out_features=output_size,
            hparams=self.hparams,
        )
        # if output_size == 2:
        #     # TODO: Should we be using this loss instead?
        #     self.loss_fn = nn.BCEWithLogitsLoss()
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, observations: Observations, representations: Tensor) -> ClassificationOutput:
        # TODO: This should probably take in a dict and return a dict, or something like that?
        # TODO: We should maybe convert this to also return a dict instead
        # of a Tensor, just to be consistent with everything else. This could
        # also maybe help with having multiple different output heads, each
        # having a different name and giving back a dictionary of their own
        # forward pass tensors (if needed) and predictions?
        logits = self.dense(representations)
        y_pred = logits.argmax(dim=-1)
        return ClassificationOutput(
            logits=logits,
            y_pred=y_pred,
        )

    def get_loss(
        self, forward_pass: ForwardPass, actions: ClassificationOutput, rewards: Rewards
    ) -> Loss:
        logits: Tensor = actions.logits
        y_pred: Tensor = actions.y_pred
        rewards = rewards.to(logits.device)

        y: Tensor = rewards.y

        n_classes = logits.shape[-1]
        # Could remove these: just used for debugging.
        assert len(y.shape) == 1, y.shape
        assert not torch.is_floating_point(y), y.dtype
        assert 0 <= y.min(), y
        assert y.max() < n_classes, y

        loss = self.loss_fn(logits, y)

        assert loss.shape == ()
        metrics = ClassificationMetrics(y_pred=logits, y=y)

        assert self.name, "Output Heads should have a name!"
        loss_object = Loss(
            name=self.name,
            loss=loss,
            # NOTE: we're passing the tensors to the Loss object because we let
            # it create the Metrics for us automatically.
            metrics={self.name: metrics},
        )
        return loss_object


================================================
FILE: sequoia/methods/models/output_heads/output_head.py
================================================
""" Abstract base class for an output head of the BaseModel. """
import dataclasses
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import ClassVar, List, Sequence, Type

import gym
import numpy as np
from gym import spaces
from gym.spaces.utils import flatdim
from torch import Tensor, nn
from torch.nn import Flatten  # type: ignore
from torch.optim.optimizer import Optimizer

from sequoia.common.hparams import HyperParameters
from sequoia.common.loss import Loss
from sequoia.settings import Actions, Rewards, Setting
from sequoia.utils import Parseable, get_logger

from ..forward_pass import ForwardPass

logger = get_logger(__name__)


class OutputHead(nn.Module, ABC):
    """Module for the output head of the model.

    This output head is meant for classification, but you could inherit from it
    and customize it for doing something different like RL or reconstruction,
    for instance.
    """

    # TODO: Rename this to 'output' and create some ClassificationHead,
    # RegressionHead, ValueHead, etc. subclasses with the corresponding names.
    name: ClassVar[str] = "classification"

    # Reference to the optimizer of the BaseModel.
    base_model_optimizer: ClassVar[Optimizer]

    @dataclass
    class HParams(HyperParameters, Parseable):
        """Hyperparameters of the output head."""

    def __init__(
        self,
        input_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space = None,
        hparams: "OutputHead.HParams" = None,
        name: str = "",
    ):
        super().__init__()

        self.input_space = input_space
        self.action_space = action_space
        self.reward_space = reward_space or spaces.Box(-np.inf, np.inf, ())
        self.input_size = flatdim(input_space)
        self.hparams = hparams or self.HParams()
        if not isinstance(self.hparams, self.HParams):
            # Upgrade the hparams to the right type, if needed.
            self.hparams = self.upgrade_hparams()
        self.name = name or type(self).name

    def make_dense_network(
        self,
        in_features: int,
        hidden_neurons: Sequence[int],
        out_features: int,
        activation: Type[nn.Module] = nn.ReLU,
    ):
        hidden_layers: List[nn.Module] = []
        output_size = out_features
        for i, neurons in enumerate(hidden_neurons):
            out_features = neurons
            hidden_layers.append(nn.Linear(in_features, out_features))
            hidden_layers.append(activation())
            in_features = out_features  # next input size is output size of prev.

        return nn.Sequential(nn.Flatten(), *hidden_layers, nn.Linear(in_features, output_size))

    @abstractmethod
    def forward(
        self, observations: Setting.Observations, representations: Tensor
    ) -> Setting.Actions:
        """Given the observations and their representations, produce "actions".

        Parameters
        ----------
        observations : Observations
            Object containing the input examples.
        representations : Any
            The results of encoding the input examples.

        Returns
        -------
        Actions
            An object containing the action to take, and which can be used to
            calculate the loss later on.
        """

    @abstractmethod
    def get_loss(self, forward_pass: ForwardPass, actions: Actions, rewards: Rewards) -> Loss:
        """Given the forward pass,(a dict-like object that includes the
        observations, representations and actions, the actions produced by this
        output head and the resulting rewards, returns a Loss to use.
        """

    def clear_all_buffers(self) -> None:
        """Optional method that gets called when using multiple output heads, to
        prevent keeping stale gradients around after the model that produced them gets
        updated during training.
        """

    def upgrade_hparams(self):
        """Upgrades the hparams at `self.hparams` to the right type for this
        output head (`type(self).HParams`), filling in any missing values by
        parsing them from the command-line.

        Returns
        -------
        type(self).HParams
            Hparams of the type `self.HParams`, with the original values
            preserved and any new values parsed from the command-line.
        """
        # NOTE: This (getting the wrong hparams class) could happen for
        # instance when parsing a BaseMethod from the command-line, the
        # default type of hparams on the method is BaseModel.HParams,
        # whose `output_head` field doesn't have the right type exactly.
        current_hparams = self.hparams.to_dict()
        # TODO: If a value is not at its current default, keep it.
        default_hparams = self.HParams()
        missing_fields = [
            f.name
            for f in dataclasses.fields(self.HParams)
            if f.name not in current_hparams
            or current_hparams[f.name] == getattr(type(self.hparams)(), f.name, None)
            or current_hparams[f.name] == getattr(default_hparams, f.name)
        ]
        logger.warning(
            RuntimeWarning(
                f"Upgrading the hparams from type {type(self.hparams)} to "
                f"type {self.HParams}. This will try to fetch the values for "
                f"the missing fields {missing_fields} from the command-line. "
            )
        )
        # Get the missing values

        if self.hparams._argv:
            return self.HParams.from_args(argv=self.hparams._argv, strict=False)
        hparams = self.HParams.from_args(argv=self.hparams._argv, strict=False)
        for missing_field in missing_fields:
            current_hparams[missing_field] = getattr(hparams, missing_field)
        return self.HParams(**current_hparams)


================================================
FILE: sequoia/methods/models/output_heads/regression_head.py
================================================
from dataclasses import dataclass
from typing import List

import gym
from gym import spaces
from torch import Tensor, nn

from sequoia.common import Loss, RegressionMetrics
from sequoia.settings import Actions, Observations, Rewards
from sequoia.utils.utils import prod

from ..fcnet import FCNet
from ..forward_pass import ForwardPass
from .output_head import OutputHead


class RegressionHead(OutputHead):
    """Output head used for regression problems."""

    @dataclass
    class HParams(FCNet.HParams, OutputHead.HParams):
        """Hyper-parameters of the regression output head."""

    def __init__(
        self,
        input_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space = None,
        hparams: OutputHead.HParams = None,
        name: str = "regression",
    ):
        assert isinstance(action_space, spaces.Box)
        if len(action_space.shape) > 1:
            raise NotImplementedError(
                f"TODO: Regression head doesn't support output shapes that are "
                f"more than 1d for atm, (output space: {action_space})."
            )
            # TODO: Add support for something like a "decoder head" (maybe as a
            # subclass of RegressionHead)?
        super().__init__(
            input_space=input_space,
            action_space=action_space,
            reward_space=reward_space,
            hparams=hparams,
            name=name,
        )
        assert isinstance(action_space, spaces.Box)
        output_size = prod(action_space.shape)

        hidden_layers: List[nn.Module] = []
        in_features = self.input_size
        for i, neurons in enumerate(self.hparams.hidden_neurons):
            out_features = neurons
            hidden_layers.append(nn.Linear(in_features, out_features))
            hidden_layers.append(nn.ReLU())
            in_features = out_features  # next input size is output size of prev.

        self.dense = nn.Sequential(
            nn.Flatten(), *hidden_layers, nn.Linear(in_features, output_size)
        )
        self.loss_fn = nn.MSELoss()

    def forward(self, observations: Observations, representations: Tensor) -> Actions:
        y_pred = self.dense(representations)
        return Actions(y_pred)

    def get_loss(self, forward_pass: ForwardPass, actions: Actions, rewards: Rewards) -> Loss:
        actions: Actions = forward_pass.actions
        y_pred: Tensor = actions.y_pred
        y: Tensor = rewards.y

        loss = self.loss_fn(y_pred, y)
        metrics = RegressionMetrics(y_pred=y_pred, y=y)

        assert self.name, "Output Heads should have a name!"
        loss = Loss(
            name=self.name,
            loss=loss,
            # NOTE: we're passing the tensors to the Loss object because we let
            # it create the Metrics for us automatically.
            metrics={self.name: metrics},
        )
        return loss


================================================
FILE: sequoia/methods/models/output_heads/rl/__init__.py
================================================
from .actor_critic_head import ActorCriticHead
from .policy_head import PolicyHead


================================================
FILE: sequoia/methods/models/output_heads/rl/actor_critic_head.py
================================================
""" An output head for RL based on Advantage Actor Critic.

NOTE: This is the 'online' version of an Advantage Actor Critic, based
on the following blog:

https://medium.com/deeplearningmadeeasy/advantage-actor-critic-a2c-implementation-944e98616b

"""

from dataclasses import dataclass
from typing import Optional, Tuple

import torch
from gym import spaces
from gym.spaces.utils import flatdim
from torch import Tensor, nn

from sequoia.common import Loss
from sequoia.settings import ContinualRLSetting
from sequoia.utils import get_logger

from ...forward_pass import ForwardPass
from ..classification_head import ClassificationHead
from .policy_head import Categorical, PolicyHeadOutput

logger = get_logger(__name__)


class ActorCriticHead(ClassificationHead):
    @dataclass
    class HParams(ClassificationHead.HParams):
        """Hyper-parameters of the Actor-Critic head."""

        gamma: float = 0.95
        learning_rate: float = 1e-3

    def __init__(
        self,
        input_space: spaces.Space,
        action_space: spaces.Discrete,
        reward_space: spaces.Box,
        hparams: "ActorCriticHead.HParams" = None,
        name: str = "actor_critic",
    ):
        assert isinstance(action_space, spaces.Discrete), "Only support discrete space for now."
        super().__init__(
            input_space=input_space,
            action_space=action_space,
            reward_space=reward_space,
            hparams=hparams,
            name=name,
        )
        if not isinstance(self.hparams, self.HParams):
            self.hparams = self.upgrade_hparams()

        action_dims = flatdim(action_space)

        # Critic takes in state-action pairs? or just state?
        self.critic_input_dims = self.input_size
        # self.critic_input_dims = self.input_size + action_dims
        self.critic_output_dims = 1
        self.critic = nn.Sequential(
            # Lambda(concat_obs_and_action),
            nn.Flatten(),
            nn.Linear(self.critic_input_dims, 32),
            nn.ReLU(),
            nn.Linear(32, self.critic_output_dims),
        )
        self.actor_input_dims = self.input_size
        self.actor_output_dims = action_dims
        self.actor = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.actor_input_dims, 32),
            nn.ReLU(),
            nn.Linear(32, self.actor_output_dims),
        )
        self._current_state: Optional[Tensor] = None
        self._previous_state: Optional[Tensor] = None
        self._step = 0

        self.optimizer = torch.optim.Adam(self.actor.parameters(), lr=self.hparams.learning_rate)
        self.optimizer_critic = torch.optim.Adam(
            self.critic.parameters(), lr=self.hparams.learning_rate
        )

    def forward(
        self, observations: ContinualRLSetting.Observations, representations: Tensor
    ) -> PolicyHeadOutput:
        # NOTE: Here we could probably use either as the 'state':
        # state = observations.x
        # state = representations
        representations = representations.float()
        if len(representations.shape) != 2:
            representations = representations.reshape([-1, self.actor_input_dims])

        self._previous_state = self._current_state
        self._current_state = representations

        # TODO: Actually implement the actor-critic forward pass.
        # predicted_reward = self.critic([state, action])
        # Do we want to detach the representations? or not?

        logits = self.actor(representations)
        # The policy is the distribution over actions given the current state.
        action_dist = Categorical(logits=logits)

        if action_dist.has_rsample:
            sample = action_dist.rsample()
        else:
            sample = action_dist.sample()

        actions = PolicyHeadOutput(
            y_pred=sample,
            logits=logits,
            action_dist=action_dist,
        )
        return actions

    def get_loss(
        self,
        forward_pass: ForwardPass,
        actions: PolicyHeadOutput,
        rewards: ContinualRLSetting.Rewards,
    ) -> Loss:
        action_dist: Categorical = actions.action_dist

        rewards = rewards.to(device=actions.device)
        env_reward = torch.as_tensor(rewards.y, device=actions.device)

        observations: ContinualRLSetting.Observations = forward_pass.observations
        done = observations.done
        assert done is not None, "Need the end-of-episode signal!"
        done = torch.as_tensor(done, device=actions.device)
        assert self._current_state is not None
        if self._previous_state is None:
            # Only allow this once!
            assert self._step == 0
            self._previous_state = self._current_state
        self._step += 1

        # TODO: Need to detach something here, right?
        advantage: Tensor = (
            env_reward
            + (~done) * self.hparams.gamma * self.critic(self._current_state)
            - self.critic(self._previous_state)  # detach previous representations?
        )

        total_loss = Loss(self.name)
        if self.training:
            self.optimizer_critic.zero_grad()
        critic_loss_tensor = (advantage**2).mean()
        critic_loss = Loss("critic", loss=critic_loss_tensor)
        if self.training:
            critic_loss_tensor.backward()
            self.optimizer_critic.step()

        total_loss += critic_loss.detach()

        if self.training:
            self.optimizer.zero_grad()
        actor_loss_tensor = -action_dist.log_prob(actions.action) * advantage.detach()
        actor_loss_tensor = actor_loss_tensor.mean()
        actor_loss = Loss("actor", loss=actor_loss_tensor)
        if self.training:
            actor_loss_tensor.backward()
            self.optimizer.step()

        total_loss += actor_loss.detach()

        return total_loss


def concat_obs_and_action(observation_action: Tuple[Tensor, Tensor]) -> Tensor:
    observation, action = observation_action
    batch_size = observation.shape[0]
    observation = observation.reshape([batch_size, -1])
    action = action.reshape([batch_size, -1])
    return torch.cat([observation, action], dim=-1)


================================================
FILE: sequoia/methods/models/output_heads/rl/episodic_a2c.py
================================================
""" TODO: IDEA: Similar to ActorCriticHead, but episodic, i.e. only gives a Loss at
the end of the episode, rather than at each step.
"""

from dataclasses import dataclass
from typing import ClassVar, Deque, List, Optional

import numpy as np
import torch
from gym import spaces
from torch import Tensor, nn
from torch.nn import functional as F

from sequoia.common import Loss
from sequoia.common.hparams import categorical, uniform
from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings import ContinualRLSetting
from sequoia.settings.base import Rewards
from sequoia.utils import get_logger

from .policy_head import PolicyHead, PolicyHeadOutput, normalize

logger = get_logger(__name__)


@dataclass(frozen=True)
class A2CHeadOutput(PolicyHeadOutput):
    """Output produced by the A2C output head."""

    # The value estimate coming from the critic.
    value: Tensor


class EpisodicA2C(PolicyHead):
    """Advantage-Actor-Critic output head that produces a loss only at end of
    episode.

    TODO: This could actually produce a loss every N steps, rather than just at
    the end of the episode.
    """

    name: ClassVar[str] = "episodic_a2c"

    @dataclass
    class HParams(PolicyHead.HParams):
        """Hyper-parameters of the episodic A2C output head."""

        # Wether to normalize the advantages for each episode.
        normalize_advantages: bool = categorical(True, False, default=False)

        actor_loss_coef: float = uniform(0.1, 1, default=0.5)
        critic_loss_coef: float = uniform(0.1, 1, default=0.5)
        entropy_loss_coef: float = uniform(0, 1, default=0.1)

        # Maximum norm of the policy gradient.
        max_policy_grad_norm: Optional[float] = None

        # The discount factor.
        gamma: float = uniform(0.9, 0.999, default=0.99)

    def __init__(
        self,
        input_space: spaces.Box,
        action_space: spaces.Discrete,
        reward_space: spaces.Box,
        hparams: HParams = None,
        name: str = "episodic_a2c",
    ):
        super().__init__(
            input_space=input_space,
            action_space=action_space,
            reward_space=reward_space,
            hparams=hparams,
            name=name,
        )
        self.hparams: EpisodicA2C.HParams
        # Critic takes in state-action pairs? or just state?
        self.critic_input_dims = self.input_size
        # self.critic_input_dims = self.input_size + action_dims
        self.critic_output_dims = 1
        self.critic = self.make_dense_network(
            in_features=self.critic_input_dims,
            hidden_neurons=self.hparams.hidden_neurons,
            out_features=self.critic_output_dims,
            activation=self.hparams.activation,
        )
        self.actions: List[Deque[A2CHeadOutput]]
        self._current_state: Optional[Tensor] = None
        self._previous_state: Optional[Tensor] = None
        self._step = 0

    @property
    def actor(self) -> nn.Module:
        return self.dense

    def forward(
        self, observations: ContinualRLSetting.Observations, representations: Tensor
    ) -> A2CHeadOutput:
        actions: PolicyHeadOutput = super().forward(observations, representations)
        # TODO: Shouldn't the critic also take the actor's action as an input?
        value = self.critic(representations)
        # We just need to add the value to the actions of the PolicyHead.
        # This works, because `self.actor` :== `self.dense`, which is what's used by
        # the PolicyHead.
        actions = A2CHeadOutput(
            y_pred=actions.y_pred,
            logits=actions.logits,
            action_dist=actions.action_dist,
            value=value,
        )
        return actions

    def num_stored_steps(self, env_index: int) -> Optional[int]:
        """Returns the number of steps stored in the buffer for the given
        environment index.

        If there are no buffers for the given env, returns None
        """
        if not self.actions or env_index >= len(self.actions):
            return None
        return len(self.actions[env_index])

    def get_episode_loss(self, env_index: int, done: bool) -> Optional[Loss]:
        # IDEA: Actually, now that I think about it, instead of detaching the
        # tensors, we could instead use the critic's 'value' estimate and get a
        # loss for that incomplete episode using the tensors in the buffer,
        # rather than detaching them!

        if not done:
            return None

        # TODO: Add something like a 'num_steps_since_update' for each env? (it
        # would actually be a num_steps_since_backward)
        # if self.num_steps_since_update?
        n_stored_steps = self.num_stored_steps(env_index)
        if n_stored_steps < 5:
            # For now, we only give back a loss at the end of the episode.
            # TODO: Test if giving back a loss at each step or every few steps
            # would work better!
            logger.warning(
                RuntimeWarning(
                    f"Returning None as the episode loss, because only have "
                    f"{n_stored_steps} steps stored for that environment."
                )
            )
            return None

        inputs: Tensor
        actions: A2CHeadOutput
        rewards: Rewards
        inputs, actions, rewards = self.stack_buffers(env_index)
        logits: Tensor = actions.logits
        action_log_probs: Tensor = actions.action_log_prob
        values: Tensor = actions.value
        assert rewards.y is not None
        episode_rewards: Tensor = rewards.y

        # target values are calculated backward
        # it's super important to handle correctly done states,
        # for those cases we want our to target to be equal to the reward only
        episode_length = len(episode_rewards)
        dones = torch.zeros(episode_length, dtype=torch.bool)
        dones[-1] = bool(done)

        returns = self.get_returns(episode_rewards, gamma=self.hparams.gamma).type_as(values)
        advantages = returns - values

        # Normalize advantage (not present in the original implementation)
        if self.hparams.normalize_advantages:
            advantages = normalize(advantages)

        # Create the Loss to be returned.
        loss = Loss(self.name)

        # Policy gradient loss (actor loss)
        policy_gradient_loss = -(advantages.detach() * action_log_probs).mean()
        actor_loss = Loss("actor", policy_gradient_loss)
        loss += self.hparams.actor_loss_coef * actor_loss

        # Value loss: Try to get the critic's values close to the actual return,
        # which means the advantages should be close to zero.
        value_loss_tensor = F.mse_loss(values, returns.reshape(values.shape))
        critic_loss = Loss("critic", value_loss_tensor)
        loss += self.hparams.critic_loss_coef * critic_loss

        # Entropy loss, to "favor exploration".
        entropy_loss_tensor = -actions.action_dist.entropy().mean()
        entropy_loss = Loss("entropy", entropy_loss_tensor)
        loss += self.hparams.entropy_loss_coef * entropy_loss
        if done:
            episode_rewards_array = episode_rewards.reshape([-1])
            loss.metric = EpisodeMetrics(
                n_samples=1,
                mean_episode_reward=float(episode_rewards_array.sum()),
                mean_episode_length=len(episode_rewards_array),
            )
        loss.metrics["gradient_usage"] = self.get_gradient_usage_metrics(env_index)
        return loss

    def optimizer_step(self):
        # Clip grad norm if desired.
        if self.hparams.max_policy_grad_norm is not None:
            original_norm: Tensor = torch.nn.utils.clip_grad_norm_(
                self.actor.parameters(),
                self.hparams.max_policy_grad_norm,
            )
            self.loss.metrics["policy_gradient_norm"] = original_norm.item()
        super().optimizer_step()


def compute_returns_and_advantage(self, last_values: Tensor, dones: np.ndarray) -> None:
    """
    TODO: Adapting this snippet from SB3's common/buffers.py RolloutBuffer.

    Post-processing step: compute the returns (sum of discounted rewards)
    and GAE advantage.
    Adapted from Stable-Baselines PPO2.

    Uses Generalized Advantage Estimation (https://arxiv.org/abs/1506.02438)
    to compute the advantage. To obtain vanilla advantage (A(s) = R - V(S))
    where R is the discounted reward with value bootstrap,
    set ``gae_lambda=1.0`` during initialization.

    :param last_values:
    :param dones:

    """
    buffer_size: int = self.buffer_size
    dones: np.ndarray = self.dones
    rewards: np.ndarray = self.rewards
    values: np.ndarray = self.values
    gamma: float = self.gamma
    gae_lambda: float = 1.0
    # convert to numpy
    last_values = last_values.clone().cpu().numpy().flatten()
    advantages = np.zeros_like(rewards)

    last_gae_lam = 0
    for step in reversed(range(buffer_size)):
        if step == buffer_size - 1:
            next_non_terminal = 1.0 - dones
            next_values = last_values
        else:
            next_non_terminal = 1.0 - dones[step + 1]
            next_values = values[step + 1]
        delta = rewards[step] + gamma * next_values * next_non_terminal - values[step]
        last_gae_lam = delta + gamma * gae_lambda * next_non_terminal * last_gae_lam
        self.advantages[step] = last_gae_lam
    self.returns = self.advantages + self.values


================================================
FILE: sequoia/methods/models/output_heads/rl/episodic_a2c_test.py
================================================
from functools import partial
from typing import Callable, Optional, Sequence

import gym
import numpy as np
import pytest
import torch
from gym import spaces
from gym.spaces.utils import flatdim
from gym.vector import SyncVectorEnv
from gym.vector.utils import batch_space
from torch import Tensor, nn

from sequoia.common.gym_wrappers import AddDoneToObservation, ConvertToFromTensors, EnvDataset
from sequoia.common.loss import Loss
from sequoia.conftest import DummyEnvironment
from sequoia.methods.models.forward_pass import ForwardPass
from sequoia.settings.rl.continual import ContinualRLSetting

from .episodic_a2c import EpisodicA2C
from .policy_head import PolicyHead


class FakeEnvironment(SyncVectorEnv):
    def __init__(
        self,
        env_fn: Callable[[], gym.Env],
        batch_size: int,
        new_episode_length: Callable[[int], int],
        episode_lengths: Sequence[int] = None,
    ):
        super().__init__([env_fn for _ in range(batch_size)])
        self.new_episode_length = new_episode_length
        self.batch_size = batch_size
        self.episode_lengths = np.array(
            episode_lengths or [new_episode_length(i) for i in range(self.num_envs)]
        )
        self.steps_left_in_episode = self.episode_lengths.copy()

        reward_space = spaces.Box(*self.reward_range, shape=())
        self.single_reward_space = reward_space
        self.reward_space = batch_space(reward_space, batch_size)

    def step(self, actions):
        self.steps_left_in_episode[:] -= 1

        # obs, reward, done, info = super().step(actions)
        obs = self.observation_space.sample()
        reward = np.ones(self.batch_size)

        assert not any(self.steps_left_in_episode < 0)
        done = self.steps_left_in_episode == 0

        info = np.array([{} for _ in range(self.batch_size)])

        for env_index, env_done in enumerate(done):
            if env_done:
                next_episode_length = self.new_episode_length(env_index)
                self.episode_lengths[env_index] = next_episode_length
                self.steps_left_in_episode[env_index] = next_episode_length

        return obs, reward, done, info


@pytest.mark.xfail(reason="TODO: Adapt this test for EpisodicA2C (copied form policy_head_test.py)")
@pytest.mark.parametrize("batch_size", [1, 2, 5])
def test_with_controllable_episode_lengths(batch_size: int, monkeypatch):
    """TODO: Test out the EpisodicA2C output head in a very controlled environment,
    where we know exactly the lengths of each episode.
    """
    env = FakeEnvironment(
        partial(gym.make, "CartPole-v0"),
        batch_size=batch_size,
        episode_lengths=[5, *(10 for _ in range(batch_size - 1))],
        new_episode_length=lambda env_index: 10,
    )
    env = AddDoneToObservation(env)
    env = ConvertToFromTensors(env)
    env = EnvDataset(env)

    obs_space = env.single_observation_space
    x_dim = flatdim(obs_space["x"])
    # Create some dummy encoder.
    encoder = nn.Linear(x_dim, x_dim)
    representation_space = obs_space["x"]

    output_head = EpisodicA2C(
        input_space=representation_space,
        action_space=env.single_action_space,
        reward_space=env.single_reward_space,
        hparams=PolicyHead.HParams(
            max_episode_window_length=100,
            min_episodes_before_update=1,
            accumulate_losses_before_backward=False,
        ),
    )
    # TODO: Simplify the loss function somehow using monkeypatch so we know exactly what
    # the loss should be at each step.

    batch_size = env.batch_size

    obs = env.reset()
    step_done = np.zeros(batch_size, dtype=np.bool)

    for step in range(200):
        x, obs_done = obs

        # The done from the obs should always be the same as the 'done' from the 'step' function.
        assert np.array_equal(obs_done, step_done)

        representations = encoder(x)
        observations = ContinualRLSetting.Observations(
            x=x,
            done=obs_done,
        )

        actions_obj = output_head(observations, representations)
        actions = actions_obj.y_pred

        # TODO: kinda useless to wrap a single tensor in an object..
        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=actions,
        )
        obs, rewards, step_done, info = env.step(actions)

        rewards_obj = ContinualRLSetting.Rewards(y=rewards)
        loss = output_head.get_loss(
            forward_pass=forward_pass,
            actions=actions_obj,
            rewards=rewards_obj,
        )
        print(f"Step {step}")
        print(f"num episodes since update: {output_head.num_episodes_since_update}")
        print(f"steps left in episode: {env.steps_left_in_episode}")
        print(f"Loss for that step: {loss}")

        if any(obs_done):
            assert loss != 0.0

        if step == 5.0:
            # Env 0 first episode from steps 0 -> 5
            assert loss.loss == 5.0
            assert loss.metrics["gradient_usage"].used_gradients == 5.0
            assert loss.metrics["gradient_usage"].wasted_gradients == 0.0
        elif step == 10:
            # Envs[1:batch_size], first episode, from steps 0 -> 10
            # NOTE: At this point, both envs have reached the required number of episodes.
            # This means that the gradient usage on the next time any env reaches
            # an end-of-episode will be one less than the total number of items.
            assert loss.loss == 10.0 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].used_gradients == 10.0 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].wasted_gradients == 0.0
        elif step == 15:
            # Env 0 second episode from steps 5 -> 15
            assert loss.loss == 10.0
            assert loss.metrics["gradient_usage"].used_gradients == 4
            assert loss.metrics["gradient_usage"].wasted_gradients == 6

        elif step == 20:
            # Envs[1:batch_size]: second episode, from steps 0 -> 10
            # NOTE: At this point, both envs have reached the required number of episodes.
            # This means that the gradient usage on the next time any env reaches
            # an end-of-episode will be one less than the total number of items.
            assert loss.loss == 10.0 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].used_gradients == 9 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].wasted_gradients == 1 * (batch_size - 1)

        elif step == 25:
            # Env 0 third episode from steps 5 -> 15
            assert loss.loss == 10.0
            assert loss.metrics["gradient_usage"].used_gradients == 4
            assert loss.metrics["gradient_usage"].wasted_gradients == 6

        elif step > 0 and step % 10 == 0:
            # Same pattern as step 20 above
            assert loss.loss == 10.0 * (batch_size - 1), step
            assert loss.metrics["gradient_usage"].used_gradients == 9 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].wasted_gradients == 1 * (batch_size - 1)

        elif step > 0 and step % 5 == 0:
            # Same pattern as step 25 above
            assert loss.loss == 10.0
            assert loss.metrics["gradient_usage"].used_gradients == 4
            assert loss.metrics["gradient_usage"].wasted_gradients == 6

        else:
            assert loss.loss == 0.0, step


@pytest.mark.parametrize(
    "batch_size",
    [
        1,
        2,
        5,
    ],
)
def test_loss_is_nonzero_at_episode_end(batch_size: int):
    """Test that when stepping through the env, when the episode ends, a
    non-zero loss is returned by the output head.
    """
    with gym.make("CartPole-v0") as temp_env:
        temp_env = AddDoneToObservation(temp_env)
        obs_space = temp_env.observation_space
        action_space = temp_env.action_space
        reward_space = getattr(
            temp_env, "reward_space", spaces.Box(*temp_env.reward_range, shape=())
        )

    env = gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=False)
    env = AddDoneToObservation(env)
    env = ConvertToFromTensors(env)
    env = EnvDataset(env)

    head = EpisodicA2C(
        input_space=obs_space["x"],
        action_space=action_space,
        reward_space=reward_space,
        hparams=EpisodicA2C.HParams(accumulate_losses_before_backward=False),
    )
    head.train()

    env.seed(123)
    obs = env.reset()

    # obs = torch.as_tensor(obs, dtype=torch.float32)

    done = torch.zeros(batch_size, dtype=bool)
    info = np.array([{} for _ in range(batch_size)])
    loss = None

    non_zero_losses = 0

    encoder = nn.Linear(4, 4)
    encoder.train()

    for i in range(100):
        representations = encoder(obs["x"])

        observations = ContinualRLSetting.Observations(
            x=obs["x"],
            done=done,
            # info=info,
        )
        head_output = head.forward(observations, representations=representations)
        actions = head_output.actions.numpy().tolist()
        # actions = np.zeros(batch_size, dtype=int).tolist()

        obs, rewards, done, info = env.step(actions)
        done = torch.as_tensor(done, dtype=bool)
        rewards = ContinualRLSetting.Rewards(rewards)
        assert len(info) == batch_size

        print(f"Step {i}, obs: {obs}, done: {done}, info: {info}")

        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=head_output,
        )
        loss = head.get_loss(forward_pass, actions=head_output, rewards=rewards)
        print("loss:", loss)

        assert observations.done is not None
        for env_index, env_is_done in enumerate(observations.done):
            if env_is_done:
                print(f"Episode ended for env {env_index} at step {i}")
                assert loss.loss != 0.0
                non_zero_losses += 1
                break
        else:
            print(f"No episode ended on step {i}, expecting no loss.")
            assert loss is None or loss.loss == 0.0

    assert non_zero_losses > 0


@pytest.mark.xfail(reason="TODO: Adapt this test for EpisodicA2C (copied form policy_head_test.py)")
@pytest.mark.parametrize("batch_size", [1, 2, 5])
def test_loss_is_nonzero_at_episode_end_iterate(batch_size: int):
    """Test that when *iterating* through the env (active-dataloader style),
    when the episode ends, a non-zero loss is returned by the output head.
    """
    with gym.make("CartPole-v0") as temp_env:
        temp_env = AddDoneToObservation(temp_env)

        obs_space = temp_env.observation_space
        action_space = temp_env.action_space
        reward_space = getattr(
            temp_env, "reward_space", spaces.Box(*temp_env.reward_range, shape=())
        )

    env = gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=False)
    env = AddDoneToObservation(env)
    env = ConvertToFromTensors(env)
    env = EnvDataset(env)

    head = EpisodicA2C(
        # observation_space=obs_space,
        input_space=obs_space["x"],
        action_space=action_space,
        reward_space=reward_space,
        hparams=EpisodicA2C.HParams(accumulate_losses_before_backward=False),
    )

    env.seed(123)
    non_zero_losses = 0

    for i, obs in zip(range(100), env):
        print(i, obs)
        x = obs["x"]
        done = obs[1]
        representations = x
        assert isinstance(x, Tensor)
        assert isinstance(done, Tensor)
        observations = ContinualRLSetting.Observations(
            x=x,
            done=done,
            # info=info,
        )
        head_output = head.forward(observations, representations=representations)

        actions = head_output.actions.numpy().tolist()
        # actions = np.zeros(batch_size, dtype=int).tolist()

        rewards = env.send(actions)

        # print(f"Step {i}, obs: {obs}, done: {done}")
        assert isinstance(representations, Tensor)
        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=head_output,
        )
        rewards = ContinualRLSetting.Rewards(rewards)
        loss = head.get_loss(forward_pass, actions=head_output, rewards=rewards)
        print("loss:", loss)

        for env_index, env_is_done in enumerate(observations.done):
            if env_is_done:
                print(f"Episode ended for env {env_index} at step {i}")
                assert loss.total_loss != 0.0
                non_zero_losses += 1
                break
        else:
            print(f"No episode ended on step {i}, expecting no loss.")
            assert loss.total_loss == 0.0

    assert non_zero_losses > 0


@pytest.mark.xfail(reason="TODO: Adapt this test for EpisodicA2C (copied form policy_head_test.py)")
@pytest.mark.xfail(reason="TODO: Fix this test")
def test_buffers_are_stacked_correctly(monkeypatch):
    """TODO: Test that when "de-synced" episodes, when fed to the output head,
    get passed, re-stacked correctly, to the get_episode_loss function.
    """
    batch_size = 5

    starting_values = [i for i in range(batch_size)]
    targets = [10 for i in range(batch_size)]

    env = SyncVectorEnv(
        [
            partial(DummyEnvironment, start=start, target=target, max_value=10 * 2)
            for start, target in zip(starting_values, targets)
        ]
    )
    obs = env.reset()
    assert obs.tolist() == list(range(batch_size))

    reward_space = spaces.Box(*env.reward_range, shape=())
    output_head = PolicyHead(  # observation_space=spaces.Tuple([env.observation_space,
        #              spaces.Box(False, True, [batch_size], np.bool)]),
        input_space=spaces.Box(0, 1, (1,)),
        action_space=env.single_action_space,
        reward_space=reward_space,
    )
    # Set the max window length, for testing.
    output_head.hparams.max_episode_window_length = 100

    obs = initial_obs = env.reset()
    done = np.zeros(batch_size, dtype=bool)

    obs = torch.from_numpy(obs)
    done = torch.from_numpy(done)

    def mock_get_episode_loss(
        self: PolicyHead,
        env_index: int,
        inputs: Tensor,
        actions: ContinualRLSetting.Observations,
        rewards: ContinualRLSetting.Rewards,
        done: bool,
    ) -> Optional[Loss]:
        print(f"Environment at index {env_index}, episode ended: {done}")
        if done:
            print(f"Full episode: {inputs}")
        else:
            print(f"Episode so far: {inputs}")

        n_observations = len(inputs)

        assert inputs.flatten().tolist() == (env_index + np.arange(n_observations)).tolist()
        if done:
            # Unfortunately, we don't get the final state, because of how
            # VectorEnv works atm.
            assert inputs[-1] == targets[env_index] - 1

    monkeypatch.setattr(PolicyHead, "get_episode_loss", mock_get_episode_loss)

    # perform 10 iterations, incrementing each DummyEnvironment's counter at
    # each step (action of 1).
    # Therefore, at first, the counters should be [0, 1, 2, ... batch-size-1].
    info = [{} for _ in range(batch_size)]

    for step in range(10):
        print(f"Step {step}.")
        # Wrap up the obs to pretend that this is the data coming from a
        # ContinualRLSetting.
        observations = ContinualRLSetting.Observations(x=obs, done=done)  # , info=info)
        # We don't use an encoder for testing, so the representations is just x.
        representations = obs.reshape([batch_size, 1])
        assert observations.task_labels is None

        actions = output_head(observations.float(), representations.float())

        # Wrap things up to pretend like the output head is being used in the
        # BaseModel:

        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=actions,
        )

        action_np = actions.actions_np

        obs, rewards, done, info = env.step(action_np)

        obs = torch.from_numpy(obs)
        rewards = torch.from_numpy(rewards)
        done = torch.from_numpy(done)

        rewards = ContinualRLSetting.Rewards(y=rewards)
        loss = output_head.get_loss(forward_pass, actions=actions, rewards=rewards)

        # Check the contents of the episode buffers.

        assert len(output_head.representations) == batch_size
        for env_index in range(batch_size):

            # obs_buffer = output_head.observations[env_index]
            representations_buffer = output_head.representations[env_index]
            action_buffer = output_head.actions[env_index]
            reward_buffer = output_head.rewards[env_index]

            if step >= batch_size:
                if step + env_index == targets[env_index]:
                    assert len(representations_buffer) == 1 and output_head.done[env_index] == False
                # if env_index == step - batch_size:
                continue
            assert len(representations_buffer) == step + 1
            # Check to see that the last entry in the episode buffer for this
            # environment corresponds to the slice of the most recent
            # observations/actions/rewards at the index corresponding to this
            # environment.

            # observation_tuple = input_buffer[-1]
            step_action = action_buffer[-1]
            step_reward = reward_buffer[-1]
            # assert observation_tuple.x == observations.x[env_index]
            # assert observation_tuple.task_labels is None
            # assert observation_tuple.done == observations.done[env_index]

            # The last element in the buffer should be the slice in the batch
            # for that environment.
            assert step_action.y_pred == actions.y_pred[env_index]
            assert step_reward.y == rewards.y[env_index]

        if step < batch_size:
            assert obs.tolist() == (np.arange(batch_size) + step + 1).tolist()
        # if step >= batch_size:
        #     if step + env_index == targets[env_index]:
        #         assert done

    # assert False, (obs, rewards, done, info)
    # loss: Loss = output_head.get_loss(forward_pass, actions=actions, rewards=rewards)


================================================
FILE: sequoia/methods/models/output_heads/rl/policy_head.py
================================================
""" Defines a (hopefully general enough) Output Head class to be used by the
BaseMethod when applied on an RL setting.

NOTE: The training procedure is fundamentally on-policy atm, i.e. the
observation is a single state, not a rollout, and the reward is the
immediate reward at the current step.

Therefore, what we do here is to first split things up and push the
observations/actions/rewards into a per-environment buffer, of max
length `self.hparams.max_episode_window_length`. These buffers get
cleared when starting a new episode in their corresponding environment.

The contents of this buffer are then rearranged and presented to the
`get_episode_loss` method in order to get a loss for the given episode.
The `get_episode_loss` method is also given the environment index, and
is passed a boolean `done` that indicates wether the last
items in the sequences it received mark the end of the episode.

TODO: My hope is that this will allow us to implement RL methods that
need a complete episode in order to give a loss to train with, as well
as methods (like A2C, I think) which can give a Loss even when the
episode isn't over yet.

Also, standard supervised learning could be recovered by setting the
maximum length of the 'episode buffer' to 1, and consider all
observations as final, i.e., when episode length == 1
"""

from collections import deque
from dataclasses import dataclass
from typing import ClassVar, Deque, List, Optional, Sequence, Tuple, TypeVar, Union

import numpy as np
import torch
from gym import spaces
from gym.spaces.utils import flatdim
from simple_parsing import list_field
from torch import Tensor

from sequoia.common import Loss
from sequoia.common.metrics.rl_metrics import EpisodeMetrics, GradientUsageMetric
from sequoia.methods.models.forward_pass import ForwardPass
from sequoia.settings.rl.continual import ContinualRLSetting
from sequoia.utils.categorical import Categorical
from sequoia.utils.generic_functions import stack
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import flag

from ..classification_head import ClassificationHead, ClassificationOutput

logger = get_logger(__name__)
T = TypeVar("T")


@dataclass(frozen=True)
class PolicyHeadOutput(ClassificationOutput):
    """WIP: Adds the action pdf to ClassificationOutput."""

    # The distribution over the actions, either as a single
    # (batched) distribution or as a list of distributions, one for each
    # environment in the batch.
    action_dist: Categorical

    @property
    def y_pred_prob(self) -> Tensor:
        """returns the probabilities for the chosen actions/predictions."""
        return self.action_dist.probs(self.y_pred)

    @property
    def y_pred_log_prob(self) -> Tensor:
        """returns the log probabilities for the chosen actions/predictions."""
        return self.action_dist.log_prob(self.y_pred)

    @property
    def action_log_prob(self) -> Tensor:
        return self.y_pred_log_prob

    @property
    def action_prob(self) -> Tensor:
        return self.y_pred_log_prob


## NOTE: Since the gym VectorEnvs actually auto-reset the individual
## environments (and also discard the final state, for some weird
## reason), I added a way to save it into the 'info' dict at the key
## 'final_state'. Assuming that the env this output head gets applied
## on adds the info dict to the observations (using the
## AddInfoToObservations wrapper, for instance), then the 'final'
## observation would be stored in the dict for this environment in
## the Observations object, while the 'observation' you get from step
## is the 'initial' observation of the new episode.


class PolicyHead(ClassificationHead):
    """[WIP] Output head for RL settings.

    Uses the REINFORCE algorithm to calculate its loss.

    TODOs/issues:
    - Only currently works with batch_size == 1
    - The buffers are common to training/validation/testing atm..

    """

    name: ClassVar[str] = "policy"

    @dataclass
    class HParams(ClassificationHead.HParams):
        hidden_layers: int = 0
        hidden_neurons: List[int] = list_field()
        # The discount factor for the Return term.
        gamma: float = 0.99

        # The maximum length of the buffer that will hold the most recent
        # states/actions/rewards of the current episode.
        max_episode_window_length: int = 1000

        # Minumum number of epidodes that need to be completed in each env
        # before we update the parameters of the output head.
        min_episodes_before_update: int = 1

        # TODO: Add this mechanism, so that this method could work even when
        # episodes are very long.
        max_steps_between_updates: Optional[int] = None

        # NOTE: Here we have two options:
        # 1- `True`: sum up all the losses and do one larger backward pass,
        # and have `retrain_graph=False`, or
        # 2- `False`: Perform multiple little backward passes, one for each
        # end-of-episode in a single env, w/ `retain_graph=True`.
        # Option 1 is maybe more performant, as it might only require
        # unrolling the graph once, but would use more memory to store all the
        # intermediate graphs.
        accumulate_losses_before_backward: bool = flag(True)

    def __init__(
        self,
        input_space: spaces.Space,
        action_space: spaces.Discrete,
        reward_space: spaces.Box,
        hparams: "PolicyHead.HParams" = None,
        name: str = "policy",
    ):
        assert isinstance(
            input_space, spaces.Box
        ), f"Only support Tensor (box) input space. (got {input_space})."
        assert isinstance(
            action_space, spaces.Discrete
        ), f"Only support discrete action space (got {action_space})."
        assert isinstance(
            reward_space, spaces.Box
        ), f"Reward space should be a Box (scalar rewards) (got {reward_space})."
        super().__init__(
            input_space=input_space,
            action_space=action_space,
            reward_space=reward_space,
            hparams=hparams,
            name=name,
        )
        logger.debug("New Output head with hparams: " + self.hparams.dumps_json(indent="\t"))
        self.hparams: PolicyHead.HParams
        # Type hints for the spaces;
        self.input_space: spaces.Box
        self.action_space: spaces.Discrete
        self.reward_space: spaces.Box

        # List of buffers for each environment that will hold some items.
        # TODO: Won't use the 'observations' anymore, will only use the
        # representations from the encoder, so renaming 'representations' to
        # 'observations' in this case.
        # (Should probably come up with another name so this isn't ambiguous).
        # TODO: Perhaps we should register these as buffers so they get
        # persisted correclty? But then we also need to make sure that the grad
        # stuff would work the same way..
        self.representations: List[Deque[Tensor]] = []
        # self.representations: List[deque] = []
        self.actions: List[Deque[PolicyHeadOutput]] = []
        self.rewards: List[Deque[ContinualRLSetting.Rewards]] = []

        # The actual "internal" loss we use for training.
        self.loss: Loss = Loss(self.name)
        self.batch_size: int = 0

        self.num_episodes_since_update: np.ndarray = np.zeros(1)
        self.num_steps_in_episode: np.ndarray = np.zeros(1)

        self._training: bool = True

        self.device: Optional[Union[str, torch.device]] = None

    def create_buffers(self):
        """Creates the buffers to hold the items from each env."""
        logger.debug(f"Creating buffers (batch size={self.batch_size})")
        logger.debug(f"Maximum buffer length: {self.hparams.max_episode_window_length}")

        self.representations = self._make_buffers()
        self.actions = self._make_buffers()
        self.rewards = self._make_buffers()

        self.num_steps_in_episode = np.zeros(self.batch_size, dtype=int)
        self.num_episodes_since_update = np.zeros(self.batch_size, dtype=int)

    def forward(
        self, observations: ContinualRLSetting.Observations, representations: Tensor
    ) -> PolicyHeadOutput:
        """Forward pass of a Policy head.

        TODO: Do we actually need the observations here? It is here so we have
        access to the 'done' from the env, but do we really need it here? or
        would there be another (cleaner) way to do this?
        """
        if len(representations.shape) < 2:
            # Flatten the representations.
            representations = representations.reshape([-1, flatdim(self.input_space)])

        # Setup the buffers, which will hold the most recent observations,
        # actions and rewards within the current episode for each environment.
        if not self.batch_size:
            self.batch_size = representations.shape[0]
            self.create_buffers()

        representations = representations.float()

        logits = self.dense(representations)

        # The policy is the distribution over actions given the current state.
        action_dist = Categorical(logits=logits)
        sample = action_dist.sample()
        actions = PolicyHeadOutput(
            y_pred=sample,
            logits=logits,
            action_dist=action_dist,
        )
        return actions

    T = TypeVar("T")

    def to(self: T, device: Optional[Union[int, torch.device]] = None, **kwargs) -> T:
        result = super().to(device=device, **kwargs)
        if device is not None:
            result.device = torch.device(device)
        return result

    def get_loss(
        self,
        forward_pass: ForwardPass,
        actions: PolicyHeadOutput,
        rewards: ContinualRLSetting.Rewards,
    ) -> Loss:
        """Given the forward pass, the actions produced by this output head and
        the corresponding rewards for the current step, get a Loss to use for
        training.

        TODO: Replace the `forward_pass` argument with just `observations` and
        `representations` and provide the right (augmented) observations to the
        aux tasks. (Need to design that part later).

        NOTE: If an end of episode was reached in a given environment, we always
        calculate the losses and clear the buffers before adding in the new observation.
        """
        observations: ContinualRLSetting.Observations = forward_pass.observations
        representations: Tensor = forward_pass.representations
        assert self.batch_size, "forward() should have been called before this."

        if not self.hparams.accumulate_losses_before_backward:
            # Reset the loss for the current step, if we're not accumulating it.
            self.loss = Loss(self.name)

        observations = forward_pass.observations
        representations = forward_pass.representations
        assert observations.done is not None, "need the end-of-episode signal"

        # Calculate the loss for each environment.
        for env_index, done in enumerate(observations.done):

            env_loss = self.get_episode_loss(env_index, done=done)

            if env_loss is not None:
                self.loss += env_loss

            if done:
                # End of episode reached in that env!
                if self.training:
                    # BUG: This seems to be failing, during testing:
                    # assert env_loss is not None, (self.name)
                    pass

                self.on_episode_end(env_index)

        if self.batch_size != forward_pass.batch_size:
            raise NotImplementedError(
                "TODO: The batch size changed, because the batch contains different "
                "tasks. The BaseModel isn't yet applicable in the setup where "
                "there are multiple different tasks in the same batch in RL. "
            )
            # IDEA: Need to get access to the 'original' env indices (before slicing),
            # so that even when one more environment is in this task, the other
            # environment's buffers remain at the same index.. Something like a
            # remapping of env indices?
            assert len(representations.shape) == 2, (
                f"Need batched representations, with a shape [16, 128] or similar, but "
                f"representations have shape {representations.shape}."
            )
            self.batch_size = representations.shape[0]
            self.create_buffers()

        for env_index in range(self.batch_size):
            # Take a slice across the first dimension
            # env_observations = get_slice(observations, env_index)
            env_representations = representations[env_index]
            env_actions = actions.slice(env_index)
            # env_actions = actions[env_index, ...] # TODO: Is this nicer?
            env_rewards = rewards.slice(env_index)
            # BUG: Seems to be some issue of things in the buffers not all being on the
            # same device
            # assert self.device is not None
            # # TODO: Should we be storing these tensors in GPU memory though? Not sure if
            # # this makes sense.
            # env_representations = move(env_representations, device=self.device)
            # env_actions = move(env_actions, device=self.device)
            # env_rewards = move(env_rewards, device=self.device)

            self.representations[env_index].append(env_representations)
            self.actions[env_index].append(env_actions)
            self.rewards[env_index].append(env_rewards)

        self.num_steps_in_episode += 1
        # TODO:
        # If we want to accumulate the losses before backward, then we just return self.loss
        # If we DONT want to accumulate the losses before backward, then we do the
        # 'small' backward pass, and return a detached loss.
        if self.hparams.accumulate_losses_before_backward:
            if all(self.num_episodes_since_update >= self.hparams.min_episodes_before_update):
                # Every environment has seen the required number of episodes.
                # We return the accumulated loss, so that the model can do the backward
                # pass and update the weights.
                returned_loss = self.loss
                self.loss = Loss(self.name)
                self.detach_all_buffers()
                self.num_episodes_since_update[:] = 0
                return returned_loss
            return Loss(self.name)

        # Perform the backward pass as soon as a loss is available (with
        # retain_graph=True).
        if all(self.num_episodes_since_update >= self.hparams.min_episodes_before_update):
            # Every environment has seen the required number of episodes.
            # We return the loss for this step, with gradients, to indicate to the
            # Model that it can perform the backward pass and update the weights.
            returned_loss = self.loss
            self.loss = Loss(self.name)
            self.detach_all_buffers()
            self.num_episodes_since_update[:] = 0
            return returned_loss

        if self.loss.requires_grad:
            # Not all environments are done, but we have a Loss from one of them.
            self.loss.backward(retain_graph=True)
            # self.loss will be reset at each step in the `forward` method above.
            return self.loss.detach()

        # TODO: Why is self.loss non-zero here?
        if self.loss.loss != 0.0:
            # BUG: This is a weird edge-case, where at least one env produced
            # a loss, but that loss doesn't require grad.
            # This should only happen if the model isn't in training mode, for
            # instance.
            # assert not self.training, self.loss
            # return self.loss
            pass
        return self.loss

    def on_episode_end(self, env_index: int) -> None:
        self.num_episodes_since_update[env_index] += 1
        self.num_steps_in_episode[env_index] = 0
        self.clear_buffers(env_index)

    def get_episode_loss(self, env_index: int, done: bool) -> Optional[Loss]:
        """Calculate a loss to train with, given the last (up to
        max_episode_window_length) observations/actions/rewards of the current
        episode in the environment at the given index in the batch.

        If `done` is True, then this is for the end of an episode. If `done` is
        False, the episode is still underway.

        NOTE: While the Batch Observations/Actions/Rewards objects usually
        contain the "batches" of data coming from the N different environments,
        now they are actually a sequence of items coming from this single
        environment. For more info on how this is done, see the
        """
        inputs: Tensor
        actions: PolicyHeadOutput
        rewards: ContinualRLSetting.Rewards
        if not done:
            # This particular algorithm (REINFORCE) can't give a loss until the
            # end of the episode is reached.
            return None

        if len(self.actions[env_index]) == 0:
            logger.error(
                f"Weird, asked to get episode loss, but there is " f"nothing in the buffer?"
            )
            return None

        inputs, actions, rewards = self.stack_buffers(env_index)

        episode_length = actions.batch_size
        assert len(inputs) == len(actions.y_pred) == len(rewards.y)

        if episode_length <= 1:
            # TODO: If the episode has len of 1, we can't really get a loss!
            logger.error("Episode is too short!")
            return None

        log_probabilities = actions.y_pred_log_prob
        rewards = rewards.y

        loss_tensor = self.policy_gradient(
            rewards=rewards,
            log_probs=log_probabilities,
            gamma=self.hparams.gamma,
        )
        loss = Loss(self.name, loss_tensor)
        loss.metric = EpisodeMetrics(
            n_samples=1,
            mean_episode_reward=float(rewards.sum()),
            mean_episode_length=len(rewards),
        )
        # TODO: add something like `add_metric(self, metric: Metrics, name: str=None)`
        # to `Loss`.
        loss.metrics["gradient_usage"] = self.get_gradient_usage_metrics(env_index)
        return loss

    def get_gradient_usage_metrics(self, env_index: int) -> GradientUsageMetric:
        """Returns a Metrics object that describes how many of the actions
        from an episode that are used to calculate a loss still have their
        graphs, versus ones that don't have them (due to being created before
        the last model update, and therefore having been detached.)

        Does this by inspecting the contents of `self.actions[env_index]`.
        """
        episode_actions = self.actions[env_index]
        n_stored_items = len(self.actions[env_index])
        n_items_with_grad = sum(v.logits.requires_grad for v in episode_actions)
        n_items_without_grad = n_stored_items - n_items_with_grad
        return GradientUsageMetric(
            used_gradients=n_items_with_grad,
            wasted_gradients=n_items_without_grad,
        )

    @staticmethod
    def get_returns(rewards: Union[Tensor, List[Tensor]], gamma: float) -> Tensor:
        """Calculates the returns, as the sum of discounted future rewards at
        each step.
        """
        return discounted_sum_of_future_rewards(rewards, gamma=gamma)

    @staticmethod
    def policy_gradient(
        rewards: List[float], log_probs: Union[Tensor, List[Tensor]], gamma: float = 0.95
    ):
        """Implementation of the REINFORCE algorithm.

        Adapted from https://medium.com/@thechrisyoon/deriving-policy-gradients-and-implementing-reinforce-f887949bd63

        Parameters
        ----------
        - episode_rewards : List[Tensor]

            The rewards at each step in an episode

        - episode_log_probs : List[Tensor]

            The log probabilities associated with the actions that were taken at
            each step.

        Returns
        -------
        Tensor
            The "vanilla policy gradient" / REINFORCE gradient resulting from
            that episode.
        """
        return vanilla_policy_gradient(rewards, log_probs, gamma=gamma)

    @property
    def training(self) -> bool:
        return self._training

    @training.setter
    def training(self, value: bool) -> None:
        # logger.debug(f"setting training to {value} on the Policy output head")
        if hasattr(self, "_training") and value != self._training:
            before = "train" if self._training else "test"
            after = "train" if value else "test"
            logger.debug(
                f"Clearing buffers, since we're transitioning between from {before}->{after}"
            )
            self.clear_all_buffers()
            self.batch_size = None
            self.num_episodes_since_update[:] = 0
        self._training = value

    def clear_all_buffers(self) -> None:
        if self.batch_size is None:
            assert not self.rewards
            assert not self.representations
            assert not self.actions
            return
        for env_id in range(self.batch_size):
            self.clear_buffers(env_id)
        self.rewards.clear()
        self.representations.clear()
        self.actions.clear()
        self.batch_size = None

    def clear_buffers(self, env_index: int) -> None:
        """Clear the buffers associated with the environment at env_index."""
        self.representations[env_index].clear()
        self.actions[env_index].clear()
        self.rewards[env_index].clear()

    def detach_all_buffers(self):
        if not self.batch_size:
            assert not self.actions
            # No buffers to detach!
            return
        for env_index in range(self.batch_size):
            self.detach_buffers(env_index)

    def detach_buffers(self, env_index: int) -> None:
        """Detach all the tensors in the buffers for a given environment.

        We have to do this when we update the model while an episode in one of
        the enviroment isn't done.
        """
        # detached_representations = map(detach, )
        # detached_actions = map(detach, self.actions[env_index])
        # detached_rewards = map(detach, self.rewards[env_index])
        self.representations[env_index] = self._detach_buffer(self.representations[env_index])
        self.actions[env_index] = self._detach_buffer(self.actions[env_index])
        self.rewards[env_index] = self._detach_buffer(self.rewards[env_index])
        # assert False, (self.representations[0], self.representations[-1])

    def _detach_buffer(self, old_buffer: Sequence[Tensor]) -> deque:
        new_items = self._make_buffer()
        for item in old_buffer:
            detached = item.detach()
            new_items.append(detached)
        return new_items

    def _make_buffer(self, elements: Sequence[T] = None) -> Deque[T]:
        buffer: Deque[T] = deque(maxlen=self.hparams.max_episode_window_length)
        if elements:
            buffer.extend(elements)
        return buffer

    def _make_buffers(self) -> List[deque]:
        return [self._make_buffer() for _ in range(self.batch_size)]

    def stack_buffers(self, env_index: int):
        """Stack the observations/actions/rewards for this env and return them."""
        # episode_observations = tuple(self.observations[env_index])
        episode_representations = tuple(self.representations[env_index])
        episode_actions = tuple(self.actions[env_index])
        episode_rewards = tuple(self.rewards[env_index])
        assert len(episode_representations)
        assert len(episode_actions)
        assert len(episode_rewards)
        # BUG: Need to make sure that all tensors are on the same device:
        # assert self.device is not None
        # episode_representations = [
        #     move(item, device=self.device) for item in episode_representations
        # ]
        # episode_actions = [
        #     move(item, device=self.device) for item in episode_actions
        # ]
        # episode_rewards = [
        #     move(item, device=self.device) for item in episode_rewards
        # ]
        stacked_inputs = stack(episode_representations)
        stacked_actions = stack(episode_actions)
        stacked_rewards = stack(episode_rewards)
        return stacked_inputs, stacked_actions, stacked_rewards


def discounted_sum_of_future_rewards(rewards: Union[Tensor, List[Tensor]], gamma: float) -> Tensor:
    """Calculates the returns, as the sum of discounted future rewards at
    each step.
    """
    T = len(rewards)
    if not isinstance(rewards, Tensor):
        rewards = torch.as_tensor(rewards)
    # Construct a reward matrix, with previous rewards masked out (with each
    # row as a step along the trajectory).
    reward_matrix = rewards.expand([T, T]).triu()
    # Get the gamma matrix (upper triangular), see make_gamma_matrix for
    # more info.
    gamma_matrix = make_gamma_matrix(gamma, T, device=reward_matrix.device)
    # Multiplying by the gamma coefficients gives the discounted rewards.
    discounted_rewards = reward_matrix * gamma_matrix
    # Summing up over time gives the return at each step.
    return discounted_rewards.sum(-1)


def vanilla_policy_gradient(
    rewards: Sequence[float], log_probs: Union[Tensor, List[Tensor]], gamma: float = 0.95
):
    """Implementation of the REINFORCE algorithm.

    Adapted from https://medium.com/@thechrisyoon/deriving-policy-gradients-and-implementing-reinforce-f887949bd63

    Parameters
    ----------
    - episode_rewards : Sequence[float]

        The rewards at each step in an episode

    - episode_log_probs : List[Tensor]

        The log probabilities associated with the actions that were taken at
        each step.

    Returns
    -------
    Tensor
        The "vanilla policy gradient" / REINFORCE gradient resulting from
        that episode.
    """
    if isinstance(log_probs, Tensor):
        action_log_probs = log_probs
    else:
        action_log_probs = torch.stack(log_probs)
    reward_tensor = torch.as_tensor(rewards).type_as(action_log_probs)
    returns = PolicyHead.get_returns(reward_tensor, gamma=gamma)
    # Need both tensors to be 1-dimensional for the dot-product below.
    action_log_probs = action_log_probs.reshape(returns.shape)
    policy_gradient = -action_log_probs.dot(returns)
    return policy_gradient


# @torch.jit.script
# @lru_cache()
def make_gamma_matrix(gamma: float, T: int, device=None) -> Tensor:
    """
    Create an upper-triangular matrix [T, T] with the gamma factors,
    starting at 1.0 on the diagonal, and decreasing exponentially towards
    the right.
    """
    gamma_matrix = torch.empty([T, T]).triu_()
    # Neat indexing trick to fill up the upper triangle of the matrix:
    rows, cols = torch.triu_indices(T, T)
    # Precompute all the powers of gamma in range [0, T]
    all_gammas = gamma ** torch.arange(T)
    # Put the right value at each entry in the upper triangular matrix.
    gamma_matrix[rows, cols] = all_gammas[cols - rows]
    return gamma_matrix.to(device) if device else gamma_matrix


def normalize(x: Tensor):
    return (x - x.mean()) / (x.std() + 1e-9)


T = TypeVar("T")


def tuple_of_lists(list_of_tuples: List[Tuple[T, ...]]) -> Tuple[List[T], ...]:
    return tuple(map(list, zip(*list_of_tuples)))


def list_of_tuples(tuple_of_lists: Tuple[List[T], ...]) -> List[Tuple[T, ...]]:
    return list(zip(*tuple_of_lists))


================================================
FILE: sequoia/methods/models/output_heads/rl/policy_head_test.py
================================================
from functools import partial
from typing import Callable, Optional, Sequence

import gym
import numpy as np
import pytest
import torch
from gym import spaces
from gym.spaces.utils import flatdim
from gym.vector import SyncVectorEnv
from gym.vector.utils import batch_space
from torch import Tensor, nn

from sequoia.common.gym_wrappers import (
    AddDoneToObservation,
    ConvertToFromTensors,
    EnvDataset,
    PixelObservationWrapper,
)
from sequoia.common.loss import Loss
from sequoia.conftest import DummyEnvironment
from sequoia.methods.models.forward_pass import ForwardPass
from sequoia.settings.rl.continual import ContinualRLSetting
from sequoia.settings.rl.continual.make_env import make_batched_env

from .policy_head import PolicyHead


class FakeEnvironment(SyncVectorEnv):
    def __init__(
        self,
        env_fn: Callable[[], gym.Env],
        batch_size: int,
        new_episode_length: Callable[[int], int],
        episode_lengths: Sequence[int] = None,
    ):
        super().__init__([env_fn for _ in range(batch_size)])
        self.new_episode_length = new_episode_length
        self.batch_size = batch_size
        self.episode_lengths = np.array(
            episode_lengths or [new_episode_length(i) for i in range(self.num_envs)]
        )
        self.steps_left_in_episode = self.episode_lengths.copy()

        reward_space = spaces.Box(*self.reward_range, shape=())
        self.single_reward_space = reward_space
        self.reward_space = batch_space(reward_space, batch_size)

    def step(self, actions):
        self.steps_left_in_episode[:] -= 1

        # obs, reward, done, info = super().step(actions)
        obs = self.observation_space.sample()
        reward = np.ones(self.batch_size)

        assert not any(self.steps_left_in_episode < 0)
        done = self.steps_left_in_episode == 0

        info = np.array([{} for _ in range(self.batch_size)])

        for env_index, env_done in enumerate(done):
            if env_done:
                next_episode_length = self.new_episode_length(env_index)
                self.episode_lengths[env_index] = next_episode_length
                self.steps_left_in_episode[env_index] = next_episode_length

        return obs, reward, done, info


@pytest.mark.parametrize("batch_size", [2, 5])
def test_with_controllable_episode_lengths(batch_size: int, monkeypatch):
    """TODO: Test out the PolicyHead in a very controlled environment, where we
    know exactly the lengths of each episode.
    """
    env = FakeEnvironment(
        partial(gym.make, "CartPole-v0"),
        batch_size=batch_size,
        episode_lengths=[5, *(10 for _ in range(batch_size - 1))],
        new_episode_length=lambda env_index: 10,
    )
    env = AddDoneToObservation(env)
    env = ConvertToFromTensors(env)
    env = EnvDataset(env)

    obs_space = env.single_observation_space
    x_dim = flatdim(obs_space["x"])
    # Create some dummy encoder.
    encoder = nn.Linear(x_dim, x_dim)
    representation_space = obs_space["x"]

    output_head = PolicyHead(
        input_space=representation_space,
        action_space=env.single_action_space,
        reward_space=env.single_reward_space,
        hparams=PolicyHead.HParams(
            max_episode_window_length=100,
            min_episodes_before_update=1,
            accumulate_losses_before_backward=False,
        ),
    )
    # TODO: Simulating as if the output head were attached to a BaseModel.
    PolicyHead.base_model_optimizer = torch.optim.Adam(output_head.parameters(), lr=1e-3)

    # Simplify the loss function so we know exactly what the loss should be at
    # each step.

    def mock_policy_gradient(
        rewards: Sequence[float], log_probs: Sequence[float], gamma: float = 0.95
    ) -> Optional[Loss]:
        log_probs = (log_probs - log_probs.clone()) + 1
        # Return the length of the episode, but with a "gradient" flowing back into log_probs.
        return len(rewards) * log_probs.mean()

    monkeypatch.setattr(output_head, "policy_gradient", mock_policy_gradient)

    batch_size = env.batch_size

    obs = env.reset()
    step_done = np.zeros(batch_size, dtype=np.bool)

    for step in range(200):
        x, obs_done = obs["x"], obs["done"]

        # The done from the obs should always be the same as the 'done' from the 'step' function.
        assert np.array_equal(obs_done, step_done)

        representations = encoder(x)
        observations = ContinualRLSetting.Observations(
            x=x,
            done=obs_done,
        )

        actions_obj = output_head(observations, representations)
        actions = actions_obj.y_pred

        # TODO: kinda useless to wrap a single tensor in an object..
        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=actions,
        )
        obs, rewards, step_done, info = env.step(actions)

        rewards_obj = ContinualRLSetting.Rewards(y=rewards)
        loss = output_head.get_loss(
            forward_pass=forward_pass,
            actions=actions_obj,
            rewards=rewards_obj,
        )
        print(f"Step {step}")
        print(f"num episodes since update: {output_head.num_episodes_since_update}")
        print(f"steps left in episode: {env.steps_left_in_episode}")
        print(f"Loss for that step: {loss}")

        if any(obs_done):
            assert loss != 0.0

        if step == 5.0:
            # Env 0 first episode from steps 0 -> 5
            assert loss.loss == 5.0
            assert loss.metrics["gradient_usage"].used_gradients == 5.0
            assert loss.metrics["gradient_usage"].wasted_gradients == 0.0
        elif step == 10:
            # Envs[1:batch_size], first episode, from steps 0 -> 10
            # NOTE: At this point, both envs have reached the required number of episodes.
            # This means that the gradient usage on the next time any env reaches
            # an end-of-episode will be one less than the total number of items.
            assert loss.loss == 10.0 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].used_gradients == 10.0 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].wasted_gradients == 0.0
        elif step == 15:
            # Env 0 second episode from steps 5 -> 15
            assert loss.loss == 10.0
            assert loss.metrics["gradient_usage"].used_gradients == 4
            assert loss.metrics["gradient_usage"].wasted_gradients == 6

        elif step == 20:
            # Envs[1:batch_size]: second episode, from steps 0 -> 10
            # NOTE: At this point, both envs have reached the required number of episodes.
            # This means that the gradient usage on the next time any env reaches
            # an end-of-episode will be one less than the total number of items.
            assert loss.loss == 10.0 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].used_gradients == 9 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].wasted_gradients == 1 * (batch_size - 1)

        elif step == 25:
            # Env 0 third episode from steps 5 -> 15
            assert loss.loss == 10.0
            assert loss.metrics["gradient_usage"].used_gradients == 4
            assert loss.metrics["gradient_usage"].wasted_gradients == 6

        elif step > 0 and step % 10 == 0:
            # Same pattern as step 20 above
            assert loss.loss == 10.0 * (batch_size - 1), step
            assert loss.metrics["gradient_usage"].used_gradients == 9 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].wasted_gradients == 1 * (batch_size - 1)

        elif step > 0 and step % 5 == 0:
            # Same pattern as step 25 above
            assert loss.loss == 10.0
            assert loss.metrics["gradient_usage"].used_gradients == 4
            assert loss.metrics["gradient_usage"].wasted_gradients == 6

        else:
            assert loss.loss == 0.0, step


@pytest.mark.parametrize("batch_size", [1, 2, 5])
def test_loss_is_nonzero_at_episode_end(batch_size: int):
    """Test that when stepping through the env, when the episode ends, a
    non-zero loss is returned by the output head.
    """
    with gym.make("CartPole-v0") as temp_env:
        temp_env = AddDoneToObservation(temp_env)
        obs_space = temp_env.observation_space
        action_space = temp_env.action_space
        reward_space = getattr(
            temp_env, "reward_space", spaces.Box(*temp_env.reward_range, shape=())
        )

    env = gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=False)
    env = AddDoneToObservation(env)
    env = ConvertToFromTensors(env)
    env = EnvDataset(env)

    head = PolicyHead(
        input_space=obs_space.x,
        action_space=action_space,
        reward_space=reward_space,
        hparams=PolicyHead.HParams(accumulate_losses_before_backward=False),
    )
    # TODO: Simulating as if the output head were attached to a BaseModel.
    PolicyHead.base_model_optimizer = torch.optim.Adam(head.parameters(), lr=1e-3)
    head.train()

    env.seed(123)
    obs = env.reset()

    # obs = torch.as_tensor(obs, dtype=torch.float32)

    done = torch.zeros(batch_size, dtype=bool)
    info = np.array([{} for _ in range(batch_size)])
    loss = None

    non_zero_losses = 0

    encoder = nn.Linear(4, 4)
    encoder.train()

    for i in range(100):
        representations = encoder(obs["x"])

        observations = ContinualRLSetting.Observations(
            x=obs["x"],
            done=done,
            # info=info,
        )
        head_output = head.forward(observations, representations=representations)
        actions = head_output.actions.numpy().tolist()
        # actions = np.zeros(batch_size, dtype=int).tolist()

        obs, rewards, done, info = env.step(actions)
        done = torch.as_tensor(done, dtype=bool)
        rewards = ContinualRLSetting.Rewards(rewards)
        assert len(info) == batch_size

        print(f"Step {i}, obs: {obs}, done: {done}, info: {info}")

        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=head_output,
        )
        loss = head.get_loss(forward_pass, actions=head_output, rewards=rewards)
        print("loss:", loss)

        assert observations.done is not None
        for env_index, env_is_done in enumerate(observations.done):
            if env_is_done:
                print(f"Episode ended for env {env_index} at step {i}")
                assert loss.loss != 0.0
                non_zero_losses += 1
                break
        else:
            print(f"No episode ended on step {i}, expecting no loss.")
            assert loss is None or loss.loss == 0.0

    assert non_zero_losses > 0


@pytest.mark.parametrize("batch_size", [1, 2, 5])
def test_done_is_sometimes_True_when_iterating_through_env(batch_size: int):
    """Test that when *iterating* through the env, done is sometimes 'True'."""
    env = gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=True)
    env = AddDoneToObservation(env)
    env = ConvertToFromTensors(env)
    env = EnvDataset(env)
    for i, obs in zip(range(100), env):
        print(i, obs)
        _ = env.send(env.action_space.sample())
        if any(obs["done"]):
            break
    else:
        pytest.fail(reason="Never encountered done=True!")


@pytest.mark.parametrize("batch_size", [1, 2, 5])
def test_loss_is_nonzero_at_episode_end_iterate(batch_size: int):
    """Test that when *iterating* through the env (active-dataloader style),
    when the episode ends, a non-zero loss is returned by the output head.
    """
    with gym.make("CartPole-v0") as temp_env:
        temp_env = AddDoneToObservation(temp_env)

        obs_space = temp_env.observation_space
        action_space = temp_env.action_space
        reward_space = getattr(
            temp_env, "reward_space", spaces.Box(*temp_env.reward_range, shape=())
        )

    env = gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=False)
    env = AddDoneToObservation(env)
    env = ConvertToFromTensors(env)
    env = EnvDataset(env)

    head = PolicyHead(
        # observation_space=obs_space,
        input_space=obs_space["x"],
        action_space=action_space,
        reward_space=reward_space,
        hparams=PolicyHead.HParams(accumulate_losses_before_backward=False),
    )

    env.seed(123)
    non_zero_losses = 0

    for i, obs in zip(range(100), env):
        print(i, obs)
        x = obs["x"]
        done = obs["done"]
        representations = x
        assert isinstance(x, Tensor)
        assert isinstance(done, Tensor)
        observations = ContinualRLSetting.Observations(
            x=x,
            done=done,
            # info=info,
        )
        head_output = head.forward(observations, representations=representations)

        actions = head_output.actions.numpy().tolist()
        # actions = np.zeros(batch_size, dtype=int).tolist()

        rewards = env.send(actions)

        # print(f"Step {i}, obs: {obs}, done: {done}")
        assert isinstance(representations, Tensor)
        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=head_output,
        )
        rewards = ContinualRLSetting.Rewards(rewards)
        loss = head.get_loss(forward_pass, actions=head_output, rewards=rewards)
        print("loss:", loss)

        for env_index, env_is_done in enumerate(observations.done):
            if env_is_done:
                print(f"Episode ended for env {env_index} at step {i}")
                assert loss.total_loss != 0.0
                non_zero_losses += 1
                break
        else:
            print(f"No episode ended on step {i}, expecting no loss.")
            assert loss.total_loss == 0.0

    assert non_zero_losses > 0


@pytest.mark.xfail(reason="TODO: Fix this test")
def test_buffers_are_stacked_correctly(monkeypatch):
    """TODO: Test that when "de-synced" episodes, when fed to the output head,
    get passed, re-stacked correctly, to the get_episode_loss function.
    """
    batch_size = 5

    starting_values = [i for i in range(batch_size)]
    targets = [10 for i in range(batch_size)]

    env = SyncVectorEnv(
        [
            partial(DummyEnvironment, start=start, target=target, max_value=10 * 2)
            for start, target in zip(starting_values, targets)
        ]
    )
    obs = env.reset()
    assert obs.tolist() == list(range(batch_size))

    reward_space = spaces.Box(*env.reward_range, shape=())
    output_head = PolicyHead(  # observation_space=spaces.Tuple([env.observation_space,
        #              spaces.Box(False, True, [batch_size], np.bool)]),
        input_space=spaces.Box(0, 1, (1,)),
        action_space=env.single_action_space,
        reward_space=reward_space,
    )
    # Set the max window length, for testing.
    output_head.hparams.max_episode_window_length = 100

    obs = env.reset()
    done = np.zeros(batch_size, dtype=bool)

    obs = torch.from_numpy(obs)
    done = torch.from_numpy(done)

    def mock_get_episode_loss(
        self: PolicyHead,
        env_index: int,
        inputs: Tensor,
        actions: ContinualRLSetting.Observations,
        rewards: ContinualRLSetting.Rewards,
        done: bool,
    ) -> Optional[Loss]:
        print(f"Environment at index {env_index}, episode ended: {done}")
        if done:
            print(f"Full episode: {inputs}")
        else:
            print(f"Episode so far: {inputs}")

        n_observations = len(inputs)

        assert inputs.flatten().tolist() == (env_index + np.arange(n_observations)).tolist()
        if done:
            # Unfortunately, we don't get the final state, because of how
            # VectorEnv works atm.
            assert inputs[-1] == targets[env_index] - 1

    monkeypatch.setattr(PolicyHead, "get_episode_loss", mock_get_episode_loss)

    # perform 10 iterations, incrementing each DummyEnvironment's counter at
    # each step (action of 1).
    # Therefore, at first, the counters should be [0, 1, 2, ... batch-size-1].
    info = [{} for _ in range(batch_size)]

    for step in range(10):
        print(f"Step {step}.")
        # Wrap up the obs to pretend that this is the data coming from a
        # ContinualRLSetting.
        observations = ContinualRLSetting.Observations(x=obs, done=done)  # , info=info)
        # We don't use an encoder for testing, so the representations is just x.
        representations = obs.reshape([batch_size, 1])
        assert observations.task_labels is None

        actions = output_head(observations.float(), representations.float())

        # Wrap things up to pretend like the output head is being used in the
        # BaseModel:

        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=actions,
        )

        action_np = actions.actions_np

        obs, rewards, done, info = env.step(action_np)

        obs = torch.from_numpy(obs)
        rewards = torch.from_numpy(rewards)
        done = torch.from_numpy(done)

        rewards = ContinualRLSetting.Rewards(y=rewards)
        _ = output_head.get_loss(forward_pass, actions=actions, rewards=rewards)

        # Check the contents of the episode buffers.

        assert len(output_head.representations) == batch_size
        for env_index in range(batch_size):

            # obs_buffer = output_head.observations[env_index]
            representations_buffer = output_head.representations[env_index]
            action_buffer = output_head.actions[env_index]
            reward_buffer = output_head.rewards[env_index]

            if step >= batch_size:
                if step + env_index == targets[env_index]:
                    assert len(representations_buffer) == 1 and not output_head.done[env_index]
                # if env_index == step - batch_size:
                continue
            assert len(representations_buffer) == step + 1
            # Check to see that the last entry in the episode buffer for this
            # environment corresponds to the slice of the most recent
            # observations/actions/rewards at the index corresponding to this
            # environment.

            # observation_tuple = input_buffer[-1]
            step_action = action_buffer[-1]
            step_reward = reward_buffer[-1]
            # assert observation_tuple.x == observations.x[env_index]
            # assert observation_tuple.task_labels is None
            # assert observation_tuple.done == observations.done[env_index]

            # The last element in the buffer should be the slice in the batch
            # for that environment.
            assert step_action.y_pred == actions.y_pred[env_index]
            assert step_reward.y == rewards.y[env_index]

        if step < batch_size:
            assert obs.tolist() == (np.arange(batch_size) + step + 1).tolist()
        # if step >= batch_size:
        #     if step + env_index == targets[env_index]:
        #         assert done

    # assert False, (obs, rewards, done, info)
    # loss: Loss = output_head.get_loss(forward_pass, actions=actions, rewards=rewards)


@pytest.mark.no_xvfb
def test_sanity_check_cartpole_done_vector():
    """TODO: Sanity check, make sure that cartpole has done=True at some point
    when using a BatchedEnv.
    """
    env = make_batched_env("CartPole-v0", batch_size=5, wrappers=[PixelObservationWrapper])
    env = AddDoneToObservation(env)
    obs = env.reset()

    for i in range(100):
        obs, rewards, done, info = env.step(env.action_space.sample())
        assert all(obs["done"] == done), i
        if any(done):
            break
    else:
        assert False, "Should have had at least one done=True, over the 100 steps!"


================================================
FILE: sequoia/methods/models/output_heads/rl/wasted_steps_calc.py
================================================
from typing import Callable, List

import numpy as np
import tqdm as tqdm


def get_fraction_of_observations_with_grad(
    n_envs: int,
    new_episode_length: Callable[[], int],
    n_updates: int = 10,
    min_episodes_before_update: int = 1,
):
    n_used_steps = 0
    n_wasted_steps = 0
    # min_episode_length = 0
    # max_episode_length = 10
    # n_envs = 10
    # new_episode_length = lambda: 10
    # The starting episode lengths for each env.
    # new_episode_length = lambda: 10
    # episode_lengths = [5, 10]
    # n_envs = 2
    episode_lengths = np.array([new_episode_length() for _ in range(n_envs)])
    steps_left_in_episode = episode_lengths.copy()
    num_finished_episodes = np.zeros(n_envs)

    for step in tqdm.tqdm(range(n_updates), leave=False):
        # print(f"Step {step}")
        steps_since_last_update = np.zeros(n_envs)
        finished_episodes_since_last_update = np.zeros(n_envs)

        # Loop over all the envs, until all of them have produced a loss (reached
        # the end of an episode).
        while not all(finished_episodes_since_last_update >= min_episodes_before_update):
            # print(f"Episode lengths: {episode_lengths}")
            # print(f"Steps left: {steps_left_in_episode}")
            # print(f"Completed episodes: {num_finished_episodes}")
            # print(f"Used steps: {n_used_steps}")
            # print(f"Wasted steps: {n_wasted_steps}")

            # print(steps_left_in_episode)
            for env in range(n_envs):
                if steps_left_in_episode[env] == 0:
                    # Perform the "backward()" for that env.
                    # This will use all steps since the last update (with grads).
                    used = steps_since_last_update[env]
                    n_used_steps += used
                    wasted = episode_lengths[env] - steps_since_last_update
                    # print(f"Step {step}, doing backward for env {env} using {used} steps.")
                    steps_since_last_update[env] = 0

                    finished_episodes_since_last_update[env] += 1
                    num_finished_episodes[env] += 1

                    # Sample the length of the next episode randomly.
                    length_of_next_episode = new_episode_length()
                    steps_left_in_episode[env] = length_of_next_episode
                else:
                    steps_left_in_episode[env] -= 1
                    steps_since_last_update[env] += 1

        # Perform the "optimizer step" for the model.
        # This 'wastes' all the prediction tensors (actions) in unfinished episodes
        # because it would detach them.
        wasted_per_env = steps_since_last_update
        n_wasted_steps += int(wasted_per_env.sum())
        # print(f"Updating model at step {step}, wasting {wasted_per_env} grads")
        # exit()
        # print(f"Ratio of used vs wasted so far: {n_used_steps}/{n_wasted_steps+n_used_steps}")
        # print(f"n episodes per env: {num_finished_episodes}")

    total_steps = n_used_steps + n_wasted_steps
    used_ratio = n_used_steps / total_steps
    wasted_ratio = n_wasted_steps / total_steps

    # print(f"Total steps: {total_steps}")
    # print(f"n_envs: {n_envs}")
    # print(f"n_updates: {n_updates}")
    # print(f"Used steps:   {n_used_steps} \t{used_ratio:.2%}")
    # print(f"Wasted steps: {n_wasted_steps} \t{wasted_ratio:.2%}")
    return n_used_steps, n_wasted_steps


if __name__ == "__main__":
    import matplotlib.pyplot as plt

    fig: plt.Figure
    axes: List[plt.Axes]
    n_updates_per_run: int = 20
    fig, axes = plt.subplots(1, 2)
    import textwrap

    # x: np.ndarray = np.random.randint(1, 32, size=100)
    x: np.ndarray = np.arange(63, dtype=int) + 1

    min_episodes_before_update = 3
    # min_episodes_before_updates = [1, 3, 5]

    min_episode_length: int = 5
    max_episode_length: int = 100
    episode_len_dist = f"U[{min_episode_length},{max_episode_length}]"

    # Normally distributed episode lengths:
    # episode_length_mean = (max_episode_length + min_episode_length) / 2
    episode_length_mean = 50
    # episode_length_std = np.sqrt(max_episode_length - episode_length_mean)
    # episode_len_dist = f"N({episode_length_mean:.1f}, {episode_length_std:.1f})"
    episode_length_stds = [1.0, 3.0, 5.0, 10.0]
    episode_len_dist = f"N({episode_length_mean:.1f}, {episode_length_stds})"

    s = "s" if min_episodes_before_update > 1 else ""
    fig.suptitle(
        textwrap.dedent(
            f"""\
        Episode length ~ {episode_len_dist},
        Updating model when all envs have finished at least {min_episodes_before_update} episode{s},
        {n_updates_per_run} total updates per run.
        """
        )
    )

    # for min_episodes_before_update in min_episodes_before_updates:
    for episode_length_std in episode_length_stds:
        label = f"episode_length_std={episode_length_std:.1f}"
        # label = f"min_episodes_before_update={min_episodes_before_update}"

        # new_episode_length = lambda: np.random.randint(min_episode_length, max_episode_length)
        new_episode_length = lambda: int(np.random.normal(episode_length_mean, episode_length_std))

        # x.sort()
        used_ = []
        wasted_ = []

        for n_envs in tqdm.tqdm(x, desc="n_envs"):
            used, wasted = get_fraction_of_observations_with_grad(
                n_envs=n_envs,
                new_episode_length=new_episode_length,
                min_episodes_before_update=min_episodes_before_update,
                n_updates=n_updates_per_run,
            )
            used_.append(used)
            wasted_.append(wasted)

        y_used = np.array(used_)
        y_wasted = np.array(wasted_)

        used_ratio = y_used / (y_used + y_wasted)
        wasted_ratio = 1 - used_ratio

        axes[0].set_title(f"Percentage of used vs 'wasted' gradients w.r.t. batch size")
        axes[0].scatter(x, used_ratio, label=label)
        axes[0].set_ylim(0.0, 1.0)

        used_per_env = y_used / x / n_updates_per_run
        axes[1].scatter(x, used_per_env)

    fig.legend()
    # xs, ys = x, used_ratio
    # # zip joins x and y coordinates in pairs
    # for x_i, y_i in zip(xs, ys):
    #     label = f"({int(x_i)}, {y_i:.2f})"
    #     axes[0].annotate(label, # this is the text
    #                 (x_i, y_i), # this is the point to label
    #                 textcoords="offset points", # how to position the text
    #                 xytext=(0,10), # distance from text to points (x,y)
    #                 ha='center') # horizontal alignment can be left, right or center

    axes[0].set_ylabel("% of used gradients")
    axes[0].set_xlabel("batch size (number of environments)")

    axes[1].set_title(f"''Data efficiency'': Average number of used steps per update per env")

    axes[1].set_xlabel(f"# of environments")
    axes[1].set_ylabel(f"# of used steps per env")

    plt.show()


================================================
FILE: sequoia/methods/models/output_heads.puml
================================================
@startuml output_heads

package output_heads {
    package output_head {
        abstract class OutputHead {
            + hparams: OutputHead.HParams
            {abstract} + forward(observations: Observations representations: Tensor): Actions
            {abstract} + get_loss(ForwardPass, Actions, Rewards) -> Loss
        }
        abstract class OutputHead.HParams {
            + {static} available_activations: ClassVar[Dict[str, Type[nn.Module]]]
            + hidden_layers: int
            + hidden_neurons: List[int]
            + activation: Type[nn.Module] = "tanh"
        }
    }

    package classification {
        class ClassificationHead implements OutputHead {
            + forward(Observations representations: Tensor): ClassificationHeadOutput
            + get_loss(ForwardPass, ClassificationOutput, Rewards): Loss
        }
        class ClassificationHead.HParams extends OutputHead.HParams {}
        class ClassificationHeadOutput extends settings.base.Actions {
            + y_pred: Tensor
            + logits: Tensor
        }

    }

    package regression {
        class RegressionHead implements OutputHead {}
    }

    package rl {
        package policy_head {
            class PolicyHead extends ClassificationHead {
                + forward(observations: Observations representations: Tensor): PolicyHeadOutput
                + hparams: PolicyHead.HParams
            }
            class PolicyHead.HParams extends ClassificationHead.HParams {
                + forward(observations: Observations representations: Tensor): PolicyHeadOutput
            }
            class PolicyHeadOutput extends ClassificationHeadOutput {
                action_dist: Distribution
            }
        }
        package episodic_a2c {
            class EpisodicA2C extends PolicyHead {
                + actor: nn.Module
                + critic: nn.Module
                + get_episode_loss(Observations, Actions, Rewards, done: bool): Loss
            }
            class EpisodicA2C.HParams extends PolicyHead.HParams {
                + normalize_advantages: bool = False
                + actor_loss_coef: float = 0.5
                + critic_loss_coef: float = 0.5
                + entropy_loss_coef: float = 0.1
                + max_policy_grad_norm: Optional[float] = None
                + gamma: float = 0.99
                + learning_rate: float = 1e-2
            }
            class A2CHeadOutput extends PolicyHeadOutput {
                + value: Tensor
            }
        }
        package actor_critic_head {
            class ActorCriticHead extends ClassificationHead {
                + hparams: ActorCriticHead.HParams
                + actor: nn.Module
                + critic: nn.Module 
            }
            class ActorCriticHead.HParams extends ClassificationHead.HParams {
                + gamma: float = 0.95
                + learning_rate: float = 1e-3
            }
        }
    }

' OutputHead *-- OutputHead.HParams
' ClassificationHead *-- ClassificationHead.HParams
' PolicyHead *-- PolicyHead.HParams
' ActorCriticHead *-- ActorCriticHead.HParams
' EpisodicA2C *-- EpisodicA2C.HParams

' OutputHead *-- Actions : outputs
' ClassificationHead *-- ClassificationHeadOutput : outputs
' PolicyHead *-- PolicyHeadOutput : outputs
' EpisodicA2C *-- A2CHeadOutput : outputs
}

@enduml

================================================
FILE: sequoia/methods/models/simple_convnet.py
================================================
from torch import Tensor, nn


class SimpleConvNet(nn.Module):
    def __init__(self, in_channels: int = 3, n_classes: int = 10):
        super().__init__()

        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(6),
            nn.ReLU(inplace=True),
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.AdaptiveAvgPool2d(output_size=(8, 8)),  # [16, 8, 8]
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0, bias=False),  # [32, 6, 6]
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=0, bias=False),  # [32, 4, 4]
            nn.BatchNorm2d(32),
            nn.Flatten(),
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512, 120),  # NOTE: This '512' is what gets used as the
            # hidden size of the encoder.
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, n_classes),
        )

    def forward(self, x: Tensor) -> Tensor:
        return self.fc(self.features(x))


================================================
FILE: sequoia/methods/models.puml
================================================
@startuml models
package models {
    class ForwardPass extends Batch {
        + observations: Observations
        + representations: Tensor
        + actions: Actions
    }
    ' TODO: Idk why, but this doesn't work if placed inside the 'models' package
    ' above.
    !include ./models/output_heads.puml
    !include ./models/base_model.puml
}
@enduml


================================================
FILE: sequoia/methods/packnet_method.py
================================================
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, Type, Union

import torch
from pytorch_lightning import Callback, LightningModule, Trainer
from pytorch_lightning.callbacks import EarlyStopping
from simple_parsing.helpers import mutable_field
from simple_parsing.helpers.hparams import HyperParameters, uniform
from torch import Tensor, nn

from sequoia.common.config import Config
from sequoia.methods.base_method import BaseMethod, BaseModel
from sequoia.methods.trainer import TrainerConfig
from sequoia.settings import Setting
from sequoia.settings.assumptions import IncrementalAssumption as IncrementalSetting
from sequoia.settings.sl import IncrementalSLSetting, TaskIncrementalSLSetting


class PackNet(Callback, nn.Module):
    """PyTorch-Lightning Callback that implements the PackNet algorithm for CL.

    TODO: Add a citation for the PackNet paper.
    """

    @dataclass
    class HParams(HyperParameters):
        """Hyper-parameters of the Packnet callback."""

        prune_instructions: Union[float, List[float]] = uniform(0.1, 0.9, default=0.5)

        train_epochs: int = uniform(1, 5, default=1)
        fine_tune_epochs: int = uniform(0, 5, default=1)

    def __init__(
        self,
        n_tasks: int,
        hparams: Optional["PackNet.HParams"] = None,
        prunable_types: Sequence[Type[nn.Module]] = (nn.Conv2d, nn.Linear),
        ignore_modules: Sequence[str] = None,
        ignore_parameters: Sequence[str] = ("bias",),
    ):
        """Create the PackNet callback.

        Parameters
        ----------
        n_tasks : int
            Number of tasks.
        hparams : PackNet.HParams
            Configuration options (hyper-parameters) of the PackNet algorithm.
        prunable_types : Sequence[Type[nn.Module]], optional
            The types of nn.Modules to consider for pruning. By default, only consideres
            layers of types `nn.Conv2d` and `nn.Linear`.
        ignore_modules : Sequence[str], optional
            List of flags for module names that should be ignored by PackNet.
            When one of these values is found within the name of a module, it is
            ignored. Doesn't ignore any modules by default.
        parameters_to_ignore : List[str], optional
            List of flags for parameter names that should be ignored by PackNet.
            When one of these values is found within the name of a parameter, it is
            ignored. Defaults to ["bias"].
        """
        super().__init__()
        hparams = hparams or self.HParams()
        self.n_tasks = n_tasks
        self.prune_instructions = hparams.prune_instructions
        self.prunable_types = prunable_types or [nn.Conv2d, nn.Linear]
        self.ignore_modules = list(ignore_modules or [])
        self.ignore_parameters = list(ignore_parameters or [])
        # Set up an array of quantiles for pruning procedure
        if n_tasks:
            self.config_instructions()

        self.PATH = None
        self.epoch_split = (hparams.train_epochs, hparams.fine_tune_epochs)
        self.current_task = 0
        # 3-dimensions: task, layer, parameter mask
        self.masks: List[Dict[str, Tensor]] = []
        self.mode: str = None
        self.params_dict: dict = None

    def filtered_parameter_iterator(self, module: nn.Module) -> Iterable[Tuple[str, nn.Parameter]]:
        """Iterator that, given a module, yields tuples with the full name of the
        parameters that will be modified by the PackNet callback, as well as the
        parameters themselves.

        This is used to remove a bit of boilerplate code in the for loops below.

        Parameters
        ----------
        module : nn.Module
            The module to iterate over.

        Returns
        -------
        Iterable[Tuple[str, nn.Parameter]]
            An Iterator of tuples containing parameter names ('{mod_name}.{param_name}')
            and parameters.
        """
        for mod_name, mod in module.named_modules():
            if not isinstance(mod, self.prunable_types):
                continue
            if any(ignored in mod_name for ignored in self.ignore_modules):
                continue
            for param_name, param in mod.named_parameters():
                if any(ignored in param_name for ignored in self.ignore_parameters):
                    continue

                param_full_name = f"{mod_name}.{param_name}"
                yield param_full_name, param

    @torch.no_grad()
    def prune(self, model: nn.Module, prune_quantile: float) -> Dict[str, Tensor]:
        """Create task-specific mask and prune least relevant weights

        [extended_summary]

        Parameters
        ----------
        model : nn.Module
            The model to be pruned.
        prune_quantile : float
            The percentage of weights to prune as a decimal.

        Returns
        -------
        Dict[str, Tensor]
            The masks to use to prune the layers of the given model.
        """
        # Calculate Quantile
        all_prunable_tensors: List[Tensor] = []

        for param_full_name, param_layer in self.filtered_parameter_iterator(model):
            # get fixed weights for this layer (on the same device)
            prev_mask = torch.zeros_like(param_layer, dtype=torch.bool)

            for task_masks in self.masks:
                if param_full_name in task_masks:
                    prev_mask |= task_masks[param_full_name]

            p = param_layer.masked_select(~prev_mask)

            if p is not None:
                all_prunable_tensors.append(p)

        all_parameters_tensor = torch.cat(all_prunable_tensors, -1)
        cutoff = torch.quantile(torch.abs(all_parameters_tensor), q=prune_quantile)

        masks = {}  # create mask for this task
        for param_full_name, param_layer in self.filtered_parameter_iterator(model):
            # get weight mask for this layer
            # p
            prev_mask = torch.zeros_like(param_layer, dtype=torch.bool)

            for task_masks in self.masks:
                # TODO: check for bug here
                # if param_full_name in task_masks:
                prev_mask |= task_masks[param_full_name]

            curr_mask = torch.abs(param_layer).ge(cutoff)  # q
            curr_mask &= ~prev_mask  # (q & ~p)

            # Zero non masked weights
            param_layer *= curr_mask | prev_mask

            masks[param_full_name] = curr_mask

        return masks

    def fine_tune_mask(self, model: nn.Module):
        """
        Zero the gradient of pruned weights this task as well as previously fixed weights
        Apply this mask before each optimizer step during fine-tuning
        """
        assert len(self.masks) > self.current_task
        for param_full_name, param in self.filtered_parameter_iterator(model):
            param.grad *= self.masks[self.current_task][param_full_name]

    def training_mask(self, model: nn.Module):
        """
        Zero the gradient of only fixed weights for previous tasks
        Apply this mask after .backward() and before
        optimizer.step() at every batch of training a new task
        """
        if len(self.masks) == 0:
            return

        for param_full_name, param in self.filtered_parameter_iterator(model):
            # get mask of weights from previous tasks
            prev_mask = torch.zeros_like(param, dtype=torch.bool)

            for task_masks in self.masks:
                # FIXME: Get the mask if it exists, otherwise set one and move on.
                # if param_full_name not in task_masks:
                #     task_masks[param_full_name] = torch.zeros_like(param, dtype=torch.bool)
                prev_mask |= task_masks[param_full_name]

            # zero grad of previous fixed weights
            # param.grad[prev_mask] = 0. # (NOTE: Equivalent)
            param.grad *= ~prev_mask

    def fix_biases(self, model: nn.Module):
        """
        Fix the gradient of prunable bias parameters
        """
        for mod_name, mod in model.named_modules():
            if not isinstance(mod, self.prunable_types):
                continue
            if any(ignore in mod_name for ignore in self.ignore_modules):
                continue
            for name, param_layer in mod.named_parameters():
                if "bias" in name:
                    param_layer.requires_grad = False

    def fix_batch_norm(self, model: nn.Module):
        """
        Fix batch norm gain, bias, running mean and variance
        """
        for mod_name, mod in model.named_modules():
            if isinstance(mod, nn.BatchNorm2d):
                mod.affine = False
                for param_layer in mod.parameters():
                    param_layer.requires_grad = False

    def set_params_dict(self, model: nn.Module):
        """
        Set a dictionary containing all prunable parameters
        useful for fixing all layers, but may be wasted memory
        """
        # TODO: This dict actually doesn't copy the parameters, it saves references.
        self.params_dict = dict()
        for param_full_name, param in self.filtered_parameter_iterator(model):
            self.params_dict[param_full_name] = param

    def fix_all_layers(self, model: nn.Module):
        """
        Fix grad of all parameters outside of params_dict
        """
        self.set_params_dict(model)  # Not necessary for fixed model

        # Fix grad of all non-prunable layers in this
        for mod_name, mod in model.named_modules():
            for param_name, param_layer in mod.named_parameters():
                key = f"{mod_name}.{param_name}"
                if key not in self.params_dict:
                    param_layer.requires_grad = False

    @torch.no_grad()
    def apply_eval_mask(self, model: nn.Module, task_idx: int):
        """
        Revert to final trained network state and apply mask for given task
        :param model: the model to apply the eval mask to
        :param task_idx: the task id to be evaluated (0 - > n_tasks)
        """

        assert len(self.masks) > task_idx
        for param_full_name, param in self.filtered_parameter_iterator(model):
            # get indices of all weights from previous masks
            prev_mask = torch.zeros_like(param, dtype=torch.bool)
            for task_id in range(0, task_idx + 1):
                prev_mask |= self.masks[task_id][param_full_name]

            # zero out all weights that are not in the mask for this task
            # param[prev_mask] = 0. (NOTE: Equivalent)
            param *= prev_mask

    def mask_remaining_params(self, model: nn.Module) -> Dict[str, Tensor]:
        """
        Create mask for remaining parameters
        """
        masks = {}
        for param_full_name, param in self.filtered_parameter_iterator(model):
            # Get mask of all weights assigned to previous tasks
            prev_mask = torch.zeros_like(param, dtype=torch.bool)
            for task_masks in self.masks:
                prev_mask |= task_masks[param_full_name]
            # Create mask of remaining parameters
            layer_mask = ~prev_mask
            masks[param_full_name] = layer_mask
        return masks
        # self.masks.append(mask)

    def total_epochs(self) -> int:
        return self.epoch_split[0] + self.epoch_split[1]

    def config_instructions(self):
        """
        Create pruning instructions for this task split
        :return: None
        """
        assert self.n_tasks is not None

        if not isinstance(self.prune_instructions, list):  # if a float is passed in
            assert 0 < self.prune_instructions < 1
            self.prune_instructions = [self.prune_instructions] * (self.n_tasks - 1)
        assert (
            len(self.prune_instructions) == self.n_tasks - 1
        ), "Must give prune instructions for every task"

    def save_final_state(self, model, PATH="model_weights.pth"):
        """
        Save the final weights of the model after training
        :param model: pl_module
        :param PATH: The path to weights file
        """
        self.PATH = PATH
        torch.save(model.state_dict(), PATH)

    def load_final_state(self, model):
        """
        Load the final state of the model
        """
        device = model.device
        model.load_state_dict(torch.load(self.PATH))
        model = model.to(device)

    def on_init_end(self, trainer: Trainer):
        self.mode = "train"

    def on_after_backward(self, trainer: Trainer, pl_module: LightningModule):
        if self.mode == "train":
            self.training_mask(pl_module)

        elif self.mode == "fine_tune":
            self.fine_tune_mask(pl_module)

    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule, *args, **kwargs):
        super().on_train_epoch_end(trainer, pl_module)
        if pl_module.current_epoch == self.epoch_split[0] - 1:  # Train epochs completed
            self.mode = "fine_tune"
            new_masks: Dict[str, Tensor]
            if self.current_task == self.n_tasks - 1:
                new_masks = self.mask_remaining_params(pl_module)
            else:
                new_masks = self.prune(
                    model=pl_module,
                    prune_quantile=self.prune_instructions[self.current_task],
                )
            self.masks.append(new_masks)

    def on_fit_end(self, trainer: Trainer, pl_module: LightningModule):
        self.fix_biases(pl_module)  # Fix biases after first task
        self.fix_batch_norm(pl_module)  # Fix batch norm mean, var, and params

        # TODO: This may cause issues with output heads
        # self.fix_all_layers(pl_module)  # Fix all other layers -> may not be necessary?

        self.save_final_state(pl_module)
        self.mode = "train"


# TODO: Reset this to IncrementalAssumption after the fixes are made to BaseMethod in RL.
@dataclass
class PackNetMethod(BaseMethod, target_setting=IncrementalSLSetting):
    # NOTE: these two fields are also used to create the command-line arguments.
    # HyperParameters of the method.
    hparams: BaseModel.HParams = mutable_field(BaseModel.HParams)
    # Configuration options.
    config: Config = mutable_field(Config)
    # Options for the Trainer object.
    trainer_options: TrainerConfig = mutable_field(TrainerConfig)
    # Hyper-Parameters of the PackNet callback
    packnet_hparams: PackNet.HParams = mutable_field(PackNet.HParams)

    def __init__(
        self,
        hparams: BaseModel.HParams = None,
        config: Config = None,
        trainer_options: TrainerConfig = None,
        packnet_hparams: PackNet.HParams = None,
        **kwargs,
    ):
        super().__init__(hparams=hparams, config=config, trainer_options=trainer_options)
        self.packnet_hparams = packnet_hparams or PackNet.HParams()
        self.p_net: PackNet  # This gets set in configure

    def configure(self, setting: Setting):
        # NOTE: super().configure creates the Trainer and calls `configure_callbacks()`,
        # so we have to create `self.p_net` before calling `super().configure`.

        # Ignore all the modules that are task-specific when the setting gives task ids:
        # NOTE: Always ignore the `output_heads` dict, as it contains output heads for
        # each task.
        # NOTE: `model.output_heads[<current_task>]` is the same as `model.output_head`.
        ignored_modules: List[str] = ["output_heads"]
        if setting.task_labels_at_test_time:
            # Also ignore the main output_head.
            ignored_modules.append("output_head")

        self.p_net = PackNet(
            n_tasks=setting.nb_tasks,
            hparams=self.packnet_hparams,
            ignore_modules=ignored_modules,
        )

        self.p_net.current_task = -1
        self.p_net.config_instructions()
        super().configure(setting)

    def fit(self, train_env, valid_env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching between tasks.

        Args:
            task_id (int, optional): the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.
        """
        super().on_task_switch(task_id=task_id)
        if task_id is not None and len(self.p_net.masks) > task_id:
            self.p_net.load_final_state(model=self.model)
            self.p_net.apply_eval_mask(task_idx=task_id, model=self.model)
        self.p_net.current_task = task_id

    def configure_callbacks(self, setting: TaskIncrementalSLSetting = None) -> List[Callback]:
        """Create the PyTorch-Lightning Callbacks for this Setting.

        These callbacks will get added to the Trainer in `create_trainer`.

        Parameters
        ----------
        setting : SettingType
            The `Setting` on which this Method is going to be applied.

        Returns
        -------
        List[Callback]
            A List of `Callback` objects to use during training.
        """
        callbacks = super().configure_callbacks(setting=setting)
        assert self.p_net not in callbacks

        for i in range(len(callbacks)):
            if isinstance(callbacks[i], EarlyStopping):
                callbacks.pop(i)
        print(callbacks)
        if not setting.stationary_context:
            callbacks.append(self.p_net)
        return callbacks

    def create_trainer(self, setting) -> Trainer:
        """Creates a Trainer object from pytorch-lightning for the given setting.
        Returns:
            Trainer: the Trainer object.
        """
        self.trainer_options.max_epochs = (
            self.packnet_hparams.train_epochs + self.packnet_hparams.fine_tune_epochs
        )

        return super().create_trainer(setting)

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        self.hparams = self.hparams.replace(**new_hparams)
        self.packnet_hparams = self.packnet_hparams.replace(**new_hparams["packnet_hparams"])

    def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        hparam_priors: Dict = super().get_search_space(setting=setting)
        hparam_priors["packnet_hparams"] = self.packnet_hparams.get_orion_space_dict()
        return hparam_priors


================================================
FILE: sequoia/methods/packnet_method_test.py
================================================
from typing import ClassVar, Type

from sequoia.methods.base_method_test import TestBaseMethod as BaseMethodTests
from sequoia.methods.packnet_method import PackNetMethod


class TestPackNetMethod(BaseMethodTests):
    Method: ClassVar[Type[PackNetMethod]] = PackNetMethod

    def validate_results(self, setting, method, results):
        """Called at the end of each test run to check that the results make sense for
        the given setting and method.
        """
        super().validate_results(setting, method, results)
        # TODO: Add checks to make sure that the packnet callback's state makes sense
        # for the given setting.


================================================
FILE: sequoia/methods/pl_bolts_methods/__init__.py
================================================
""" TODO: Add some of the pytorch lightning bolts models and such as Methods
targetting the IID Setting.

TODO: Also figure out a way to consider LightningDataModules that aren't Settings
as 'IID' settings, so we can get all the methods and models and datamodules
from pl_bolts for free. 
"""


================================================
FILE: sequoia/methods/pl_dqn.py
================================================
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Deep Reinforcement Learning: Deep Q-network (DQN)

The template illustrates using Lightning for Reinforcement Learning. The example builds a basic DQN using the
classic CartPole environment.

To run the template, just run:
`python template/methods/rl/dqn_pl.py`

After ~1500 steps, you will see the total_reward hitting the max score of 475+.
Open up TensorBoard to see the metrics:

`tensorboard --logdir default`

References
----------

[1] https://github.com/PacktPublishing/Deep-Reinforcement-Learning-Hands-On-
Second-Edition/blob/master/Chapter06/02_dqn_pong.py
"""
import dataclasses
from collections import defaultdict, deque
from dataclasses import dataclass
from typing import (
    Any,
    Callable,
    Container,
    Deque,
    Generic,
    Iterator,
    List,
    Optional,
    Sequence,
    SupportsFloat,
    SupportsInt,
    Tuple,
    Type,
    TypeVar,
    Union,
)

import gym
import numpy as np
import pytorch_lightning as pl
import simple_parsing
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
from gym.spaces import Discrete
from sequoia.common.spaces.typed_dict import TypedDictSpace
from simple_parsing import ArgumentParser, Serializable
from torch import Tensor
from torch.nn import functional as F
from torch.optim.optimizer import Optimizer
from torch.utils.data import DataLoader
from torch.utils.data.dataset import IterableDataset


class DQN(nn.Module):
    """Simple MLP network."""

    def __init__(self, obs_size: int, n_actions: int, hidden_size: int = 128):
        """
        Args:
            obs_size: observation/state size of the environment
            n_actions: number of discrete actions available in the environment
            hidden_size: size of hidden layers
        """
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(obs_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, n_actions),
        )

    def forward(self, x: Tensor) -> Tensor:
        return self.net(torch.as_tensor(x, dtype=torch.float32))


T = TypeVar("T", np.ndarray, Tensor)
V = TypeVar("V", np.ndarray, Tensor)


@dataclass
class Experience(Generic[T]):
    """Experience for one step."""

    state: T
    action: SupportsInt
    reward: SupportsFloat
    done: bool
    new_state: T


@dataclass
class ExperienceBatch(Generic[T]):
    """Experience for more than one step.

    Note: neighbouring indices can be independant, i.e. this isn't a sequence of actions in an env.
    """

    states: T
    actions: T
    rewards: T
    dones: T
    new_states: T

    def __len__(self) -> int:
        return len(self.dones)

    def __getitem__(self, index: Union[int, slice]) -> Union[Experience[T], "ExperienceBatch[T]"]:
        if isinstance(index, int):
            return Experience(  # type: ignore
                state=self.states[index],
                action=self.actions[index],
                reward=self.rewards[index],
                done=bool(self.dones[index]),
                new_state=self.new_states[index],
            )
        return ExperienceBatch(
            states=self.states[index],
            actions=self.actions[index],
            rewards=self.rewards[index],
            dones=self.dones[index],
            new_states=self.new_states[index],
        )

    @classmethod
    def stack(cls, items: Sequence["Experience[T]"]) -> "ExperienceBatch[T]":
        field_names = set(f.name for item in items for f in dataclasses.fields(item))
        field_values = defaultdict(list)
        for item in items:
            for field_name in field_names:
                f_value = getattr(item, field_name)
                field_values[field_name].append(f_value)
        stack_fn = np.stack if isinstance(items[0].state, np.ndarray) else torch.stack
        return cls(  # type: ignore
            **{f_name + "s": stack_fn(f_values) for f_name, f_values in field_values.items()}
            # states=np.concatenate(states),
            # actions=np.concatenate(actions),
            # rewards=np.concatenate(rewards, dtype=np.float32),
            # dones=np.concatenate(dones, dtype=bool),
            # new_states=np.concatenate(next_states),
        )

    def _map(self, fn: Callable[[T], V]) -> "ExperienceBatch[V]":
        return type(self)(  # type: ignore
            **{f.name: fn(getattr(self, f.name)) for f in dataclasses.fields(self)}
        )

    def numpy(self) -> "ExperienceBatch[np.ndarray]":
        def _numpy(v) -> np.ndarray:
            return v.detach().cpu().numpy() if isinstance(v, Tensor) else np.array(v)

        return self._map(_numpy)

    def to(self, device: torch.device = None, **kwargs) -> "ExperienceBatch[Tensor]":
        return self._map(lambda v: torch.as_tensor(v, device=device, **kwargs))


E = TypeVar("E", bound=Experience)


class ReplayBuffer(Generic[T]):
    """Replay Buffer for storing past experiences allowing the agent to learn from them.

    >>> buffer = ReplayBuffer(5)
    """

    def __init__(self, capacity: int) -> None:
        """
        Args:
            capacity: size of the buffer
        """
        self.buffer: Deque[Experience[T]] = deque(maxlen=capacity)

    def __len__(self) -> int:
        return len(self.buffer)

    def append(self, experience: Experience[T]) -> None:
        """Add experience to the buffer.

        Args:
            experience: tuple (state, action, reward, done, new_state)
        """
        self.buffer.append(experience)

    def sample(
        self,
        batch_size: int,
    ) -> ExperienceBatch[T]:
        indices = np.random.choice(len(self.buffer), batch_size, replace=False)
        samples: List[Experience[T]] = [self.buffer[idx] for idx in indices]
        return ExperienceBatch.stack(samples)


class RLDataset(IterableDataset[ExperienceBatch[T]]):
    """Iterable Dataset containing the buffer which will be updated with new experiences during
    training.

    >>> dataset = RLDataset(ReplayBuffer(5))
    """

    def __init__(self, buffer: ReplayBuffer, sample_size: int = 200) -> None:
        """
        Args:
            buffer: replay buffer
            sample_size: number of experiences to sample at a time
        """
        self.buffer = buffer
        self.sample_size = sample_size

    def __iter__(self) -> Iterator[Experience[T]]:
        sampled_experience_batch = self.buffer.sample(self.sample_size)
        for sampled_experience in sampled_experience_batch:
            assert isinstance(sampled_experience, Experience), sampled_experience
            yield sampled_experience


class Agent:
    """Base Agent class handling the interaction with the environment.

    ```python
    env = gym.make("CartPole-v1")
    buffer = ReplayBuffer(10)
    agent = Agent(env, buffer)
    ```
    """

    def __init__(self, env: gym.Env, replay_buffer: ReplayBuffer) -> None:
        """
        Args:
            env: training environment
            replay_buffer: replay buffer storing experiences
        """
        self.env = env
        self.replay_buffer = replay_buffer
        self.reset()
        self.state = self.env.reset()

    def reset(self) -> None:
        """Resets the environment and updates the state."""
        self.state = self.env.reset()

    def get_action(self, state: Tensor, net: nn.Module, epsilon: float) -> int:
        """Using the given network, decide what action to carry out using an epsilon-greedy policy.

        Args:
            net: DQN network
            epsilon: value to determine likelihood of taking a random action
            device: current device

        Returns:
            action
        """
        if np.random.random() < epsilon:
            action = self.env.action_space.sample()
        else:
            q_values = net(state)
            _, action = torch.max(q_values, dim=-1)
            # TODO: Adapt this for batched actions.
            action = int(action.item())

        return action

    @torch.no_grad()
    def play_step(
        self,
        net: nn.Module,
        epsilon: float = 0.0,
        device: Union[str, torch.device] = "cpu",
    ) -> Tuple[float, bool]:
        """Carries out a single interaction step between the agent and the environment.

        Args:
            net: DQN network
            epsilon: value to determine likelihood of taking a random action
            device: current device

        Returns:
            reward, done
        """
        state = torch.as_tensor([self.state], device=torch.device(device))

        action = self.get_action(state=state, net=net, epsilon=epsilon)

        # do step in the environment
        new_state, reward, done, _ = self.env.step(action)

        exp = Experience(
            state=self.state,
            action=action,
            reward=reward,
            done=done,
            new_state=new_state,
        )

        self.replay_buffer.append(exp)

        self.state = new_state
        if done:
            self.state = self.env.reset()
        return reward, done


class DQNLightning(pl.LightningModule):
    """Basic DQN Model.

    ```python
    DQNLightning(env="CartPole-v1")
    ```
    """

    @dataclass
    class HParams(Serializable):
        # Size of the batches.
        batch_size: int = 16

        # learning rate.
        lr: float = 1e-2

        # Discount factor.
        gamma: float = 0.99

        # Interval at which we update the target network.
        sync_rate: int = 10

        # Capacity of the replay buffer.
        replay_size: int = 1000

        # How many samples do we use to fill our buffer at the start of training.
        warm_start_steps: int = 1000

        # The frame at which epsilon should stop decaying.
        eps_last_frame: int = 1000

        # Starting value of epsilon.
        eps_start: float = 1.0

        # Final value of epsilon
        eps_end: float = 0.01

        # Max length of an episode.
        episode_length: int = 200

    def __init__(self, env: Union[str, gym.Env[np.ndarray, int]], hp: HParams = None) -> None:
        super().__init__()
        self.hp = hp or self.HParams()
        self.save_hyperparameters({"hp": self.hp.to_dict()})

        self.env = gym.make(env) if isinstance(env, str) else env
        from gym.spaces import Box, Discrete

        self.episode_length: Optional[int] = get_max_episode_length(self.env)

        if not isinstance(self.env.observation_space, Box):
            raise RuntimeError(
                f"Only works on envs with Box observation space, not {self.env.observation_space}."
            )
        if not isinstance(self.env.action_space, Discrete):
            raise RuntimeError(
                f"Only works on envs with Discrete action space, not {self.env.action_space}."
            )

        from gym.spaces.utils import flatdim

        # TODO: Adapt this to also work with image observations.
        obs_size = flatdim(self.env.observation_space)
        n_actions = self.env.action_space.n

        self.net = DQN(obs_size, n_actions)
        self.target_net = DQN(obs_size, n_actions)

        self.buffer = ReplayBuffer(self.hp.replay_size)
        self.agent = Agent(self.env, self.buffer)
        self.total_reward = 0
        self.episode_reward = 0
        self.trainer: Optional[pl.Trainer]
        self.populate(self.hp.warm_start_steps)

    def populate(self, steps: int = 1000) -> None:
        """Carries out several random steps through the environment to initially fill up the replay buffer with
        experiences.

        Args:
            steps: number of random steps to populate the buffer with
        """
        for i in range(steps):
            try:
                self.agent.play_step(self.net, epsilon=1.0)
            except gym.error.ClosedEnvironmentError as err:
                print(f"Unable to add more data to the buffer: env closed after {i} steps.")
                break

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Passes in a state `x` through the network and gets the `q_values` of each action as an output.

        Args:
            x: environment state

        Returns:
            q values
        """
        output = self.net(x)
        return output

    def dqn_mse_loss(self, batch: ExperienceBatch[Tensor]) -> torch.Tensor:
        """Calculates the mse loss using a mini batch from the replay buffer.

        Args:
            batch: current mini batch of replay data

        Returns:
            loss
        """
        states = batch.states
        actions = batch.actions
        rewards = batch.rewards.type(dtype=torch.float32)
        dones = batch.dones
        next_states = batch.new_states

        values: Tensor = self.net(states)
        state_action_values = values.gather(1, actions.unsqueeze(-1)).squeeze(-1)

        with torch.no_grad():
            next_state_values: Tensor = self.target_net(next_states).max(1)[0]
            next_state_values[dones] = 0.0
            next_state_values = next_state_values.detach()

        expected_state_action_values = next_state_values * self.hp.gamma + rewards
        return F.mse_loss(state_action_values, expected_state_action_values)

    def training_step(self, batch: ExperienceBatch[Tensor], batch_idx: int) -> Optional[Tensor]:
        """Carries out a single step through the environment to update the replay buffer. Then calculates loss
        based on the minibatch received.

        Args:
            batch: current mini batch of replay data
            batch_idx: batch index

        Returns:
            Training loss and log metrics
        """
        device = batch.states.device
        epsilon = max(
            self.hp.eps_end,
            self.hp.eps_start - (self.global_step + 1) / self.hp.eps_last_frame,
        )
        try:
            # step through environment with agent
            reward, done = self.agent.play_step(self.net, epsilon, device)
        except gym.error.ClosedEnvironmentError:
            print(f"Environment closed at batch {batch_idx}")
            assert self.trainer is not None
            self.trainer.should_stop = True
            return

        self.episode_reward += reward

        # calculates training loss
        loss = self.dqn_mse_loss(batch)

        if done:
            self.total_reward = self.episode_reward
            self.episode_reward = 0

        # Soft update of target network
        if self.global_step % self.hp.sync_rate == 0:
            self.target_net.load_state_dict(self.net.state_dict())

        self.log_dict(
            {
                "total_reward": self.total_reward,
                "reward": reward,
                "steps": float(self.global_step),
            },
            prog_bar=True,
        )
        return loss

    def configure_optimizers(self) -> List[Optimizer]:
        """Initialize Adam optimizer."""
        optimizer = optim.Adam(self.net.parameters(), lr=self.hp.lr)
        return [optimizer]

    def __dataloader(self) -> DataLoader:
        """Initialize the Replay Buffer dataset used for retrieving experiences."""
        dataset = RLDataset(self.buffer, sample_size=self.episode_length or 200)
        dataloader = DataLoader(
            dataset=dataset,
            batch_size=self.hp.batch_size,
            sampler=None,
            collate_fn=ExperienceBatch.stack,
        )
        return dataloader

    def train_dataloader(self) -> DataLoader:
        """Get train loader."""
        return self.__dataloader()

    def get_device(self, batch) -> str:
        """Retrieve device currently being used by minibatch."""
        return batch[0].device.index if self.on_gpu else "cpu"

    @classmethod
    def add_model_specific_args(cls, parent_parser: ArgumentParser):  # pragma: no-cover
        parent_parser.add_arguments(cls.HParams, "hp")
        return parent_parser


def get_max_episode_length(env: Union[gym.Env, gym.Wrapper]) -> Optional[int]:
    """Inspects the env to get the max episode length, if it is wrapped with a
    `gym.wrappers.TimeLimit` wrapper.
    If the env isn't wrapped with a TimeLimit, then returns None.
    """
    while isinstance(env, gym.Wrapper):
        if isinstance(env, gym.wrappers.TimeLimit):
            return env._max_episode_steps
        env = env.env
    if env.spec is not None:
        return env.spec.max_episode_steps
    return None


from sequoia import Method
from sequoia.settings.rl import RLEnvironment, RLSetting
from sequoia.settings.rl.objects import Actions, Observations, Rewards


class PlDqnMethod(Method, target_setting=RLSetting):
    def __init__(self, hp: DQNLightning.HParams = None) -> None:
        super().__init__()
        self.hp = hp or DQNLightning.HParams()
        self.model: Optional[DQNLightning] = None

    def configure(self, setting: RLSetting) -> None:
        self.model = None
        self.train_max_steps = setting.train_max_steps

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        from sequoia.common.gym_wrappers import (
            TransformAction,
            TransformObservation,
            TransformReward,
        )

        # Our simple DQN model expects to get arrays / integer actions, so we adapt the env a bit
        # using some wrappers.
        train_env = TransformObservation(train_env, lambda obs: obs.x)
        train_env = TransformReward(train_env, lambda rew: rew.y)
        if isinstance(train_env.action_space, TypedDictSpace):
            actions_type: Type[Actions] = train_env.action_space.dtype
            # Make it possible to send just ints to the env, and wrap them up into an Actions object.
            train_env = TransformAction(train_env, lambda act: actions_type(y_pred=act))

        if self.model is None:
            self.model = DQNLightning(env=train_env, hp=self.hp)

        trainer = pl.Trainer(
            gpus=1,
            strategy="dp",
            val_check_interval=100,
            max_steps=self.train_max_steps,
        )
        trainer.fit(self.model)

    def get_actions(self, observations: Observations, action_space: Discrete) -> Actions:
        assert self.model is not None
        with torch.no_grad():
            obs = torch.as_tensor(
                observations.x,
                device=torch.device(self.model.device),
                dtype=self.model.dtype,
            )
            v = self.model.forward(obs)
        selected_action = v.argmax(-1).cpu().numpy()
        return selected_action


def main() -> None:
    parser = ArgumentParser()
    parser = DQNLightning.add_model_specific_args(parser)
    parser.add_argument("--seed", type=int, default=None, help="Random seed")

    args = parser.parse_args()

    # env = gym.make("CartPole-v1")
    # hp: DQNLightning.HParams = args.hp

    # model = DQNLightning(env=env, hp=hp)
    # pl.seed_everything(args.seed)

    # trainer = pl.Trainer(gpus=1, strategy="dp", val_check_interval=100)

    # trainer.fit(model)
    from sequoia.settings.rl import TraditionalRLSetting, MultiTaskRLSetting

    setting = MultiTaskRLSetting(
        dataset="CartPole-v1",
        nb_tasks=1,
        train_max_steps=2_000,
    )
    setting.prepare_data()
    setting.setup()
    setting.train_dataloader()
    setting.test_dataloader()
    method = PlDqnMethod()
    from sequoia.common.config import Config

    results = setting.apply(method, config=Config(debug=True))
    print(results)
    return


if __name__ == "__main__":

    main()


================================================
FILE: sequoia/methods/pnn/__init__.py
================================================
from .layers import PNNConvLayer, PNNLinearBlock
from .model_rl import PnnA2CAgent
from .model_sl import PnnClassifier
from .pnn_method import PnnMethod


================================================
FILE: sequoia/methods/pnn/layers.py
================================================
import torch.nn as nn
import torch.nn.functional as F

"""
Based on https://github.com/TomVeniat/ProgressiveNeuralNetworks.pytorch
"""


class PNNConvLayer(nn.Module):
    def __init__(self, col, depth, n_in, n_out, kernel_size=3):
        super(PNNConvLayer, self).__init__()
        self.col = col
        self.layer = nn.Conv2d(n_in, n_out, kernel_size, stride=2, padding=1)

        self.u = nn.ModuleList()
        if depth > 0:
            self.u.extend(
                [nn.Conv2d(n_in, n_out, kernel_size, stride=2, padding=1) for _ in range(col)]
            )

    def forward(self, inputs):
        if not isinstance(inputs, list):
            inputs = [inputs]

        cur_column_out = self.layer(inputs[-1])
        prev_columns_out = [mod(x) for mod, x in zip(self.u, inputs)]

        return F.relu(cur_column_out + sum(prev_columns_out))


class PNNLinearBlock(nn.Module):
    def __init__(self, col: int, depth: int, n_in: int, n_out: int):
        super(PNNLinearBlock, self).__init__()
        self.layer = nn.Linear(n_in, n_out)

        self.u = nn.ModuleList()
        if depth > 0:
            self.u.extend([nn.Linear(n_in, n_out) for _ in range(col)])

    def forward(self, inputs):
        if not isinstance(inputs, list):
            inputs = [inputs]

        cur_column_out = self.layer(inputs[-1])
        prev_columns_out = [mod(x) for mod, x in zip(self.u, inputs)]

        return F.relu(cur_column_out + sum(prev_columns_out))


================================================
FILE: sequoia/methods/pnn/model_rl.py
================================================
from typing import List

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms

from .layers import PNNConvLayer, PNNLinearBlock


class PnnA2CAgent(nn.Module):
    """
    @article{rusu2016progressive,
      title={Progressive neural networks},
      author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
      journal={arXiv preprint arXiv:1606.04671},
      year={2016}
    }
    """

    def __init__(self, arch="mlp", hidden_size=256):
        super(PnnA2CAgent, self).__init__()
        self.columns_actor = nn.ModuleList([])
        self.columns_critic = nn.ModuleList([])
        self.columns_conv = nn.ModuleList([])
        self.arch = arch
        self.hidden_size = hidden_size
        # TODO: This doesn't take the observation space into account at all!
        # Only works for Pixel Cartpole at the moment.
        # Original size 3 x 400 x 600
        self.transformation = transforms.Compose(
            [
                transforms.ToPILImage(),
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
            ]
        )

    def forward(self, observations):
        assert (
            self.columns_actor
        ), "PNN should at least have one column (missing call to `new_task` ?)"
        t = observations.task_labels

        if self.arch == "mlp":
            x = torch.from_numpy(observations.x).unsqueeze(0).float()
            inputs_critic = [c[1](c[0](x)) for c in self.columns_critic]
            inputs_actor = [c[1](c[0](x)) for c in self.columns_actor]

            outputs_critic = []
            outputs_actor = []
            for i, column in enumerate(self.columns_critic):
                outputs_critic.append(column[2](inputs_critic[: i + 1]))
                outputs_actor.append(self.columns_actor[i][2](inputs_actor[: i + 1]))

            ind_depth = 3

        else:
            x = self.transfor_img(observations.x).unsqueeze(0).float()
            inputs = [c[1](c[0](x)) for c in self.columns_conv]

            outputs = []
            for i, column in enumerate(self.columns_conv):
                outputs.append(column[3](column[2](inputs[: i + 1])))

            inputs = outputs
            outputs = []
            for i, column in enumerate(self.columns_conv):
                outputs.append(column[5](column[4](inputs[: i + 1])))

            inputs_critic = [c[6](outputs[i]).view(1, -1) for i, c in enumerate(self.columns_conv)]
            inputs_actor = inputs_critic[:]

            outputs_critic = []
            outputs_actor = []
            for i, column in enumerate(self.columns_critic):
                outputs_critic.append(column[0](inputs_critic[: i + 1]))
                outputs_actor.append(self.columns_actor[i][0](inputs_actor[: i + 1]))

            ind_depth = 1

        critic = []
        for i, column in enumerate(self.columns_critic):
            critic.append(column[ind_depth](outputs_critic[i]))

        actor = []
        for i, column in enumerate(self.columns_actor):
            actor.append(F.softmax(column[ind_depth](outputs_actor[i]), dim=1))

        return critic[t], actor[t]

    def new_task(self, device, num_inputs, num_actions=5):
        task_id = len(self.columns_actor)

        if self.arch == "conv":
            sizes = [num_inputs, 32, 64, self.hidden_size]
            modules_conv = nn.Sequential()

            modules_conv.add_module("Conv1", PNNConvLayer(task_id, 0, sizes[0], sizes[1]))
            modules_conv.add_module("MaxPool1", nn.MaxPool2d(3))
            modules_conv.add_module("Conv2", PNNConvLayer(task_id, 1, sizes[1], sizes[2]))
            modules_conv.add_module("MaxPool2", nn.MaxPool2d(3))
            modules_conv.add_module("Conv3", PNNConvLayer(task_id, 2, sizes[2], sizes[3]))
            modules_conv.add_module("MaxPool3", nn.MaxPool2d(3))
            modules_conv.add_module("globavgpool2d", nn.AdaptiveAvgPool2d((1, 1)))
            self.columns_conv.append(modules_conv)

        modules_actor = nn.Sequential()
        modules_critic = nn.Sequential()

        if self.arch == "mlp":
            modules_actor.add_module("linAc1", nn.Linear(num_inputs, self.hidden_size))
            modules_actor.add_module("relAc", nn.ReLU(inplace=True))
        modules_actor.add_module(
            "linAc2", PNNLinearBlock(task_id, 1, self.hidden_size, self.hidden_size)
        )
        modules_actor.add_module("linAc3", nn.Linear(self.hidden_size, num_actions))

        if self.arch == "mlp":
            modules_critic.add_module("linCr1", nn.Linear(num_inputs, self.hidden_size))
            modules_critic.add_module("relCr", nn.ReLU(inplace=True))
        modules_critic.add_module(
            "linCr2", PNNLinearBlock(task_id, 1, self.hidden_size, self.hidden_size)
        )
        modules_critic.add_module("linCr3", nn.Linear(self.hidden_size, 1))

        self.columns_actor.append(modules_actor)
        self.columns_critic.append(modules_critic)

        print("Add column of the new task")

    def unfreeze_columns(self):
        for i, c in enumerate(self.columns_actor):
            for params in c.parameters():
                params.requires_grad = True

            for params in self.columns_critic[i].parameters():
                params.requires_grad = True

        for i, c in enumerate(self.columns_conv):
            for params in c.parameters():
                params.requires_grad = True

    def freeze_columns(self, skip: List[int] = None):
        if skip is None:
            skip = []

        self.unfreeze_columns()

        for i, c in enumerate(self.columns_actor):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False

                for params in self.columns_critic[i].parameters():
                    params.requires_grad = False

        for i, c in enumerate(self.columns_conv):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False

        print("Freeze columns from previous tasks")

    def parameters(self, task_id):
        param = []
        for p in self.columns_critic[task_id].parameters():
            param.append(p)
        for p in self.columns_actor[task_id].parameters():
            param.append(p)

        if len(self.columns_conv) > 0:
            for p in self.columns_conv[task_id].parameters():
                param.append(p)

        return param

    def transfor_img(self, img):
        return self.transformation(img)
        # return lambda img: imresize(img[35:195].mean(2), (80,80)).astype(np.float32).reshape(1,80,80)/255.


================================================
FILE: sequoia/methods/pnn/model_sl.py
================================================
from typing import List, Optional, Tuple

import torch
import torch.nn as nn
from torch import Tensor

from sequoia.settings import Actions, PassiveEnvironment
from sequoia.settings.sl.incremental.objects import Observations, Rewards
from sequoia.utils.logging_utils import get_logger

from .layers import PNNLinearBlock

logger = get_logger(__name__)


class PnnClassifier(nn.Module):
    """
    @article{rusu2016progressive,
      title={Progressive neural networks},
      author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
      journal={arXiv preprint arXiv:1606.04671},
      year={2016}
    }
    """

    def __init__(self, n_layers):
        super().__init__()
        self.n_layers = n_layers
        self.columns = nn.ModuleList([])

        self.loss = torch.nn.CrossEntropyLoss()
        self.device = None
        self.n_tasks = 0
        self.n_classes_per_task: List[int] = []

    def forward(self, observations: Observations):
        assert self.columns, "PNN should at least have one column (missing call to `new_task` ?)"
        x = observations.x
        x = torch.flatten(x, start_dim=1)
        task_labels: Optional[Tensor] = observations.task_labels
        batch_size = x.shape[0]
        n_known_tasks = len(self.columns)
        last_known_task_id = n_known_tasks - 1

        if task_labels is None:
            # TODO: Use random output heads per item?
            logger.warning(
                f"Encoutering None task labels, assigning a fake random task id for each sample."
            )
            task_labels = torch.randint(n_known_tasks, (batch_size,))
            # task_labels = np.array([None for _ in range(len(x))])

        unique_task_labels = set(task_labels.tolist())
        # TODO: Debug this:
        column_outputs = [
            column[0](x) + n_classes_in_task
            for n_classes_in_task, column in zip(self.n_classes_per_task, self.columns)
        ]
        inputs = column_outputs
        for layer in range(1, self.n_layers):
            outputs = []

            for i, column in enumerate(self.columns):
                outputs.append(column[layer](inputs[: i + 1]))

            inputs = outputs

        y_logits: Optional[Tensor] = None
        task_masks = {}
        # BUG: Can't apply PNN to the ClassIncrementalSetting at the moment.

        for task_id in unique_task_labels:
            task_mask = task_labels == task_id
            task_masks[task_id] = task_mask
            if task_id is None or task_id >= n_known_tasks:
                logger.warning(
                    f"Task id {task_id} is encountered, but we haven't trained for it yet!"
                )
                task_id = last_known_task_id

            if y_logits is None:
                y_logits = inputs[task_id]
            else:
                y_logits[task_mask] = inputs[task_id][task_mask]

        assert y_logits is not None, "Can't get prediction in model PNN"
        return y_logits

    # def new_task(self, device, num_inputs, num_actions = 5):
    def new_task(self, device, sizes: List[int]):
        assert len(sizes) == self.n_layers + 1, (
            f"Should have the out size for each layer + input size (got {len(sizes)} "
            f"sizes but {self.n_layers} layers)."
        )
        self.n_tasks += 1
        # TODO: Fix this to use the actual number of classes per task.
        n_outputs = sizes[-1]
        self.n_classes_per_task.append(n_outputs)
        task_id = len(self.columns)
        modules = []
        # TODO: Would it also be possible to use convolutional layers here?
        for i in range(0, self.n_layers):
            modules.append(PNNLinearBlock(col=task_id, depth=i, n_in=sizes[i], n_out=sizes[i + 1]))

        new_column = nn.ModuleList(modules).to(device)
        self.columns.append(new_column)
        self.device = device

        print("Add column of the new task")

    def freeze_columns(self, skip: List[int] = None):
        if skip == None:
            skip = []

        for i, c in enumerate(self.columns):
            for params in c.parameters():
                params.requires_grad = True

        for i, c in enumerate(self.columns):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False

        print("Freeze columns from previous tasks")

    def shared_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        environment: PassiveEnvironment,
    ):
        """Shared step used for both training and validation.

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            Batch containing Observations, and optional Rewards. When the Rewards are
            None, it means that we'll need to provide the Environment with actions
            before we can get the Rewards (e.g. image labels) back.

            This happens for example when being applied in a Setting which cares about
            sample efficiency or training performance, for example.

        environment : Environment
            The environment we're currently interacting with. Used to provide the
            rewards when they aren't already part of the batch (as mentioned above).

        Returns
        -------
        Tuple[Tensor, Dict]
            The Loss tensor, and a dict of metrics to be logged.
        """
        # Since we're training on a Passive environment, we will get both observations
        # and rewards, unless we're being evaluated based on our training performance,
        # in which case we will need to send actions to the environments before we can
        # get the corresponding rewards (image labels).
        observations: Observations = batch[0].to(self.device)
        rewards: Optional[Rewards] = batch[1]

        # Get the predictions:
        logits = self(observations)
        y_pred = logits.argmax(-1)
        # TODO: PNN is coded for the DomainIncrementalSetting, where the action space
        # is the same for each task.

        # Get the rewards, if necessary:
        if rewards is None:
            rewards = environment.send(Actions(y_pred))

        image_labels = rewards.y.to(self.device)
        # print(logits.size())
        loss = self.loss(logits, image_labels)

        accuracy = (y_pred == image_labels).sum().float() / len(image_labels)
        metrics_dict = {"accuracy": accuracy}
        return loss, metrics_dict

    def parameters(self, task_id):
        return self.columns[task_id].parameters()


================================================
FILE: sequoia/methods/pnn/pnn_method.py
================================================
from argparse import Namespace
from dataclasses import dataclass
from typing import Any, Dict, Mapping, Optional, Union

import gym
import numpy as np
import torch
import tqdm
from gym import spaces
from gym.spaces import Box
from numpy import inf
from simple_parsing import ArgumentParser
from wandb.wandb_run import Run

from sequoia.common import Config
from sequoia.common.hparams import HyperParameters, categorical, log_uniform, uniform
from sequoia.common.spaces import Image
from sequoia.common.transforms.utils import is_image
from sequoia.methods import register_method
from sequoia.settings import (
    Actions,
    Method,
    Observations,
    PassiveEnvironment,
    RLSetting,
    Setting,
    TaskIncrementalRLSetting,
    TaskIncrementalSLSetting,
)
from sequoia.settings.assumptions import IncrementalAssumption
from sequoia.settings.base import Environment
from sequoia.utils import get_logger

from .model_rl import PnnA2CAgent
from .model_sl import PnnClassifier

logger = get_logger(__name__)

# BUG: Can't apply PNN to the ClassIncrementalSetting at the moment.
# BUG: Can't apply PNN to any RL Settings at the moment.
# (it was hard-coded to handle pixel cartpole).
# TODO: When those bugs get fixed, restore the 'IncrementalAssumption' as the target
# setting.
# TODO: Debugging PNN on Incremental rather than TaskIncremental


@register_method
class PnnMethod(Method, target_setting=IncrementalAssumption):
    """
    PNN Method.

    Applicable to both RL and SL Settings, as long as there are clear task boundaries
    during training (IncrementalAssumption).
    """

    @dataclass
    class HParams(HyperParameters):
        """Hyper-parameters of the Pnn method."""

        # Learning rate of the optimizer. Defauts to 0.0001 when in SL.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=2e-4)
        num_steps: int = 200  # (only applicable in RL settings.)
        # Discount factor (Only used in RL settings).
        gamma: float = uniform(0.9, 0.999, default=0.99)
        # Number of hidden units (only used in RL settings.)
        hidden_size: int = categorical(64, 128, 256, default=256)
        # Batch size in SL, and number of parallel environments in RL.
        # Defaults to None in RL, and 32 when in SL.
        batch_size: Optional[int] = None
        # Maximum number of training epochs per task. (only used in SL Settings)
        max_epochs_per_task: int = uniform(1, 100, default=10)

    def __init__(self, hparams: HParams = None):
        # We will create those when `configure` will be called, before training.
        self.config: Optional[Config] = None
        self.task_id: Optional[int] = 0
        self.hparams: Optional[PnnMethod.HParams] = hparams
        self.model: Union[PnnA2CAgent, PnnClassifier]
        self.optimizer: torch.optim.Optimizer

    def configure(self, setting: Setting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """

        input_space: Box = setting.observation_space["x"]

        # For now all Settings have `Discrete` (i.e. classification) action spaces.
        action_space: spaces.Discrete = setting.action_space

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.num_actions = action_space.n
        self.num_inputs = np.prod(input_space.shape)

        self.added_tasks = []
        if not (setting.task_labels_at_train_time and setting.task_labels_at_test_time):
            logger.warning(
                RuntimeWarning(
                    "TODO: PNN doesn't have 'propper' task inference, and task labels "
                    "arent always available! This will use an output head at random."
                )
            )
        if isinstance(setting, RLSetting):
            # If we're applied to an RL setting:

            # Used these as the default hparams in RL:
            self.hparams = self.hparams or self.HParams()
            assert self.hparams
            self.train_steps_per_task = setting.steps_per_task

            # We want a batch_size of None, i.e. only one observation at a time.
            setting.batch_size = None

            self.num_steps = self.hparams.num_steps
            # Otherwise, we can train basically as long as we want on each task.
            self.loss_function = {
                "gamma": self.hparams.gamma,
            }
            if is_image(setting.observation_space.x):
                # Observing pixel input.
                self.arch = "conv"
            else:
                # Observing state input (e.g. the 4 floats in cartpole rather than images)
                self.arch = "mlp"
            self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size)

        else:
            # If we're applied to a Supervised Learning setting:
            # Used these as the default hparams in SL:
            self.hparams = self.hparams or self.HParams(
                learning_rate=0.0001,
                batch_size=32,
            )
            if self.hparams.batch_size is None:
                self.hparams.batch_size = 32

            # Set the batch size on the setting.
            setting.batch_size = self.hparams.batch_size
            # For now all Settings on the supervised side of the tree have images as
            # inputs, so the observation spaces are of type `Image` (same as Box, but with
            # additional `h`, `w`, `c` and `b` attributes).
            assert isinstance(input_space, Image)
            assert (
                setting.increment == setting.test_increment
            ), "Assuming same number of classes per task for training and testing."
            # TODO: (@lebrice): Temporarily 'fixing' this by making it so each output
            # head has as many outputs as there are classes in total, which might make
            # no sense, but currently works.
            # It would be better to refactor this so that each output head can have only
            # as many outputs as is required, and then reshape / offset the predictions.
            n_outputs = setting.increment
            n_outputs = setting.action_space.n
            self.layer_size = [self.num_inputs, 256, n_outputs]
            self.model = PnnClassifier(
                n_layers=len(self.layer_size) - 1,
            )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting."""
        # This method gets called if task boundaries are known in the current
        # setting. Furthermore, if task labels are available, task_id will be
        # the index of the new task. If not, task_id will be None.
        # For example, you could do something like this:
        # self.model.current_task = task_id
        if self.training:
            self.model.freeze_columns([task_id])

        if task_id not in self.added_tasks:
            if isinstance(self.model, PnnA2CAgent):
                self.model.new_task(
                    device=self.device,
                    num_inputs=self.num_inputs,
                    num_actions=self.num_actions,
                )
            else:
                self.model.new_task(device=self.device, sizes=self.layer_size)

            self.added_tasks.append(task_id)

        self.task_id = task_id

    def set_optimizer(self):
        self.optimizer = torch.optim.Adam(
            self.model.parameters(self.task_id),
            lr=self.hparams.learning_rate,
        )

    def get_actions(self, observations: Observations, action_space: spaces.Space) -> Actions:
        """Get a batch of predictions (aka actions) for the given observations."""

        observations = observations.to(self.device)
        with torch.no_grad():
            if isinstance(self.model, PnnA2CAgent):
                predictions = self.model(observations)
                _, logit = predictions
                # get the predicted action:
                action = torch.argmax(logit).item()
            else:
                logits = self.model(observations)
                # Get the predicted classes
                y_pred = logits.argmax(dim=-1).cpu().numpy()
                action = y_pred

        assert action in action_space, (action, action_space)
        return action

    def fit(self, train_env: Environment, valid_env: Environment):
        """Train and validate this method using the "environments" for the current task.

        NOTE: `train_env` and `valid_env` are both `gym.Env`s as well as `DataLoader`s.
        This means that if you want to write a "regular" SL training loop, you totally
        can, and if you want to write you RL-style training loop, you can also do that.
        """
        if isinstance(train_env.unwrapped, PassiveEnvironment):
            self.fit_sl(train_env, valid_env)
        else:
            self.fit_rl(train_env, valid_env)

    def fit_rl(self, train_env: gym.Env, valid_env: gym.Env):
        """Training loop for Reinforcement Learning (a.k.a. "active") environment."""
        """
        base on https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f
        """
        if self.model is None:
            self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size)
        assert isinstance(self.model, PnnA2CAgent)

        self.set_optimizer()
        assert self.hparams
        # self.model.float()

        all_lengths = []
        average_lengths = []
        all_rewards = []
        entropy_term = 0

        for episode in range(self.train_steps_per_task):
            values = []
            rewards = []
            log_probs = []

            state = train_env.reset()
            for steps in range(self.num_steps):
                value, policy_dist = self.model(state)

                value = value.item()
                dist = policy_dist.detach().numpy()

                action = np.random.choice(self.num_actions, p=np.squeeze(dist))
                log_prob = torch.log(policy_dist.squeeze(0)[action])
                entropy = -np.sum(np.mean(dist) * np.log(dist))
                new_state, reward, done, _ = train_env.step(action)

                rewards.append(reward.y)
                values.append(value)
                log_probs.append(log_prob)
                entropy_term += entropy
                state = new_state

                if done or steps == self.num_steps - 1:
                    Qval, _ = self.model(state)
                    Qval = Qval.item()
                    all_rewards.append(np.sum(rewards))
                    all_lengths.append(steps)
                    average_lengths.append(np.mean(all_lengths[-10:]))

                    if episode % 10 == 0:
                        print(
                            f"episode: {episode}, "
                            f"reward: {np.sum(rewards)}, "
                            f"total length: {steps}, "
                            f"average length: {average_lengths[-1]}"
                        )
                    break

            Qvals = np.zeros_like(values)
            for t in reversed(range(len(rewards))):
                Qval = rewards[t] + self.hparams.gamma * Qval
                Qvals[t] = Qval

            # update actor critic
            values_tensor = torch.as_tensor(values, dtype=torch.float)
            Qvals = torch.as_tensor(Qvals, dtype=torch.float)
            log_probs_tensor = torch.stack(log_probs)

            advantage = Qvals - values_tensor
            actor_loss = (-log_probs_tensor * advantage).mean()
            critic_loss = 0.5 * advantage.pow(2).mean()
            ac_loss = actor_loss + critic_loss + 0.001 * entropy_term

            self.optimizer.zero_grad()
            ac_loss.backward()
            self.optimizer.step()

    def fit_sl(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
        """Train on a Supervised Learning (a.k.a. "passive") environment."""
        observations: TaskIncrementalSLSetting.Observations = train_env.reset()
        cuda_observations = observations.to(self.device)
        assert isinstance(self.model, PnnClassifier)
        assert self.hparams

        self.set_optimizer()

        best_val_loss = inf
        best_epoch = 0
        for epoch in range(self.hparams.max_epochs_per_task):
            self.model.train()
            print(f"Starting epoch {epoch}")
            # Training loop:
            with torch.set_grad_enabled(True), tqdm.tqdm(train_env) as train_pbar:
                postfix: Dict[str, Any] = {}
                train_pbar.set_description(f"Training Epoch {epoch}")
                for i, batch in enumerate(train_pbar):
                    loss, metrics_dict = self.model.shared_step(
                        batch,
                        environment=train_env,
                    )
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    postfix.update(metrics_dict)
                    train_pbar.set_postfix(postfix)

            # Validation loop:
            self.model.eval()
            with torch.set_grad_enabled(False), tqdm.tqdm(valid_env) as val_pbar:
                postfix = {}
                val_pbar.set_description(f"Validation Epoch {epoch}")
                epoch_val_loss = 0.0

                for i, batch in enumerate(val_pbar):
                    batch_val_loss, metrics_dict = self.model.shared_step(
                        batch,
                        environment=valid_env,
                    )
                    epoch_val_loss += batch_val_loss
                    postfix.update(metrics_dict, val_loss=epoch_val_loss)
                    val_pbar.set_postfix(postfix)

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser) -> None:
        parser.add_arguments(cls.HParams, dest="hparams", default=None)

    @classmethod
    def from_argparse_args(cls, args: Namespace) -> "PnnMethod":
        hparams: PnnMethod.HParams = args.hparams
        method = cls(hparams=hparams)
        return method

    def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        return self.hparams.get_orion_space()

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        # Here we overwrite the corresponding attributes with the new suggested values
        # leaving other fields unchanged.
        # NOTE: These new hyper-paramers will be used in the next run in the sweep,
        # since each call to `configure` will create a new Model.
        self.hparams = self.hparams.replace(**new_hparams)

    def setup_wandb(self, run: Run) -> None:
        """Called by the Setting when using Weights & Biases, after `wandb.init`.

        This method is here to provide Methods with the opportunity to log some of their
        configuration options or hyper-parameters to wandb.

        NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
        this point.

        Parameters
        ----------
        run : wandb.Run
            Current wandb Run.
        """
        run.config["hparams"] = self.hparams.to_dict()


def main_rl():
    """Applies the PnnMethod in a RL Setting."""
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    Config.add_argparse_args(parser, dest="config")
    PnnMethod.add_argparse_args(parser, dest="method")

    setting = TaskIncrementalRLSetting(
        dataset="cartpole",
        nb_tasks=2,
        train_task_schedule={
            0: {"gravity": 10, "length": 0.3},
            1000: {"gravity": 10, "length": 0.5},
        },
    )

    args = parser.parse_args()

    config: Config = Config.from_argparse_args(args, dest="config")
    method: PnnMethod = PnnMethod.from_argparse_args(args, dest="method")
    method.config = config

    # 2. Creating the Method
    # method = ImproveMethod()

    # 3. Applying the method to the setting:
    results = setting.apply(method, config=config)

    print(results.summary())
    print(f"objective: {results.objective}")
    return results


def main_sl():
    """Applies the PnnMethod in a SL Setting."""
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    # Add arguments for the Setting
    # TODO: PNN is coded for the DomainIncrementalSetting, where the action space
    # is the same for each task.
    # parser.add_arguments(DomainIncrementalSetting, dest="setting")
    parser.add_arguments(TaskIncrementalSLSetting, dest="setting")
    # TaskIncrementalSLSetting.add_argparse_args(parser, dest="setting")
    Config.add_argparse_args(parser, dest="config")

    # Add arguments for the Method:
    PnnMethod.add_argparse_args(parser, dest="method")

    args = parser.parse_args()

    # setting: TaskIncrementalSLSetting = args.setting
    setting: TaskIncrementalSLSetting = TaskIncrementalSLSetting.from_argparse_args(
        # setting: DomainIncrementalSetting = DomainIncrementalSetting.from_argparse_args(
        args,
        dest="setting",
    )
    config: Config = Config.from_argparse_args(args, dest="config")

    method: PnnMethod = PnnMethod.from_argparse_args(args, dest="method")

    method.config = config

    results = setting.apply(method, config=config)
    print(results.summary())
    return results


if __name__ == "__main__":
    # Run RL Setting
    main_sl()
    # Run SL Setting
    # main_rl()


================================================
FILE: sequoia/methods/random_baseline.py
================================================
"""A random baseline Method that gives random predictions for any input.

Should be applicable to any Setting.
"""

from argparse import Namespace
from typing import Any, Dict, Mapping, Optional, Union

import gym
import numpy as np
import tqdm
from simple_parsing import ArgumentParser
from torch import Tensor

from sequoia.methods import register_method
from sequoia.settings import Setting
from sequoia.settings.base import Actions, Environment, Method, Observations
from sequoia.settings.sl import SLSetting
from sequoia.utils import get_logger

logger = get_logger(__name__)


@register_method
class RandomBaselineMethod(Method, target_setting=Setting):
    """Baseline method that gives random predictions for any given setting.

    This method doesn't have a model or any parameters. It just returns a random
    action for every observation.
    """

    def __init__(self):
        self.max_train_episodes: Optional[int] = None

    def configure(self, setting: Setting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        if isinstance(setting, SLSetting):
            # Being applied in SL, we will only do one 'epoch" (a.k.a. "episode").
            self.max_train_episodes = 1

    def fit(
        self,
        train_env: Environment,
        valid_env: Environment,
    ):
        episodes = 0
        with tqdm.tqdm(desc="training") as train_pbar:
            while not train_env.is_closed():
                for i, batch in enumerate(train_env):
                    if isinstance(batch, Observations):
                        observations, rewards = batch, None
                    else:
                        observations, rewards = batch

                    batch_size = observations.x.shape[0]
                    y_pred = train_env.action_space.sample()

                    # If we're at the last batch, it might have a different size, so w
                    # give only the required number of values.
                    if isinstance(y_pred, (np.ndarray, Tensor)):
                        if y_pred.shape[0] != batch_size:
                            y_pred = y_pred[:batch_size]

                    if rewards is None:
                        rewards = train_env.send(y_pred)

                    train_pbar.set_postfix({"Episode": episodes, "Step": i})
                    train_pbar.update()
                    # train as you usually would.

                    if train_env.is_closed():
                        break

                episodes += 1
                if self.max_train_episodes and episodes >= self.max_train_episodes:
                    train_env.close()
                    break

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        return action_space.sample()

    def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        logger.warning(
            UserWarning(
                "Hey, you seem to be trying to perform an HPO sweep using the random "
                "baseline method?"
            )
        )
        # Assuming that this is just used for debugging, so giving back a simple space.
        return {"foo": "choices([0, 1, 2])"}

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        foo = new_hparams["foo"]
        print(f"Using new suggested value {foo}")

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser):
        pass

    @classmethod
    def from_argparse_args(cls, args: Namespace):
        return cls()


if __name__ == "__main__":
    RandomBaselineMethod.main()


================================================
FILE: sequoia/methods/random_baseline_test.py
================================================
# TODO: Create a sort of reusable fixture for the Method
# TODO: Figure out how to ACTUALLY set the checkpoint dir in pytorch-lightning!
from typing import List

from sequoia.settings import all_settings

from .random_baseline import RandomBaselineMethod

# Use 'Method' as an alias for the actual Method cusblass under test. (since at
# the moment quite a few tests share some common code.

# List of datasets that are currently supported.
supported_datasets: List[str] = [
    "mnist",
    "fashionmnist",
    "cifar10",
    "cifar100",
    "kmnist",
    "cartpole",
]


def test_is_applicable_to_all_settings():
    settings = RandomBaselineMethod.get_applicable_settings()
    assert set(settings) == set(all_settings)


================================================
FILE: sequoia/methods/stable_baselines3_methods/__init__.py
================================================
from .a2c import A2CMethod, A2CModel
from .base import SB3BaseHParams, StableBaselines3Method
from .ddpg import DDPGMethod, DDPGModel
from .dqn import DQNMethod, DQNModel
from .off_policy_method import OffPolicyMethod, OffPolicyModel
from .on_policy_method import OnPolicyMethod, OnPolicyModel
from .policy_wrapper import PolicyWrapper
from .ppo import PPOMethod, PPOModel
from .sac import SACMethod, SACModel
from .td3 import TD3Method, TD3Model


================================================
FILE: sequoia/methods/stable_baselines3_methods/a2c.py
================================================
""" Method that uses the A2C model from stable-baselines3 and targets the RL
settings in the tree.
"""
import math
from dataclasses import dataclass
from typing import Callable, ClassVar, Dict, Mapping, Optional, Type, Union

import gym
import torch
from gym import spaces
from simple_parsing import mutable_field
from stable_baselines3.a2c import A2C

from sequoia.common.hparams import log_uniform, uniform
from sequoia.methods import register_method
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils import get_logger

from .on_policy_method import OnPolicyMethod, OnPolicyModel

logger = get_logger(__name__)


class A2CModel(A2C, OnPolicyModel):
    """Advantage Actor Critic (A2C) model imported from stable-baselines3.

    Paper: https://arxiv.org/abs/1602.01783
    Code: The SB3 implementation borrows code from
    https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail and
    and Stable Baselines (https://github.com/hill-a/stable-baselines)

    Introduction to A2C:
    https://hackernoon.com/intuitive-rl-intro-to-advantage-actor-critic-a2c-4ff545978752
    """

    @dataclass
    class HParams(OnPolicyModel.HParams):
        """Hyper-parameters of the A2C Model.

        TODO: Set actual 'good' priors for these hyper-parameters, as these were set
        somewhat arbitrarily. (They do however use the same defaults as in SB3).
        """

        # learning rate for the optimizer, it can be a function of the current
        # progress remaining (from 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=7e-4)

        # The number of steps to run for each environment per update (i.e. batch size
        # is n_steps * n_env where n_env is number of environment copies running in
        # parallel)
        # NOTE: Default value here is much lower than in PPO, which might indicate
        # that this A2C is more "on-policy"? (i.e. that it requires data to be super
        # "fresh")?
        n_steps: int = uniform(3, 64, default=5, discrete=True)
        # Discount factor
        gamma: float = 0.99
        # gamma: float = uniform(0.9, 0.9999, default=0.99)

        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator.
        # Equivalent to classic advantage when set to 1.
        gae_lambda: float = 1.0
        # gae_lambda: float = uniform(0.5, 1.0, default=1.0)

        # Entropy coefficient for the loss calculation
        ent_coef: float = 0.0
        # ent_coef: float = uniform(0.0, 1.0, default=0.0)

        # Value function coefficient for the loss calculation
        vf_coef: float = 0.5
        # vf_coef: float = uniform(0.01, 1.0, default=0.5)

        # The maximum value for the gradient clipping
        max_grad_norm: float = 0.5
        # max_grad_norm: float = uniform(0.1, 10, default=0.5)

        # RMSProp epsilon. It stabilizes square root computation in denominator of
        # RMSProp update.
        rms_prop_eps: float = 1e-5
        # rms_prop_eps: float = log_uniform(1e-7, 1e-3, default=1e-5)

        # Whether to use RMSprop (default) or Adam as optimizer
        use_rms_prop: bool = True
        # use_rms_prop: bool = categorical(True, False, default=True)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = False
        # use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE.
        # Default: -1 (only sample at the beginning of the rollout)
        sde_sample_freq: int = -1
        # sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # Whether to normalize or not the advantage
        normalize_advantage: bool = False
        # normalize_advantage: bool = categorical(True, False, default=False)

        # The log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 0

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run.
        # Setting it to auto, the code will be run on the GPU if possible.
        device: Union[torch.device, str] = "auto"

        # :param _init_setup_model: Whether or not to build the network at the
        # creation of the instance
        # _init_setup_model: bool = True


@register_method
@dataclass
class A2CMethod(OnPolicyMethod):
    """Method that uses the A2C model from stable-baselines3."""

    # changing the 'name' in this case here, because the default name would be
    # 'a_2_c'.
    name: ClassVar[str] = "a2c"
    Model: ClassVar[Type[A2CModel]] = A2CModel

    # Hyper-parameters of the A2C model.
    hparams: A2CModel.HParams = mutable_field(A2CModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting=setting)
        if setting.steps_per_phase:
            if self.hparams.n_steps > setting.steps_per_phase:
                self.hparams.n_steps = math.ceil(0.1 * setting.steps_per_phase)
                logger.info(
                    f"Capping the n_steps to 10% of step budget length: " f"{self.hparams.n_steps}"
                )
            # NOTE: We limit the number of trainign steps per task, such that we never
            # attempt to fill the buffer using more samples than the environment allows.
            self.train_steps_per_task = min(
                self.train_steps_per_task,
                setting.steps_per_phase - self.hparams.n_steps - 1,
            )
            logger.info(f"Limitting training steps per task to {self.train_steps_per_task}")

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> A2CModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)

    def get_search_space(self, setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]:
        search_space = super().get_search_space(setting)
        if isinstance(setting.action_space, spaces.Discrete):
            # From stable_baselines3/common/base_class.py", line 170:
            # > Generalized State-Dependent Exploration (gSDE) can only be used with
            #   continuous actions
            # Therefore we remove related entries in the search space, so they keep
            # their default values.
            search_space.pop("use_sde", None)
            search_space.pop("sde_sample_freq", None)
        return search_space


if __name__ == "__main__":
    results = A2CMethod.main()
    print(results)


================================================
FILE: sequoia/methods/stable_baselines3_methods/a2c_test.py
================================================
from typing import ClassVar, Type

from .a2c import A2CMethod, A2CModel
from .base import BaseAlgorithm, StableBaselines3Method
from .base_test import DiscreteActionSpaceMethodTests


class TestA2C(DiscreteActionSpaceMethodTests):
    Method: ClassVar[Type[StableBaselines3Method]] = A2CMethod
    Model: ClassVar[Type[BaseAlgorithm]] = A2CModel


================================================
FILE: sequoia/methods/stable_baselines3_methods/base.py
================================================
""" Example of creating an A2C agent using the simplebaselines3 package.

See https://stable-baselines3.readthedocs.io/en/master/guide/install.html
"""
from abc import ABC
from dataclasses import dataclass
from typing import Any, Callable, ClassVar, Dict, List, Mapping, Optional, Type, Union

import gym
import torch
from gym import spaces
from simple_parsing import choice, mutable_field
from simple_parsing.helpers.hparams import HyperParameters, categorical, log_uniform
from stable_baselines3.common.base_class import BaseAlgorithm, BasePolicy, MaybeCallback

# from stable_baselines3.common.vec_env.obs_dict_wrapper import ObsDictWrapper
from wandb.wandb_run import Run

from sequoia.common.transforms.utils import is_image
from sequoia.settings import Method, Setting
from sequoia.settings.rl.continual import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.serialization import register_decoding_fn

logger = get_logger(__name__)

# "Patch" the _wrap_env function of the BaseAlgorithm class of
# stable_baselines, to make it recognize the VectorEnv from gym.vector as a
# vectorized environment.
# Stable-Baselines3 has a lot of duplicated code from openai gym


# def _wrap_env(env: GymEnv, verbose: int = 0, monitor_wrapper: bool = True) -> VecEnv:
#     """ "
#     Wrap environment with the appropriate wrappers if needed.
#     For instance, to have a vectorized environment
#     or to re-order the image channels.

#     :param env:
#     :param verbose:
#     :param monitor_wrapper: Whether to wrap the env in a ``Monitor`` when possible.
#     :return: The wrapped environment.
#     """

#     # if not isinstance(env, VecEnv):
#     if not (
#         isinstance(env, (VecEnv, VectorEnv))
#         or isinstance(env.unwrapped, (VecEnv, VectorEnv))
#     ):
#         # if not is_wrapped(env, Monitor) and monitor_wrapper:
#         if monitor_wrapper and not (
#             is_wrapped(env, Monitor)
#             or is_wrapped(env, gym.wrappers.Monitor)
#             or has_wrapper(env, gym.wrappers.Monitor)
#         ):
#             if verbose >= 1:
#                 print("Wrapping the env with a `Monitor` wrapper")
#             env = Monitor(env)
#         if verbose >= 1:
#             print("Wrapping the env in a DummyVecEnv.")
#         env = DummyVecEnv([lambda: env])

#     if is_image_space(env.observation_space) and not is_wrapped(env, VecTransposeImage):
#         if verbose >= 1:
#             print("Wrapping the env in a VecTransposeImage.")
#         env = VecTransposeImage(env)

#     # check if wrapper for dict support is needed when using HER
#     if isinstance(env.observation_space, gym.spaces.dict.Dict):
#         env = ObsDictWrapper(env)

#     return env


# BaseAlgorithm._wrap_env = staticmethod(_wrap_env)


class RemoveInfoWrapper(gym.Wrapper):
    """Wrapper used to remove the 'info' dict, since there seems to be a bug in sb3
    whenever there is something in the 'info' dict.
    """

    def step(self, action):
        obs, rewards, done, info = self.env.step(action)
        info = {}
        return obs, rewards, done, info


@dataclass
class SB3BaseHParams(HyperParameters):
    """Hyper-parameters of a model from the `stable_baselines3` package.

    The command-line arguments for these are created with simple-parsing.
    """

    # The policy model to use (MlpPolicy, CnnPolicy, ...)
    policy: Optional[Union[str, Type[BasePolicy]]] = choice("MlpPolicy", "CnnPolicy", default=None)
    # # The base policy used by this method
    # policy_base: Type[BasePolicy]

    # learning rate for the optimizer, it can be a function of the current
    # progress remaining (from 1 to 0)
    learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=1e-4)
    # Additional arguments to be passed to the policy on creation
    policy_kwargs: Optional[Dict[str, Any]] = None
    # the log location for tensorboard (if None, no logging)
    tensorboard_log: Optional[str] = None
    # The verbosity level: 0 none, 1 training information, 2 debug
    verbose: int = 1
    # Device on which the code should run. By default, it will try to use a Cuda
    # compatible device and fallback to cpu if it is not possible.
    device: Union[torch.device, str] = "auto"

    # # Whether the algorithm supports training with multiple environments (as in A2C)
    # support_multi_env: bool = False

    # Whether to create a second environment that will be used for evaluating
    # the agent periodically. (Only available when passing string for the
    # environment)
    create_eval_env: bool = False

    # # When creating an environment, whether to wrap it or not in a Monitor wrapper.
    # monitor_wrapper: bool = True

    # Seed for the pseudo random generators
    seed: Optional[int] = None
    # # Whether to use generalized State Dependent Exploration (gSDE) instead of
    # action noise exploration (default: False)
    # use_sde: bool = False
    # # Sample a new noise matrix every n steps when using gSDE Default: -1
    # (only sample at the beginning of the rollout)
    # sde_sample_freq: int = -1

    # Wether to clear the experience buffer at the beginning of a new task.
    # NOTE: We use to_dict here so that it doesn't get passed do the Policy class.
    clear_buffers_between_tasks: bool = categorical(True, False, default=False, to_dict=False)


@dataclass
class StableBaselines3Method(Method, ABC, target_setting=ContinualRLSetting):
    """Base class for the methods that use models from the stable_baselines3
    repo.
    """

    family: ClassVar[str] = "sb3"

    # Class variable that represents what kind of Model will be used.
    # (This is just here so we can easily create one Method class per model type
    # by just changing this class attribute.)
    Model: ClassVar[Type[BaseAlgorithm]]

    # HyperParameters of the Method.
    hparams: SB3BaseHParams = mutable_field(SB3BaseHParams)

    # The number of training steps to run per task.
    # NOTE: This shouldn't be set to more than the task length when applying this method
    # on a ContinualRLSetting, because we don't currently have a way of "resetting"
    # the nonstationarity in the environment, and there is only one task,
    # therefore if we trained for say 10 million steps, while the
    # non-stationarity only lasts for 10_000 steps, we'd have seen an almost
    # stationary distribution, since the environment would have stopped changing after
    # 10_000 steps.
    # train_steps_per_task: int = 10_000

    # callback(s) called at every step with state of the algorithm.
    callback: MaybeCallback = None
    # The number of timesteps before logging.
    log_interval: int = 100
    # the name of the run for TensorBoard logging
    tb_log_name: str = "run"
    # Evaluate the agent every ``eval_freq`` timesteps (this may vary a little)
    # TODO: Log the evaluations to wandb.
    eval_freq: int = 5_000
    # Number of episode to evaluate the agent
    n_eval_episodes = 5
    # Path to a folder where the evaluations will be saved
    eval_log_path: Optional[str] = None

    def __post_init__(self):
        self.model: Optional[BaseAlgorithm] = None
        # Extra wrappers to add to the train_env and valid_env before passing
        # them to the `learn` method from stable-baselines3.
        import operator
        from functools import partial

        from sequoia.common.gym_wrappers import TransformObservation, TransformReward

        self.extra_train_wrappers: List[Callable[[gym.Env], gym.Env]] = [
            partial(TransformObservation, f=operator.itemgetter("x")),
            # partial(TransformAction, f=operator.itemgetter("y_pred"),
            partial(TransformReward, f=operator.itemgetter("y")),
            RemoveInfoWrapper,
        ]
        self.extra_valid_wrappers: List[Callable[[gym.Env], gym.Env]] = [
            partial(TransformObservation, f=operator.itemgetter("x")),
            partial(TransformReward, f=operator.itemgetter("y")),
            RemoveInfoWrapper,
        ]
        # Number of timesteps to train on for each task.
        self.total_timesteps_per_task: int = 0

        self.train_env: gym.Env = None
        self.valid_env: gym.Env = None

    def configure(self, setting: ContinualRLSetting):
        # Delete the model, if present.
        self.model = None
        # For now, we don't batch the space because stablebaselines3 will add an
        # additional batch dimension if we do.
        # TODO: Still need to debug the batching stuff with stablebaselines,
        # some methods support it, some don't, and it doesn't recognize
        # VectorEnvs from gym.
        setting.batch_size = None

        # BUG: Need to fix an issue when using the CnnPolicy and Atary envs, the
        # input shape isn't what they expect (only 2 channels instead of three
        # apparently.)
        # from sequoia.common.transforms import Transforms
        # NOTE: Important to not use any transforms, since the SB3 methods want to get
        # the 'raw' np.uint8 image as an input.
        transforms = [
            # Transforms.to_tensor,
            # Transforms.three_channels,
            # Transforms.channels_first_if_needed,
        ]
        setting.transforms = transforms
        setting.train_transforms = transforms
        setting.val_transforms = transforms
        setting.test_transforms = transforms

        if self.hparams.policy is None:
            if is_image(setting.observation_space.x):
                self.hparams.policy = "CnnPolicy"
            else:
                self.hparams.policy = "MlpPolicy"

        logger.debug(f"Will use {self.hparams.policy} as the policy.")
        # TODO: Double check that some settings might not impose a limit on
        # number of training steps per environment (e.g. task-incremental RL?)
        if setting.steps_per_phase:
            # if self.train_steps_per_task > setting.steps_per_phase:
            #     warnings.warn(
            #         RuntimeWarning(
            #             f"Can't train for the requested {self.train_steps_per_task} "
            #             f"steps, since we're (currently) only allowed a maximum of "
            #             f"{setting.steps_per_phase} steps.)"
            #         )
            #     )
            # Use as many training steps as possible.
            self.train_steps_per_task = setting.steps_per_phase - 1
        # Otherwise, we can train basically as long as we want on each task.

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> BaseAlgorithm:
        """Create a Model given the training and validation environments."""
        model_kwargs = self.hparams.to_dict()
        assert "clear_buffers_between_tasks" not in model_kwargs
        return self.Model(env=train_env, **model_kwargs)

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        # Remove the extra information that the Setting gives us.
        for wrapper in self.extra_train_wrappers:
            train_env = wrapper(train_env)

        for wrapper in self.extra_valid_wrappers:
            valid_env = wrapper(valid_env)

        if self.model is None:
            self.model = self.create_model(train_env, valid_env)
        else:
            # TODO: "Adapt"/re-train the model on the new environment.
            # BUG: In the MT10 benchmark, the last entry in the observation space is
            # very slightly different, which prevents us from doing this:
            """
            >>> env.observation_space.low
            array([-0.525 ,  0.348 , -0.0525, -1.    ,    -inf,    -inf,    -inf,
                    -inf,    -inf,    -inf,    -inf,    -inf,    -inf,    -inf,
                    -inf,    -inf,    -inf,    -inf, -0.525 ,  0.348 , -0.0525,
                    -1.,    -inf,    -inf,    -inf,    -inf,    -inf,    -inf,
                    -inf,    -inf,    -inf,    -inf,    -inf,    -inf,    -inf,
                    -inf, -0.1   ,  0.8   ,  0.01  ], dtype=float32)
            >>> observation_space.low
            array([-0.525 ,  0.348 , -0.0525, -1.    ,    -inf,    -inf,    -inf,
                    -inf,    -inf,    -inf,    -inf,    -inf,    -inf,    -inf,
                    -inf,    -inf,    -inf,    -inf, -0.525 ,  0.348 , -0.0525,
                    -1.,    -inf,    -inf,    -inf,    -inf,    -inf,    -inf,
                    -inf,    -inf,    -inf,    -inf,    -inf,    -inf,    -inf,
                    -inf, -0.1   ,  0.8   ,  0.05  ], dtype=float32)
            """
            if self.train_env is not None:
                # BUG: MT10 has *slightly* different values in 'low' between tasks!
                if (
                    isinstance(train_env.observation_space, spaces.Box)
                    and train_env.observation_space.shape[-1] == 39
                ):
                    train_env.observation_space = self.train_env.observation_space
            self.model.set_env(train_env)
        self.train_env = train_env
        self.valid_env = valid_env

        # Decide how many steps to train on.
        total_timesteps = self.train_steps_per_task
        # TODO: Get the max number of steps directly from the env, rather than from the
        # setting's fields.
        logger.info(f"Starting training, for a maximum of {total_timesteps} steps.")
        # todo: Customize the parametrers of the model and/or of this "learn"
        # method if needed.
        self.model = self.model.learn(
            # The total number of samples (env steps) to train on
            total_timesteps=total_timesteps,
            eval_env=valid_env,
            callback=self.callback,
            log_interval=self.log_interval,
            tb_log_name=self.tb_log_name,
            eval_freq=self.eval_freq,
            n_eval_episodes=self.n_eval_episodes,
            eval_log_path=self.eval_log_path,
            # whether or not to reset the current timestep number (used in logging)
            reset_num_timesteps=True,
        )

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        obs = observations.x
        predictions = self.model.predict(obs)
        action, _ = predictions
        assert action in action_space, (observations, action, action_space)
        return action

    def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        return {
            "algo_hparams": self.hparams.get_orion_space(),
        }

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        # Here we overwrite the corresponding attributes with the new suggested values
        # leaving other fields unchanged.
        # NOTE: These new hyper-paramers will be used in the next run in the sweep,
        # since each call to `configure` will create a new Model.
        self.hparams = self.hparams.replace(**new_hparams["algo_hparams"])

    def setup_wandb(self, run: Run) -> None:
        """Called by the Setting when using Weights & Biases, after `wandb.init`.

        This method is here to provide Methods with the opportunity to log some of their
        configuration options or hyper-parameters to wandb.

        NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
        this point.

        Parameters
        ----------
        run : wandb.Run
            Current wandb Run.
        """
        run.config["hparams"] = self.hparams.to_dict()

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        if self.hparams.clear_buffers_between_tasks:
            self.clear_buffers()

    def clear_buffers(self):
        """Clears out the experience buffer of the Policy."""
        # I think that's the right way to do it.. not sure.
        # assert False, self.model.replay_buffer.pos
        if self.model:
            # TODO: These are really interesting methods!
            # self.model.save_replay_buffer
            # self.model.load_replay_buffer

            self.model.replay_buffer.reset()


# We do this just to prevent errors when trying to decode the hparams class above, and
# also to silence the related warnings from simple-parsing's decoding.py module.

register_decoding_fn(Type[BasePolicy], lambda v: v)
register_decoding_fn(Callable, lambda v: v)


================================================
FILE: sequoia/methods/stable_baselines3_methods/base_test.py
================================================
from inspect import Parameter, Signature, getsourcefile, signature
from typing import ClassVar, Dict, Type

import pytest
from stable_baselines3.common.off_policy_algorithm import OffPolicyAlgorithm
from stable_baselines3.common.on_policy_algorithm import OnPolicyAlgorithm

from sequoia.common.config import Config
from sequoia.conftest import monsterkong_required
from sequoia.methods.method_test import MethodTests
from sequoia.settings.base import Results
from sequoia.settings.rl import DiscreteTaskAgnosticRLSetting, IncrementalRLSetting, RLSetting

from .base import BaseAlgorithm, StableBaselines3Method

# @pytest.mark.parametrize(
#     "MethodType, AlgoType",
#     [
#         (OnPolicyMethod, OnPolicyAlgorithm),
#         (OffPolicyMethod, OffPolicyAlgorithm),
#         (A2CMethod, A2C),
#         (DDPGMethod, DDPG),
#         (PPOMethod, PPO),
#         (DQNMethod, DQN),
#         (TD3Method, TD3),
#         (SACMethod, SAC),
#     ],
# )


class StableBaselines3MethodTests(MethodTests):
    Method: ClassVar[Type[StableBaselines3Method]] = StableBaselines3Method
    Model: ClassVar[Type[BaseAlgorithm]]
    SB3_Algo: ClassVar[Type[BaseAlgorithm]]
    debug_kwargs: ClassVar[Dict] = {}

    @pytest.mark.parametrize("clear_buffers", [False, True])
    def test_clear_buffers_between_tasks(self, clear_buffers: bool, config: Config):
        setting_kwargs = dict(
            nb_tasks=2,
            train_steps_per_task=1_000,
            test_steps_per_task=1_000,
            config=config,
        )
        setting_kwargs.update(self.setting_kwargs)
        setting = DiscreteTaskAgnosticRLSetting(**setting_kwargs)
        setting.setup()
        assert setting.train_max_steps == 2_000
        assert setting.test_max_steps == 2_000
        method = self.Method(hparams=self.Model.HParams(clear_buffers_between_tasks=clear_buffers))
        method.configure(setting)
        method.fit(
            train_env=setting.train_dataloader(),
            valid_env=setting.val_dataloader(),
        )
        assert method.hparams.clear_buffers_between_tasks == clear_buffers

        # TODO: Not clear how to check the length of the replay buffer!
        length_before_task_switch = get_current_length_of_replay_buffer(method.model)

        method.on_task_switch(task_id=1)

        if clear_buffers:
            assert get_current_length_of_replay_buffer(method.model) == 0
        else:
            assert get_current_length_of_replay_buffer(method.model) == length_before_task_switch

    def test_hparams_have_same_defaults_as_in_sb3(
        self,
    ):
        hparams = self.Model.HParams()
        AlgoType = [
            cls for cls in self.Model.mro() if cls.__module__.startswith("stable_baselines3")
        ][0]
        sig: Signature = signature(AlgoType.__init__)

        for attr_name, value_in_hparams in hparams.to_dict().items():
            params_names = list(sig.parameters.keys())
            assert attr_name in params_names, f"Hparams has extra field {attr_name}"
            algo_constructor_parameter = sig.parameters[attr_name]
            sb3_default = algo_constructor_parameter.default
            if sb3_default is Parameter.empty:
                continue
            if attr_name in "verbose":
                continue  # ignore the default value of the 'verbose' param which we change.

            if (
                attr_name == "train_freq"
                and isinstance(sb3_default, tuple)
                and len(sb3_default) == 2
            ):
                # Convert the default of (1, "steps") to 1, since that's the format we use.
                if sb3_default[1] == "step":
                    sb3_default = sb3_default[0]
                if isinstance(value_in_hparams, list):
                    value_in_hparams = tuple(value_in_hparams)

            assert value_in_hparams == sb3_default, (
                f"{self.Method.__name__} in Sequoia has different default value for "
                f"hyper-parameter '{attr_name}' than in SB3: \n"
                f"\t{value_in_hparams} != {sb3_default}\n"
                f"Path to sequoia implementation: {getsourcefile(self.Method)}\n"
                f"Path to SB3 implementation: {getsourcefile(AlgoType)}\n"
            )

    @classmethod
    @pytest.fixture
    def method(cls, config: Config) -> StableBaselines3Method:
        """Fixture that returns the Method instance to use when testing/debugging."""
        return cls.Method(**cls.debug_kwargs)

    def validate_results(
        self,
        setting: RLSetting,
        method: StableBaselines3Method,
        results: RLSetting.Results,
    ) -> None:
        assert results
        assert results.objective
        # TODO: Set some 'reasonable' bounds on the performance here, depending on the
        # setting/dataset.

    def test_debug(self, method: StableBaselines3Method, setting: RLSetting, config: Config):
        results: Results = setting.apply(method, config=config)
        assert results.objective is not None
        print(results.summary())
        self.validate_results(setting=setting, method=method, results=results)


class DiscreteActionSpaceMethodTests(StableBaselines3MethodTests):
    debug_kwargs: ClassVar[Dict] = {}
    expected_debug_mean_episode_reward: ClassVar[float] = 135
    setting_kwargs: ClassVar[str] = {"dataset": "CartPole-v0"}

    @pytest.mark.timeout(120)
    @monsterkong_required
    def test_monsterkong(self):
        method = self.Method(**self.debug_kwargs)
        setting = IncrementalRLSetting(
            dataset="monsterkong",
            nb_tasks=2,
            train_steps_per_task=1_000,
            test_steps_per_task=1_000,
        )
        results: IncrementalRLSetting.Results = setting.apply(method, config=Config(debug=True))
        print(results.summary())


from functools import singledispatch

from stable_baselines3.common.buffers import RolloutBuffer


@singledispatch
def get_current_length_of_replay_buffer(algo: BaseAlgorithm) -> int:
    """Returns the current length of the replay buffer of the given Algorithm."""
    raise NotImplementedError(algo)


@get_current_length_of_replay_buffer.register
def _(algo: OffPolicyAlgorithm):
    return algo.replay_buffer.pos


@get_current_length_of_replay_buffer.register
def _(algo: OnPolicyAlgorithm):
    rollout_buffer: RolloutBuffer
    return algo.rollout_buffer.pos


class ContinuousActionSpaceMethodTests(StableBaselines3MethodTests):
    setting_kwargs: ClassVar[str] = {"dataset": "MountainCarContinuous-v0"}


================================================
FILE: sequoia/methods/stable_baselines3_methods/ddpg.py
================================================
""" Method that uses the DDPG model from stable-baselines3 and targets the RL
settings in the tree.
"""
from dataclasses import dataclass
from typing import Callable, ClassVar, Optional, Type, Union

import gym
from gym import spaces
from simple_parsing import mutable_field
from stable_baselines3.common.off_policy_algorithm import TrainFreq
from stable_baselines3.ddpg import DDPG

from sequoia.common.hparams import log_uniform
from sequoia.methods import register_method
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger

from .off_policy_method import OffPolicyMethod, OffPolicyModel

logger = get_logger(__name__)


class DDPGModel(DDPG, OffPolicyModel):
    """Customized version of the DDPG model from stable-baselines-3."""

    @dataclass
    class HParams(OffPolicyModel.HParams):
        """Hyper-parameters of the DDPG Model."""

        # TODO: Add hparams specific to DDPG here.
        # The learning rate, it can be a function of the current progress (from
        # 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-3)

        # The verbosity level: 0 none, 1 training information, 2 debug
        verbose: int = 0

        train_freq: TrainFreq = TrainFreq(frequency=1, unit="episode")

        # Minibatch size for each gradient update
        batch_size: int = 100

        # How many gradient steps to do after each rollout (see ``train_freq``
        # and ``n_episodes_rollout``) Set to ``-1`` means to do as many gradient
        # steps as steps done in the environment during the rollout.
        gradient_steps: int = -1
        # gradient_steps: int = categorical(1, -1, default=-1)


@register_method
@dataclass
class DDPGMethod(OffPolicyMethod):
    """Method that uses the DDPG model from stable-baselines3."""

    Model: ClassVar[Type[DDPGModel]] = DDPGModel

    # Hyper-parameters of the DDPG model.
    hparams: DDPGModel.HParams = mutable_field(DDPGModel.HParams)

    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 2_048.0

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> DDPGModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)


if __name__ == "__main__":
    results = DDPGMethod.main()
    print(results)


================================================
FILE: sequoia/methods/stable_baselines3_methods/ddpg_test.py
================================================
from typing import ClassVar, Type

import pytest

from .base import BaseAlgorithm, StableBaselines3Method
from .base_test import ContinuousActionSpaceMethodTests
from .ddpg import DDPGMethod, DDPGModel


@pytest.mark.timeout(60)
class TestDDPG(ContinuousActionSpaceMethodTests):
    Method: ClassVar[Type[StableBaselines3Method]] = DDPGMethod
    Model: ClassVar[Type[BaseAlgorithm]] = DDPGModel


================================================
FILE: sequoia/methods/stable_baselines3_methods/dqn.py
================================================
""" Method that uses the DQN model from stable-baselines3 and targets the RL
settings in the tree.
"""
from dataclasses import dataclass
from typing import Callable, ClassVar, Optional, Type, Union

import gym
from gym import spaces
from simple_parsing import mutable_field
from simple_parsing.helpers.hparams import log_uniform, uniform
from stable_baselines3.dqn import DQN

from sequoia.common.hparams import categorical
from sequoia.common.transforms import ChannelsFirst
from sequoia.methods import register_method
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger

from .off_policy_method import OffPolicyMethod, OffPolicyModel

logger = get_logger(__name__)


class DQNModel(DQN, OffPolicyModel):
    """Customized version of the DQN model from stable-baselines-3."""

    @dataclass
    class HParams(OffPolicyModel.HParams):
        """Hyper-parameters of the DQN model from `stable_baselines3`.

        The command-line arguments for these are created with simple-parsing.
        """

        # ------------------
        # overwritten hparams
        # The learning rate, it can be a function of the current progress (from
        # 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-4)
        # size of the replay buffer
        buffer_size: int = uniform(100_000, 10_000_000, default=1_000_000)
        # --------------------

        # How many steps of the model to collect transitions for before learning
        # starts.
        learning_starts: int = 50_000

        # Minibatch size for each gradient update
        batch_size: int = 32

        # Update the model every ``train_freq`` steps. Set to `-1` to disable.
        train_freq: int = 4
        # train_freq: int = categorical(1, 10, 100, 1_000, 10_000, default=4)

        # The soft update coefficient ("Polyak update", between 0 and 1) default
        # 1 for hard update
        tau: float = 1.0
        # tau: float = uniform(0., 1., default=1.0)
        # Update the target network every ``target_update_interval`` environment
        # steps.
        target_update_interval: int = categorical(1, 10, 100, 1_000, 10_000, default=10_000)
        # Fraction of entire training period over which the exploration rate is
        # reduced.
        exploration_fraction: float = 0.1
        # exploration_fraction: float = uniform(0.05, 0.3, default=0.1)
        # Initial value of random action probability.
        exploration_initial_eps: float = 1.0
        # exploration_initial_eps: float = uniform(0.5, 1.0, default=1.0)
        # final value of random action probability.
        exploration_final_eps: float = 0.05
        # exploration_final_eps: float = uniform(0, 0.1, default=0.05)
        # The maximum value for the gradient clipping.
        max_grad_norm: float = 10
        # max_grad_norm: float = uniform(1, 100, default=10)

    def train(self, gradient_steps: int, batch_size: int = 100) -> None:
        super().train(gradient_steps, batch_size=batch_size)


@register_method
@dataclass
class DQNMethod(OffPolicyMethod):
    """Method that uses a DQN model from the stable-baselines3 package."""

    Model: ClassVar[Type[DQNModel]] = DQNModel

    # Hyper-parameters of the DQN model.
    hparams: DQNModel.HParams = mutable_field(DQNModel.HParams)

    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 1_024 * 10.0

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting)
        # NOTE: Need to change some attributes depending on the maximal number of steps
        # in the environment allowed in the given Setting.
        if setting.steps_per_phase:
            ten_percent_of_step_budget = setting.steps_per_phase // 10
            if self.hparams.target_update_interval > ten_percent_of_step_budget:
                # Same for the 'update target network' interval.
                self.hparams.target_update_interval = ten_percent_of_step_budget // 2
                logger.info(
                    f"Reducing the target network update interval to "
                    f"{self.hparams.target_update_interval}, because of the limit on "
                    f"training steps imposed by the Setting."
                )

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> DQNModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        obs = observations.x
        # Temp fix for monsterkong and DQN:
        if obs.shape == (64, 64, 3):
            obs = ChannelsFirst.apply(obs)
        predictions = self.model.predict(obs)
        action, _ = predictions
        assert action in action_space, (observations, action, action_space)
        return action

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)


if __name__ == "__main__":
    results = DQNMethod.main()
    print(results)


================================================
FILE: sequoia/methods/stable_baselines3_methods/dqn_test.py
================================================
from typing import ClassVar, Dict, Type

import numpy as np
import pytest
from gym import spaces

from sequoia.common.config import Config
from sequoia.common.spaces import Image
from sequoia.settings.rl import IncrementalRLSetting

from .base import BaseAlgorithm, StableBaselines3Method
from .base_test import DiscreteActionSpaceMethodTests
from .dqn import DQNMethod, DQNModel
from .off_policy_method_test import OffPolicyMethodTests


class TestDQN(DiscreteActionSpaceMethodTests, OffPolicyMethodTests):
    Method: ClassVar[Type[StableBaselines3Method]] = DQNMethod
    Model: ClassVar[Type[BaseAlgorithm]] = DQNModel
    debug_kwargs: ClassVar[Dict] = {}

    # TODO: Maybe this is because of the buffer isn't filled up enough with the short
    # number of allowed steps?
    @pytest.mark.xfail(reason="DQN really sucks on cartpole?")
    def test_classic_control_state(self, config: Config):
        super().test_classic_control_state(config=config)

    @pytest.mark.xfail(reason="DQN really sucks on cartpole?")
    def test_incremental_classic_control_state(self, config: Config):
        super().test_incremental_classic_control_state(config=config)

    def test_dqn_monsterkong_adds_channel_first_transform(self):
        method = self.Method(**self.debug_kwargs)
        setting = IncrementalRLSetting(
            dataset="monsterkong",
            nb_tasks=2,
            train_steps_per_task=1_000,
            test_steps_per_task=1_000,
        )
        assert setting.train_max_steps == 2_000
        assert setting.test_max_steps == 2_000
        assert setting.nb_tasks == 2
        assert setting.observation_space.x == Image(0, 255, shape=(64, 64, 3), dtype=np.uint8)
        assert setting.observation_space.task_labels.n == 2
        # assert setting.observation_space == TypedDictSpace(
        #     spaces={
        #         "x": Image(0, 255, shape=(64, 64, 3), dtype=np.uint8),
        #         "task_labels": Sparse(spaces.Discrete(2), sparsity=0.5),
        #         "done": Sparse(spaces.Box(False, True, (), dtype=np.bool), sparsity=1),
        #     },
        #     dtype=setting.Observations,
        # )
        assert setting.observation_space.dtype is setting.Observations
        assert setting.action_space == spaces.Discrete(6)  # monsterkong has 6 actions.

        # (Before the method gets to change the Setting):
        # By default the setting gives the same shape of obs as the underlying env.
        for env_method in [
            setting.train_dataloader,
            setting.val_dataloader,
            setting.test_dataloader,
        ]:
            print(f"Testing method {env_method.__name__}")
            with env_method() as env:
                reset_obs = env.reset()
                # TODO: Fix this so the 'x' space actually gets tensor support.
                # assert reset_obs in env.observation_space
                assert reset_obs.numpy() in env.observation_space
                assert reset_obs.x.shape == (64, 64, 3)

        # Let the Method configure itself on the Setting:
        method.configure(setting)

        # (After the method gets to change the Setting):

        for env_method in [
            setting.train_dataloader,
            setting.val_dataloader,
            setting.test_dataloader,
        ]:
            with env_method() as env:
                reset_obs = env.reset()
                # Fix this numpy bug.
                assert reset_obs.numpy() in env.observation_space
                assert reset_obs.x.shape == (64, 64, 3)


================================================
FILE: sequoia/methods/stable_baselines3_methods/off_policy_method.py
================================================
""" Base class used to not duplicate the tweaks made all the off-policy algos from SB3.
"""
import math
import warnings
from abc import ABC
from dataclasses import dataclass
from typing import Any, Callable, ClassVar, Optional, Type, Union

import gym
from gym import spaces
from gym.spaces.utils import flatten_space
from simple_parsing import mutable_field
from simple_parsing.helpers.serialization import register_decoding_fn
from stable_baselines3.common.off_policy_algorithm import OffPolicyAlgorithm, TrainFreq

from sequoia.common.hparams import log_uniform, uniform
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger

from .base import SB3BaseHParams, StableBaselines3Method

logger = get_logger(__name__)


def decode_trainfreq(v: Any):
    if isinstance(v, list) and len(v) == 2:
        return TrainFreq(v[0], v[1])
    return v


register_decoding_fn(TrainFreq, decode_trainfreq)


class OffPolicyModel(OffPolicyAlgorithm, ABC):
    """Tweaked version of the OffPolicyAlgorithm from SB3."""

    @dataclass
    class HParams(SB3BaseHParams):
        """Hyper-parameters common to all off-policy algos from SB3."""

        # The learning rate, it can be a function of the current progress (from
        # 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-4)
        # size of the replay buffer
        buffer_size: int = uniform(100, 10_000_000, default=1_000_000)

        # How many steps of the model to collect transitions for before learning
        # starts.
        learning_starts: int = 100

        # Minibatch size for each gradient update
        batch_size: int = 256
        # batch_size: int = categorical(1, 2, 4, 8, 16, 32, 128, default=32)

        # The soft update coefficient ("Polyak update", between 0 and 1) default
        # 1 for hard update
        tau: float = 0.005
        # tau: float = uniform(0., 1., default=1.0)

        # The discount factor
        gamma: float = 0.99
        # gamma: float = uniform(0.9, 0.9999, default=0.99)

        # Update the model every ``train_freq`` steps. Set to `-1` to disable.
        train_freq: int = 1
        # train_freq: int = categorical(1, 10, 100, 1_000, 10_000, default=10)

        # How many gradient steps to do after each rollout (see ``train_freq``
        # and ``n_episodes_rollout``) Set to ``-1`` means to do as many gradient
        # steps as steps done in the environment during the rollout.
        gradient_steps: int = 1
        # gradient_steps: int = categorical(1, -1, default=1)

        # Enable a memory efficient variant of the replay buffer at a cost of
        # more complexity.
        # See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
        optimize_memory_usage: bool = False

        # Whether to create a second environment that will be used for
        # evaluating the agent periodically. (Only available when passing string
        # for the environment)
        create_eval_env: bool = False

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 1


@dataclass
class OffPolicyMethod(StableBaselines3Method, ABC):
    """ABC for a Method that uses an off-policy Algorithm from SB3."""

    # Type of model to use. This has to be overwritten in a subclass.
    Model: ClassVar[Type[OffPolicyModel]] = OffPolicyModel
    # Hyper-parameters of the DDPG model.
    hparams: OffPolicyModel.HParams = mutable_field(OffPolicyModel.HParams)
    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 2_048.0

    def __post_init__(self):
        super().__post_init__()
        self.model: OffPolicyAlgorithm

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting)
        # The default value for the buffer size in the DQN model is WAY too
        # large, so we re-size it depending on the size of the observations.
        # NOTE: (issue #156) Only consider the images, not the task labels for these
        # buffer size calculations (since the task labels might be None and have the
        # np.object dtype).
        x_space = setting.observation_space.x
        flattened_observation_space = flatten_space(x_space)
        observation_size_bytes = flattened_observation_space.sample().nbytes

        # IF there are more than a few dimensions per observation, then we
        # should probably reduce the size of the replay buffer according to
        # the size of the observations.
        max_buffer_size_bytes = self.max_buffer_size_megabytes * 1024 * 1024
        max_buffer_length = max_buffer_size_bytes // observation_size_bytes

        if max_buffer_length == 0:
            raise RuntimeError(
                f"Couldn't even fit a single observation in the buffer, "
                f"given the  specified max_buffer_size_megabytes "
                f"({self.max_buffer_size_megabytes}) and the size of a "
                f"single observation ({observation_size_bytes} bytes)!"
            )

        if self.hparams.buffer_size > max_buffer_length:
            calculated_size_bytes = observation_size_bytes * self.hparams.buffer_size
            calculated_size_gb = calculated_size_bytes / 1024**3
            warnings.warn(
                RuntimeWarning(
                    f"The selected buffer size ({self.hparams.buffer_size} is "
                    f"too large! (It would take roughly around "
                    f"{calculated_size_gb:.3f}Gb to hold  many observations alone! "
                    f"The buffer size will be capped at {max_buffer_length} "
                    f"entries."
                )
            )

            self.hparams.buffer_size = int(max_buffer_length)

        # NOTE: Need to change some attributes depending on the maximal number of steps
        # in the environment allowed in the given Setting.
        if setting.train_max_steps:
            logger.info(
                f"Total training steps are limited to {setting.train_steps_per_task} "
                f"steps per task, {setting.train_max_steps} steps in total."
            )
            ten_percent_of_step_budget = setting.steps_per_phase // 10

            if self.hparams.buffer_size > ten_percent_of_step_budget:
                warnings.warn(
                    RuntimeWarning("Reducing max buffer size to ten percent of the step budget.")
                )
                self.hparams.buffer_size = ten_percent_of_step_budget

            if self.hparams.learning_starts > ten_percent_of_step_budget:
                logger.info(
                    f"The model was originally going to use the first "
                    f"{self.hparams.learning_starts} steps for pure random "
                    f"exploration, but the setting has a max number of steps set to "
                    f"{setting.train_max_steps}, therefore we will limit the number of "
                    f"exploration steps to 10% of that 'step budget' = "
                    f"{ten_percent_of_step_budget} steps."
                )
                self.hparams.learning_starts = ten_percent_of_step_budget
                if self.hparams.train_freq != -1 and isinstance(self.hparams.train_freq, int):
                    # Update the model at least 2 times during each task, and at most
                    # once per step.
                    self.hparams.train_freq = min(
                        self.hparams.train_freq,
                        int(0.5 * ten_percent_of_step_budget),
                    )
                    self.hparams.train_freq = max(self.hparams.train_freq, 1)

                logger.info(f"Training frequency: {self.hparams.train_freq}")

        logger.info(f"Will use a Replay buffer of size {self.hparams.buffer_size}.")

        if setting.steps_per_phase:
            if not isinstance(self.hparams.train_freq, int):
                if self.hparams.train_freq[1] == "step":
                    self.hparams.train_freq = self.hparams.train_freq[0]
                else:
                    assert self.hparams.train_freq[1] == "episode"

                    # Use some value based of the maximum episode length if available,
                    # else use a "reasonable" default value.
                    # TODO: Double-check that this makes sense.
                    if setting.max_episode_steps:
                        self.hparams.train_freq = setting.max_episode_steps
                    else:
                        self.hparams.train_freq = 10

                    warnings.warn(
                        RuntimeWarning(
                            f"Need the training frequency units to be steps for now! "
                            f"(Train freq has been changed to every "
                            f"{self.hparams.train_freq} steps)."
                        )
                    )

            # NOTE: We limit the number of training steps per task, such that we never
            # attempt to fill the buffer using more samples than the environment allows.
            if self.hparams.train_freq > setting.steps_per_phase:
                self.hparams.n_steps = math.ceil(0.1 * setting.steps_per_phase)
                logger.info(
                    f"Capping the n_steps to 10% of step budget length: " f"{self.hparams.n_steps}"
                )

            self.train_steps_per_task = min(
                self.train_steps_per_task,
                setting.steps_per_phase - self.hparams.train_freq - 1,
            )
            logger.info(f"Limitting training steps per task to {self.train_steps_per_task}")

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> OffPolicyModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)

    def clear_buffers(self):
        """Clears out the experience buffer of the Policy."""
        # I think that's the right way to do it.. not sure.
        if self.model:
            # TODO: These are really interesting methods!
            # self.model.save_replay_buffer
            # self.model.load_replay_buffer
            self.model.replay_buffer.reset()


================================================
FILE: sequoia/methods/stable_baselines3_methods/off_policy_method_test.py
================================================
from typing import ClassVar, Dict, Type

from .off_policy_method import OffPolicyAlgorithm, OffPolicyMethod


class OffPolicyMethodTests:
    Method: ClassVar[Type[OffPolicyMethod]]
    Model: ClassVar[Type[OffPolicyAlgorithm]]
    debug_dataset: ClassVar[str]
    debug_kwargs: ClassVar[Dict] = {}


================================================
FILE: sequoia/methods/stable_baselines3_methods/on_policy_method.py
================================================
""" Base class used to not duplicate the tweaks made all the on-policy algos from SB3.
"""
import math
import warnings
from abc import ABC
from dataclasses import dataclass
from typing import Callable, ClassVar, Dict, Mapping, Optional, Type, Union

import gym
import torch
from gym import spaces
from simple_parsing import mutable_field
from stable_baselines3.common.on_policy_algorithm import OnPolicyAlgorithm

from sequoia.common.hparams import log_uniform, uniform
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger

from .base import SB3BaseHParams, StableBaselines3Method

logger = get_logger(__name__)


class OnPolicyModel(OnPolicyAlgorithm, ABC):
    """Tweaked version of the OnPolicyAlgorithm from SB3."""

    @dataclass
    class HParams(SB3BaseHParams):
        """Hyper-parameters common to all on-policy algos from SB3."""

        # learning rate for the optimizer, it can be a function of the current
        # progress remaining (from 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=1e-3)
        # The number of steps to run for each environment per update (i.e. batch size
        # is n_steps * n_env where n_env is number of environment copies running in
        # parallel)
        # NOTE: Default value here is much lower than in PPO, which might indicate
        # that this A2C is more "on-policy"? (i.e. that it requires data to be super
        # "fresh")?
        n_steps: int = uniform(3, 64, default=5, discrete=True)
        # Discount factor
        gamma: float = 0.99
        # gamma: float = uniform(0.9, 0.9999, default=0.99)

        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator.
        # Equivalent to classic advantage when set to 1.
        gae_lambda: float = 1.0
        # gae_lambda: float = uniform(0.5, 1.0, default=1.0)

        # Entropy coefficient for the loss calculation
        ent_coef: float = 0.0
        # ent_coef: float = uniform(0.0, 1.0, default=0.0)

        # Value function coefficient for the loss calculation
        vf_coef: float = 0.5
        # vf_coef: float = uniform(0.01, 1.0, default=0.5)

        # The maximum value for the gradient clipping
        max_grad_norm: float = 0.5
        # max_grad_norm: float = uniform(0.1, 10, default=0.5)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = False
        # use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE.
        # Default: -1 (only sample at the beginning of the rollout)
        sde_sample_freq: int = -1
        # sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # The log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 1

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run.
        # Setting it to auto, the code will be run on the GPU if possible.
        device: Union[torch.device, str] = "auto"

        # :param _init_setup_model: Whether or not to build the network at the
        # creation of the instance
        # _init_setup_model: bool = True


@dataclass
class OnPolicyMethod(StableBaselines3Method, ABC):
    """Method that uses the A2C model from stable-baselines3."""

    Model: ClassVar[Type[OnPolicyModel]] = OnPolicyModel

    # Hyper-parameters of the model/algorithm.
    hparams: OnPolicyModel.HParams = mutable_field(OnPolicyModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting=setting)
        if setting.steps_per_phase:
            min_model_updates = 20
            if self.hparams.n_steps > setting.steps_per_phase // min_model_updates:
                # Set the number of steps per update so that there are *at least*
                # `min_model_updates` model updates during a single `fit` call.
                new_n_steps = math.ceil(setting.steps_per_phase / min_model_updates)
                warnings.warn(
                    RuntimeWarning(
                        f"Capping the number of steps per update to {new_n_steps}, in "
                        f"order to update the model at least {min_model_updates} "
                        f"times per phase (call to `fit`)."
                    )
                )
                assert new_n_steps > 1
                self.hparams.n_steps = new_n_steps
            # NOTE: We limit the number of trainign steps per task, such that we never
            # attempt to fill the buffer using more samples than the environment allows.
            self.train_steps_per_task = min(
                self.train_steps_per_task,
                setting.steps_per_phase - self.hparams.n_steps - 1,
            )
            logger.info(f"Limitting training steps per task to {self.train_steps_per_task}")

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> OnPolicyModel:
        logger.info("Creating model with hparams: \n" + self.hparams.dumps_json(indent="\t"))
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)

    def clear_buffers(self):
        """Clears out the experience buffer of the Policy."""
        # I think that's the right way to do it.. not sure.
        if self.model:
            # TODO: These are really interesting methods!
            # self.model.save_replay_buffer
            # self.model.load_replay_buffer
            self.model.rollout_buffer.reset()

    def get_search_space(self, setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]:
        search_space = super().get_search_space(setting)
        if isinstance(setting.action_space, spaces.Discrete):
            # From stable_baselines3/common/base_class.py", line 170:
            # > Generalized State-Dependent Exploration (gSDE) can only be used with
            #   continuous actions
            # Therefore we remove related entries in the search space, so they keep
            # their default values.
            search_space.pop("use_sde", None)
            search_space.pop("sde_sample_freq", None)
        return search_space


================================================
FILE: sequoia/methods/stable_baselines3_methods/policy_wrapper.py
================================================
from abc import ABC, abstractmethod
from functools import wraps
from typing import ClassVar, Dict, Generic, Optional, Type, TypeVar, Union

from stable_baselines3.a2c import A2C
from stable_baselines3.a2c.policies import ActorCriticPolicy
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.policies import BasePolicy
from torch import Tensor

from sequoia.utils import get_logger

logger = get_logger(__name__)

T = TypeVar("T")
Policy = TypeVar("Policy", bound=BasePolicy)
SB3Algo = TypeVar("SB3Algo", bound=BaseAlgorithm)

Wrapper = TypeVar("Wrapper", bound="PolicyWrapper")


class PolicyWrapper(BasePolicy, ABC, Generic[Policy]):
    """Base class for 'wrappers' to be applied to policies from SB3.

    This adds "hooks" into the `step()` and `zero_grad()` method of the Policy's
    optimizer.

    NOTE: Hasn't been worked on in a while, would not recommend using this unless you're
    very familiar with SB3 source code and there is no other way of doing what you want.
    """

    # Dictionary that stores the types of policies that have been 'wrapped' with
    # this mixin.
    _wrapped_classes: ClassVar[Dict[Type[T], Type[Union[T, "PolicyWrapper"]]]] = {}

    def __init__(self, *args, _already_initialized: bool = False, **kwargs):
        # When calling `EWCMixin.__init__(existing_policy)`, we don't want
        # to actually call the policy's __init__.
        if not _already_initialized:
            super().__init__(*args, **kwargs)

    @abstractmethod
    def get_loss(self: Policy) -> Union[float, Tensor]:
        """This will get called before the call to `policy.optimizer.step()`
        from within the `train` method of the algos from stable-baselines3.

        You can use this to return some kind of loss tensor to use.
        """

    def before_optimizer_step(self: Policy):
        """Called before executing `self.policy.optimizer.step()` in the training
        loop of the SB3 algos.
        """

    def after_zero_grad(self: Policy):
        """Called after `self.policy.optimizer.zero_grad()` in the training
        loop of the SB3 algos.
        """
        # Backpropagate the loss here, by default, so that any grad clipping
        # also affects the grads of the loss, for instance.
        wrapper_loss = self.get_loss()
        logger.debug(f"{type(self).__name__} loss: {wrapper_loss}")
        if isinstance(wrapper_loss, Tensor) and wrapper_loss.requires_grad:
            wrapper_loss.backward(retain_graph=True)

    @classmethod
    def wrap_policy(
        cls: Type[Wrapper], policy: Policy, **mixin_init_kwargs
    ) -> Union[Policy, Wrapper]:
        """IDEA: "Wrap" a Policy, so that every time its optimizer's `step()`
        method gets called, it actually first backpropagates an EWC loss.

        Parameters
        ----------
        policy : Policy
            [description]

        Returns
        -------
        Union[Policy, EWCMixin]
            [description]
        """
        assert isinstance(policy, BasePolicy)
        if not isinstance(policy, cls):
            # Dynamically change the class of this single instance to be a subclass
            # of its current class, with the addition of the EWCMixin base class.
            policy.__class__ = cls.wrap_policy_class(type(policy))
            # 'initialize' the existing object for this mixin type.
            cls.__init__(policy, _already_initialized=True, **mixin_init_kwargs)

        assert isinstance(policy, cls)
        optimizer = policy.optimizer or policy.optimizer_class
        if optimizer is None:
            raise NotImplementedError("Need to have an optimizer instance atm")

        # 'Replace' the `policy.optimizer.step` with a function that might first
        # backpropagates the loss.
        _step = optimizer.step
        # NOTE: Setting the policy's `optimizer` attribute to a new value will
        # will actually break this.
        @wraps(optimizer.step)
        def new_optimizer_step(*args, **kwargs):
            policy.before_optimizer_step()
            return _step(*args, **kwargs)

        optimizer.step = new_optimizer_step

        _zero_grad = optimizer.zero_grad

        @wraps(optimizer.zero_grad)
        def new_zero_grad(*args, **kwargs):
            _zero_grad(*args, **kwargs)
            policy.after_zero_grad()

        optimizer.zero_grad = new_zero_grad

        return policy

    @classmethod
    def wrap_policy_class(
        cls: Type[Wrapper], policy_type: Type[Policy]
    ) -> Type[Union[Policy, Wrapper]]:
        """Add the wrapper as a base class to a policy type from SB3."""
        assert issubclass(policy_type, BasePolicy)
        if issubclass(policy_type, cls):
            # It already has the mixin, so return the class unchanged.
            return policy_type

        # Save the results so we don't create two wrappers for the same class.
        if policy_type in cls._wrapped_classes:
            return cls._wrapped_classes[policy_type]

        class WrappedPolicy(policy_type, cls):  # type: ignore
            pass

        WrappedPolicy.__name__ = policy_type.__name__ + "With" + cls.__name__
        cls._wrapped_classes[policy_type] = WrappedPolicy
        return WrappedPolicy

    @classmethod
    def wrap_algorithm(cls: Type[Wrapper], algo: SB3Algo, **wrapper_kwargs) -> SB3Algo:
        """Wrap an existing algorithm's policy using this wrapper."""
        assert isinstance(algo, BaseAlgorithm)
        if not isinstance(algo.policy, cls):
            # Dynamically change the class of this single instance to be a subclass
            # of its current class, with the addition of the EWCMixin base class.
            if algo.policy is None:
                # We want to wrap the _setup_model so the policy gets wrapped.
                # raise NotImplementedError("TODO")
                _original_setup_model = algo._setup_model

                @wraps(algo._setup_model)
                def _wrapped_setup_model(*args, **kwargs) -> None:
                    _original_setup_model(*args, **kwargs)
                    assert isinstance(algo.policy, BasePolicy)
                    algo.policy = cls.wrap_policy(algo.policy, **wrapper_kwargs)

                algo._setup_model = _wrapped_setup_model
            else:
                algo.policy = cls.wrap_policy(algo.policy, **wrapper_kwargs)
        return algo

    @classmethod
    def wrap_algorithm_class(
        cls: Type[Wrapper], algo_type: Type[SB3Algo]
    ) -> Type[Union[SB3Algo, Wrapper]]:
        """Same idea, but wraps a class of algorithm, so that its policies are
        wrapped with this mixin.
        """
        if algo_type in cls._wrapped_classes:
            return cls._wrapped_classes[algo_type]

        class WrappedAlgo(algo_type):  # type: ignore
            def __init__(self, *args, **kwargs):
                # IDEA Extract the arguments that could be used for the wrapper?
                super().__init__(*args, **kwargs)
                self.policy: Union[BasePolicy, Wrapper]

            def _setup_model(self):
                super()._setup_model()
                # TODO: Figure out a way of passing the kwargs to the policy?
                # maybe using the 'policy_kwargs' argument to the constructor?
                self.policy = cls.wrap_policy(self.policy)

            # No need to change the train loop anymore!
            # def train(self) -> None:
            #     return super().train()

            # IDEA: Redirect any failing attribute lookups to the policy?
            def __getattr__(self, attr: str):
                try:
                    return super().__getattribute__(attr)
                except AttributeError as e:
                    if hasattr(self.policy, attr):
                        return getattr(self.policy, attr)
                    raise e

            # The above would remove the need for any of these:
            # def on_task_switch(self, task_id: Optional[int]):
            #     self.policy.on_task_switch(task_id)

            # def ewc_loss(self) -> Union[float, Tensor]:
            #     return self.policy.ewc_loss()

        WrappedAlgo.__name__ = algo_type.__name__ + "With" + cls.__name__

        cls._wrapped_classes[algo_type] = WrappedAlgo
        return WrappedAlgo


from stable_baselines3 import A2C


# Either 'manually', like this:
class A2CWithEWC(A2C):
    def __init__(self, *args, ewc_coefficient: float = 1.0, ewc_p_norm: int = 2, **kwargs):
        self.ewc_coefficient = ewc_coefficient
        self.ewc_p_norm = ewc_p_norm
        super().__init__(*args, **kwargs)
        self.policy: Union[ActorCriticPolicy, EWC]

    def _setup_model(self):
        super()._setup_model()
        # Just to show that the policy was just wrapped.
        self.policy = EWC._wrap_policy(
            self.policy,
            ewc_coefficient=self.ewc_coefficient,
            ewc_p_norm=self.ewc_p_norm,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        self.policy.on_task_switch(task_id)


## OR automatically, like this!
# A2CWithEWC = EWC._wrap_algorithm_class(A2C)
# DQNWithEWC = EWC._wrap_algorithm_class(DQN)
# PPOWithEWC = EWC._wrap_algorithm_class(PPO)
# DDPGWithEWC = EWC._wrap_algorithm_class(DDPG)
# SACWithEWC = EWC._wrap_algorithm_class(SAC)


================================================
FILE: sequoia/methods/stable_baselines3_methods/ppo.py
================================================
""" Method that uses the PPO model from stable-baselines3 and targets the RL
settings in the tree.
"""
from dataclasses import dataclass
from typing import ClassVar, Dict, Mapping, Optional, Type, Union

import gym
import torch
from gym import spaces
from simple_parsing import mutable_field
from stable_baselines3.ppo import PPO

from sequoia.common.hparams import log_uniform
from sequoia.methods import register_method
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger

from .on_policy_method import OnPolicyMethod, OnPolicyModel

logger = get_logger(__name__)


class PPOModel(PPO, OnPolicyModel):
    """Proximal Policy Optimization algorithm (PPO) (clip version) - from SB3.

    Paper: https://arxiv.org/abs/1707.06347
    Code: The SB3 implementation borrows code from OpenAI Spinning Up
    (https://github.com/openai/spinningup/)
    https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail and
    and Stable Baselines (PPO2 from https://github.com/hill-a/stable-baselines)

    Introduction to PPO: https://spinningup.openai.com/en/latest/algorithms/ppo.html
    """

    @dataclass
    class HParams(OnPolicyModel.HParams):
        """Hyper-parameters of the PPO Model."""

        # # The policy model to use (MlpPolicy, CnnPolicy, ...)
        # policy: Union[str, Type[ActorCriticPolicy]]

        # # The environment to learn from (if registered in Gym, can be str)
        # env: Union[GymEnv, str]

        # The learning rate, it can be a function of the current progress remaining
        # (from 1 to 0)
        learning_rate: float = log_uniform(1e-6, 1e-2, default=3e-4)

        # The number of steps to run for each environment per update (i.e. batch size
        # is n_steps * n_env where n_env is number of environment copies running in
        # parallel)
        n_steps: int = log_uniform(32, 8192, default=2048, discrete=True)

        # Minibatch size
        batch_size: int = 64
        # batch_size: Optional[int] = categorical(16, 32, 64, 128, default=64)

        # Number of epoch when optimizing the surrogate loss
        n_epochs: int = 10

        # Discount factor
        gamma: float = 0.99
        # gamma: float = uniform(0.9, 0.9999, default=0.99)

        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator
        gae_lambda: float = 0.95
        # gae_lambda: float = uniform(0.8, 1.0, default=0.95)

        # Clipping parameter, it can be a function of the current progress remaining
        # (from 1 to 0).
        clip_range: float = 0.2
        # clip_range: float = uniform(0.05, 0.4, default=0.2)

        # Clipping parameter for the value function, it can be a function of the current
        # progress remaining (from 1 to 0). This is a parameter specific to the OpenAI
        # implementation. If None is passed (default), no clipping will be done on the
        # value function. IMPORTANT: this clipping depends on the reward scaling.
        clip_range_vf: Optional[float] = None

        # Entropy coefficient for the loss calculation
        ent_coef: float = 0.0
        # ent_coef: float = uniform(0., 1., default=0.0)

        # Value function coefficient for the loss calculation
        vf_coef: float = 0.5
        # vf_coef: float = uniform(0.01, 1.0, default=0.5)

        # The maximum value for the gradient clipping
        max_grad_norm: float = 0.5
        # max_grad_norm: float = uniform(0.1, 10, default=0.5)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = False
        # use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE Default: -1 (only
        # sample at the beginning of the rollout)
        sde_sample_freq: int = -1
        # sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # Limit the KL divergence between updates, because the clipping is not enough to
        # prevent large update see issue #213
        # (cf https://github.com/hill-a/stable-baselines/issues/213)
        # By default, there is no limit on the kl div.
        target_kl: Optional[float] = None

        # the log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 1

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run. Setting it to auto,
        # the code will be run on the GPU if possible.
        device: Union[torch.device, str] = "auto"

        # Whether or not to build the network at the creation of the instance
        # _init_setup_model: bool = True


@register_method
@dataclass
class PPOMethod(OnPolicyMethod):
    """Method that uses the PPO model from stable-baselines3."""

    Model: ClassVar[Type[PPOModel]] = PPOModel
    # Hyper-parameters of the PPO Model.
    hparams: PPOModel.HParams = mutable_field(PPOModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting=setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> PPOModel:
        logger.info("Creating model with hparams: \n" + self.hparams.dumps_json(indent="\t"))
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)

    def get_search_space(self, setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]:
        return super().get_search_space(setting)


if __name__ == "__main__":
    results = PPOMethod.main()
    print(results)


================================================
FILE: sequoia/methods/stable_baselines3_methods/ppo_test.py
================================================
from typing import ClassVar, Type

from .base import BaseAlgorithm, StableBaselines3Method
from .base_test import DiscreteActionSpaceMethodTests
from .ppo import PPOMethod, PPOModel


class TestPPO(DiscreteActionSpaceMethodTests):
    Method: ClassVar[Type[StableBaselines3Method]] = PPOMethod
    Model: ClassVar[Type[BaseAlgorithm]] = PPOModel


================================================
FILE: sequoia/methods/stable_baselines3_methods/sac.py
================================================
""" Method that uses the SAC model from stable-baselines3 and targets the RL
settings in the tree.
"""
from dataclasses import dataclass
from typing import Callable, ClassVar, Optional, Type, Union

import gym
from gym import spaces
from simple_parsing import mutable_field
from stable_baselines3.sac.sac import SAC

from sequoia.common.hparams import log_uniform
from sequoia.methods import register_method
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger

from .off_policy_method import OffPolicyMethod, OffPolicyModel

logger = get_logger(__name__)


class SACModel(SAC, OffPolicyModel):
    """Customized version of the SAC model from stable-baselines-3."""

    @dataclass
    class HParams(OffPolicyModel.HParams):
        """Hyper-parameters of the SAC Model."""

        # The learning rate, it can be a function of the current progress (from
        # 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=3e-4)
        buffer_size: int = 1_000_000
        learning_starts: int = 100
        batch_size: int = 256
        tau: float = 0.005
        gamma: float = 0.99
        train_freq = 1
        gradient_steps: int = 1
        # action_noise: Optional[ActionNoise] = None
        optimize_memory_usage: bool = False
        ent_coef: Union[str, float] = "auto"
        target_update_interval: int = 1
        target_entropy: Union[str, float] = "auto"
        use_sde: bool = False
        sde_sample_freq: int = -1


@register_method
@dataclass
class SACMethod(OffPolicyMethod):
    """Method that uses the SAC model from stable-baselines3."""

    Model: ClassVar[Type[SACModel]] = SACModel

    # Hyper-parameters of the SAC model.
    hparams: SACModel.HParams = mutable_field(SACModel.HParams)

    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 2_048.0

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> SACModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)


if __name__ == "__main__":
    results = SACMethod.main()
    print(results)


================================================
FILE: sequoia/methods/stable_baselines3_methods/sac_test.py
================================================
from typing import ClassVar, Type

import pytest

from sequoia.common.config import Config
from sequoia.conftest import slow
from sequoia.settings import Setting
from sequoia.settings.rl import ContinualRLSetting, IncrementalRLSetting, TaskIncrementalRLSetting

from .base import BaseAlgorithm, StableBaselines3Method
from .base_test import ContinuousActionSpaceMethodTests
from .sac import SACMethod, SACModel


@slow
@pytest.mark.timeout(120)
class TestSAC(ContinuousActionSpaceMethodTests):
    Method: ClassVar[Type[StableBaselines3Method]] = SACMethod
    Model: ClassVar[Type[BaseAlgorithm]] = SACModel

    # TODO: Look into why SAC is so slow, there's probably a parameter which isn't being set
    # properly.
    @slow
    @pytest.mark.timeout(120)
    @pytest.mark.parametrize(
        "Setting", [ContinualRLSetting, IncrementalRLSetting, TaskIncrementalRLSetting]
    )
    @pytest.mark.parametrize("observe_state", [True, False])
    def test_continuous_mountaincar(self, Setting: Type[Setting], observe_state: bool):
        method = self.Method()
        setting = Setting(
            dataset="MountainCarContinuous-v0",
            nb_tasks=2,
            train_steps_per_task=1_000,
            test_steps_per_task=1_000,
        )
        results: ContinualRLSetting.Results = setting.apply(method, config=Config(debug=True))
        print(results.summary())


================================================
FILE: sequoia/methods/stable_baselines3_methods/td3.py
================================================
""" TODO: Implement and test DDPG. """
from dataclasses import dataclass
from typing import Callable, ClassVar, Optional, Type, Union

import gym
from gym import spaces
from simple_parsing import mutable_field
from stable_baselines3.common.off_policy_algorithm import TrainFreq
from stable_baselines3.td3 import TD3

from sequoia.common.hparams import log_uniform
from sequoia.methods import register_method
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger

from .off_policy_method import OffPolicyMethod, OffPolicyModel

logger = get_logger(__name__)


class TD3Model(TD3, OffPolicyModel):
    @dataclass
    class HParams(OffPolicyModel.HParams):
        """Hyper-parameters of the TD3 model."""

        # TODO: Add HParams specific to TD3 here, if any, and also check that the
        # default values are correct.

        # The learning rate, it can be a function of the current progress (from
        # 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-3)

        # Minibatch size for each gradient update
        batch_size: int = 100
        # batch_size: int = categorical(1, 2, 4, 8, 16, 32, 128, default=32)

        train_freq: TrainFreq = (1, "episode")

        # How many gradient steps to do after each rollout (see ``train_freq``
        # and ``n_episodes_rollout``) Set to ``-1`` means to do as many gradient
        # steps as steps done in the environment during the rollout.
        gradient_steps: int = -1
        # gradient_steps: int = categorical(1, -1, default=1)


@register_method
@dataclass
class TD3Method(OffPolicyMethod):
    """Method that uses the TD3 model from stable-baselines3."""

    Model: ClassVar[Type[TD3Model]] = TD3Model
    hparams: TD3Model.HParams = mutable_field(TD3Model.HParams)

    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 2_048.0

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> TD3Model:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)


if __name__ == "__main__":
    results = TD3Method.main()
    print(results)


================================================
FILE: sequoia/methods/stable_baselines3_methods/td3_test.py
================================================
from typing import ClassVar, Type

from .base import BaseAlgorithm, StableBaselines3Method
from .base_test import ContinuousActionSpaceMethodTests
from .td3 import TD3Method, TD3Model


class TestTD3(ContinuousActionSpaceMethodTests):
    Method: ClassVar[Type[StableBaselines3Method]] = TD3Method
    Model: ClassVar[Type[BaseAlgorithm]] = TD3Model


================================================
FILE: sequoia/methods/trainer.py
================================================
""" 'Patch' for the Trainer of Pytorch Lightning so it can use gym environment as
dataloaders (via the GymDataLoader class of Sequoia).
"""
import os
from dataclasses import dataclass
from functools import singledispatch
from pathlib import Path
from typing import Any, Callable, Iterable, List, Optional, Union

import gym
import pytorch_lightning.trainer.connectors.data_connector
import pytorch_lightning.utilities.apply_func
import torch
from pytorch_lightning import Callback
from pytorch_lightning import Trainer as _Trainer
from pytorch_lightning.loggers import LightningLoggerBase
from pytorch_lightning.trainer.connectors.data_connector import DataConnector
from pytorch_lightning.trainer.supporters import CombinedLoader
from pytorch_lightning.utilities.apply_func import apply_to_collection
from simple_parsing import choice
from torch.utils.data import DataLoader

from sequoia.common import Batch
from sequoia.common.config import Config
from sequoia.common.gym_wrappers.utils import IterableWrapper, has_wrapper
from sequoia.common.hparams import HyperParameters, uniform
from sequoia.settings.rl.continual.environment import GymDataLoader
from sequoia.settings.sl import PassiveEnvironment
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.parseable import Parseable

logger = get_logger(__name__)


@dataclass
class TrainerConfig(HyperParameters, Parseable):
    """Configuration dataclass for a pytorch-lightning Trainer.

    See the docs for the Trainer from pytorch lightning for more info on the options.

    TODO: Pytorch Lightning already has a mechanism for adding argparse
    arguments for the Trainer.. It would be nice to find a way to use the 'native' way
    of adding arguments in PL in addition to using simple-parsing.
    """

    gpus: int = torch.cuda.device_count()
    overfit_batches: float = 0.0
    fast_dev_run: bool = False

    # Maximum number of epochs to train for.
    max_epochs: int = uniform(1, 100, default=10)

    # Number of nodes to use.
    num_nodes: int = 1
    accelerator: Optional[str] = None
    log_gpu_memory: bool = False

    val_check_interval: Union[int, float] = 1.0

    auto_scale_batch_size: Optional[str] = None
    auto_lr_find: bool = False
    # Floating point precision to use in the model. (See pl.Trainer)
    precision: int = choice(16, 32, default=32)

    default_root_dir: Path = Path(os.environ.get("RESULTS_DIR", os.getcwd() + "/results"))

    # How much of training dataset to check (floats = percent, int = num_batches)
    limit_train_batches: Union[int, float] = 1.0
    # How much of validation dataset to check (floats = percent, int = num_batches)
    limit_val_batches: Union[int, float] = 1.0
    # How much of test dataset to check (floats = percent, int = num_batches)
    limit_test_batches: Union[int, float] = 1.0

    # If ``True``, enable checkpointing.
    # It will configure a default ModelCheckpoint callback if there is no user-defined
    # ModelCheckpoint in the `callbacks`.
    checkpoint_callback: bool = True

    def make_trainer(
        self,
        config: Config,
        callbacks: Optional[List[Callback]] = None,
        loggers: Iterable[LightningLoggerBase] = None,
    ) -> "Trainer":
        """Create a Trainer object from the command-line args.
        Adds the given loggers and callbacks as well.
        """
        # FIXME: Trying to subclass the DataConnector to fix issues while iterating
        # over gym envs, that arise because of the _with_is_last() function from
        # lightning.
        import pytorch_lightning.trainer.trainer
        from pytorch_lightning.trainer.connectors.data_connector import DataConnector

        setattr(pytorch_lightning.trainer.trainer, "DataConnector", DataConnector)
        trainer = Trainer(
            logger=loggers,
            callbacks=callbacks,
            gpus=self.gpus,
            num_nodes=self.num_nodes,
            max_epochs=self.max_epochs,
            accelerator=self.accelerator,
            log_gpu_memory=self.log_gpu_memory,
            overfit_batches=self.overfit_batches,
            fast_dev_run=self.fast_dev_run,
            auto_scale_batch_size=self.auto_scale_batch_size,
            auto_lr_find=self.auto_lr_find,
            # TODO: Either move the log-dir-related stuff from Config to this
            # class, or figure out a way to pass the value from Config to this
            # function
            default_root_dir=self.default_root_dir,
            limit_train_batches=self.limit_train_batches,
            limit_val_batches=self.limit_val_batches,
            limit_test_batches=self.limit_train_batches,
            checkpoint_callback=self.checkpoint_callback,
            profiler=None,  # TODO: Seem to have an impact on the problem below.
        )
        return trainer


class Trainer(_Trainer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def fit(self, model, train_dataloader=None, val_dataloaders=None, datamodule=None):
        # TODO: Figure out what method to overwrite to fix the problem of accessing two
        # batches in a row in the environment. (with_is_last annoyance.)
        if isinstance(train_dataloader, gym.Env):
            if has_wrapper(train_dataloader, GymDataLoader):
                train_env = train_dataloader
                # raise NotImplementedError("TODO: Fix this.")
        return super().fit(
            model,
            train_dataloader=train_dataloader,
            val_dataloaders=val_dataloaders,
            datamodule=datamodule,
        )


# TODO: Debugging/fixing this buggy method from Pytorch-Lightning.


# def _apply_to_collection(
#     data: Any,
#     dtype: Union[type, tuple],
#     function: Callable,
#     *args,
#     wrong_dtype: Optional[Union[type, tuple]] = None,
#     **kwargs
# ) -> Any:


apply_to_collection = singledispatch(apply_to_collection)
setattr(pytorch_lightning.utilities.apply_func, "apply_to_collection", apply_to_collection)

# import pytorch_lightning.overrides.data_parallel
# setattr(pytorch_lightning.overrides.data_parallel, "apply_to_collection", apply_to_collection)


@apply_to_collection.register(Batch)
def _apply_to_batch(
    data: Batch,
    dtype: Union[type, tuple],
    function: Callable,
    *args,
    wrong_dtype: Optional[Union[type, tuple]] = None,
    **kwargs,
) -> Any:
    # assert False, f"YAY! {type(data)}"
    # logger.debug(f"{type(data)}, {dtype}, {function}, {args}, {wrong_dtype}, {kwargs}")
    return type(data)(
        **{
            k: apply_to_collection(v, dtype, function, *args, wrong_dtype=wrong_dtype, **kwargs)
            for k, v in data.items()
        }
    )


class ProfiledEnvironment(IterableWrapper, DataLoader):
    def __iter__(self):
        for i, obs in enumerate(super().__iter__()):
            # logger.debug(f"Step {i}, obs.done={obs.done}")
            done = obs.done
            if not isinstance(done, bool) or not done.shape:
                # TODO: When we have batch size of 1, or more generally in RL, do we
                # want one call to `trainer.fit` to last a given number of episodes ?
                # TODO: Look into the `max_steps` argument to Trainer.
                done = all(done)
            # done = done or self.is_closed()
            done = self.is_closed()
            yield i, (obs, done)


class PatchedDataConnector(DataConnector):
    def get_profiled_train_dataloader(self, train_dataloader: DataLoader):
        if isinstance(train_dataloader, CombinedLoader) and isinstance(
            train_dataloader.loaders, gym.Env
        ):
            env = train_dataloader.loaders
            # TODO: Replacing this 'CombinedLoader' on the Trainer with the env, since I
            # don't think we need it (not using multiple train dataloaders with PL atm.)
            self.trainer.train_dataloader = env
            if not isinstance(env.unwrapped, PassiveEnvironment):
                # Only really need to do this 'profile' thing for 'active' environments.
                return ProfiledEnvironment(env)
        else:
            # This gets called before each epoch, so we get here on the start of the
            # second training epoch.
            # TODO: Check that this isn't causing issues between tasks
            assert train_dataloader is self.trainer.train_dataloader

        profiled_dl = self.trainer.profiler.profile_iterable(
            enumerate(prefetch_iterator(train_dataloader)), "get_train_batch"
        )
        return profiled_dl


setattr(
    pytorch_lightning.trainer.connectors.data_connector,
    "DataConnector",
    PatchedDataConnector,
)
pytorch_lightning.trainer.connectors.data_connector.DataConnector = PatchedDataConnector


================================================
FILE: sequoia/methods.puml
================================================
@startuml methods

' !include gym.plantuml
' remove gym.spaces
' TODO: There must be a simpler way to only keep a single node, right?
' !include settings.puml
' remove settings.active
' remove settings.assumptions
' remove settings.passive
' remove SettingABC
' !include settings/base.puml

package methods {
    package base_method {
        class BaseMethod implements Method {
            + hparams: BaseModel.HParams
            + config: Config
            + trainer_options: TrainerConfig
            + trainer: Trainer
        }
    }
    package aux_tasks {
        package auxiliary_task {
            abstract class AuxiliaryTask {
                + options: AuxiliaryTask.Options
                + get_loss(ForwardPass, Actions, Rewards): Loss
                
            }
            abstract class AuxiliaryTask.Options {
                + coefficient: float
            }
            AuxiliaryTask *-- AuxiliaryTask.Options
        }
    }
    !include ./methods/models.puml
}
@enduml


================================================
FILE: sequoia/sequoia.puml
================================================
@startuml sequoia
package sequoia {
    !include common.puml
    !include settings.puml
    !include methods.puml
}
@enduml

================================================
FILE: sequoia/settings/README.md
================================================
# Sequoia - Settings

### (WIP) Adding a new Setting:

Prerequisites:


- Take a quick look at the `dataclasses` example
- Take a quick look at [simple_parsing](https://github.com/lebrice/SimpleParsing) (A python package I've created) which we use to generate the command-line arguments for the Settings.


<!-- MAKETREE -->


## Available Settings:


- ## [Setting](sequoia/settings/base/setting.py)

  Base class for all research settings in ML: Root node of the tree.

  A 'setting' is loosely defined here as a learning problem with a specific
  set of assumptions, restrictions, and an evaluation procedure.

  For example, Reinforcement Learning is a type of Setting in which we assume
  that an Agent is able to observe an environment, take actions upon it, and
  receive rewards back from the environment. Some of the assumptions include
  that the reward is dependant on the action taken, and that the actions have
  an impact on the environment's state (and on the next observations the agent
  will receive). The evaluation procedure consists in trying to maximize the
  reward obtained from an environment over a given number of steps.

  This 'Setting' class should ideally represent the most general learning
  problem imaginable, with almost no assumptions about the data or evaluation
  procedure.

  This is a dataclass. Its attributes are can also be used as command-line
  arguments using `simple_parsing`.

  Abstract (required) methods:
  - **apply** Applies a given Method on this setting to produce Results.
  - **prepare_data** (things to do on 1 GPU/TPU not on every GPU/TPU in distributed mode).
  - **setup**  (things to do on every accelerator in distributed mode).
  - **train_dataloader** the training environment/dataloader.
  - **val_dataloader** the val environments/dataloader(s).
  - **test_dataloader** the test environments/dataloader(s).

  "Abstract"-ish (required) class attributes:
  - `Results`: The class of Results that are created when applying a Method on
    this setting.
  - `Observations`: The type of Observations that will be produced  in this
      setting.
  - `Actions`: The type of Actions that are expected from this setting.
  - `Rewards`: The type of Rewards that this setting will (potentially) return
    upon receiving an action from the method.


  - ## [RLSetting](sequoia/settings/rl/setting.py)

    LightningDataModule for an 'active' setting.

    This is to be the parent of settings like RL or maybe Active Learning.


    - ## [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)

      Reinforcement Learning Setting where the environment changes over time.

      This is an Active setting which uses gym environments as sources of data.
      These environments' attributes could change over time following a task
      schedule. An example of this could be that the gravity increases over time
      in cartpole, making the task progressively harder as the agent interacts with
      the environment.


      - ## [DiscreteTaskAgnosticRLSetting](sequoia/settings/rl/discrete/setting.py)

        Continual Reinforcement Learning Setting where there are clear task boundaries,
        but where the task information isn't available.


        - ## [IncrementalRLSetting](sequoia/settings/rl/incremental/setting.py)

          Continual RL setting in which:
          - Changes in the environment's context occur suddenly (same as in Discrete, Task-Agnostic RL)
          - Task boundary information (and task labels) are given at training time
          - Task boundary information is given at test time, but task identity is not.


          - ## [TaskIncrementalRLSetting](sequoia/settings/rl/task_incremental/setting.py)

            Continual RL setting with clear task boundaries and task labels.

            The task labels are given at both train and test time.


            - ## [MultiTaskRLSetting](sequoia/settings/rl/multi_task/setting.py)

              Reinforcement Learning setting where the environment alternates between a set
              of tasks sampled uniformly.

              Implemented as a TaskIncrementalRLSetting, but where the tasks are randomly sampled
              during training.


          - ## [TraditionalRLSetting](sequoia/settings/rl/traditional/setting.py)

            Your usual "Classical" Reinforcement Learning setting.

            Implemented as a MultiTaskRLSetting, but with a single task.


            - ## [MultiTaskRLSetting](sequoia/settings/rl/multi_task/setting.py)

              Reinforcement Learning setting where the environment alternates between a set
              of tasks sampled uniformly.

              Implemented as a TaskIncrementalRLSetting, but where the tasks are randomly sampled
              during training.


  - ## [SLSetting](sequoia/settings/sl/setting.py)

    Supervised Learning Setting.

    Core assuptions:
    - Current actions have no influence on future observations.
    - The environment gives back "dense feedback", (the 'reward' associated with all
      possible actions at each step, rather than a single action)

    For example, supervised learning is a Passive setting, since predicting a
    label has no effect on the reward you're given (the label) or on the next
    samples you observe.


    - ## [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)

      Continuous, Task-Agnostic, Continual Supervised Learning.

      This is *currently* the most "general" Supervised Continual Learning setting in
      Sequoia.

      - Data distribution changes smoothly over time.
      - Smooth transitions between "tasks"
      - No information about task boundaries or task identity (no task IDs)
      - Maximum of one 'epoch' through the environment.


      - ## [DiscreteTaskAgnosticSLSetting](sequoia/settings/sl/discrete/setting.py)

        Continual Supervised Learning Setting where there are clear task boundaries, but
        where the task information isn't available.


        - ## [IncrementalSLSetting](sequoia/settings/sl/incremental/setting.py)

          Supervised Setting where the data is a sequence of 'tasks'.

          This class is basically is the supervised version of an Incremental Setting


          The current task can be set at the `current_task_id` attribute.


          - ## [TaskIncrementalSLSetting](sequoia/settings/sl/task_incremental/setting.py)

            Setting where data arrives in a series of Tasks, and where the task
            labels are always available (both train and test time).


            - ## [MultiTaskSLSetting](sequoia/settings/sl/multi_task/setting.py)

              IID version of the Task-Incremental Setting, where the data is shuffled.

              Can be used to estimate the upper bound performance of Task-Incremental CL Methods.


          - ## [DomainIncrementalSLSetting](sequoia/settings/sl/domain_incremental/setting.py)

            Supervised CL Setting where the input domain shifts incrementally.

            Task labels and task boundaries are given at training time, but not at test-time.
            The crucial difference between the Domain-Incremental and Class-Incremental settings
            is that the action space is smaller in domain-incremental learning, as it is a
            `Discrete(n_classes_per_task)`, rather than the `Discrete(total_classes)` in
            Class-Incremental setting.

            For example: Create a classifier for odd vs even hand-written digits. It first be
            trained on digits 0 and 1, then digits 2 and 3, then digits 4 and 5, etc.
            At evaluation time, it will be evaluated on all digits


          - ## [TraditionalSLSetting](sequoia/settings/sl/traditional/setting.py)

            Your 'usual' supervised learning Setting, where the samples are i.i.d.

            This Setting is slightly different than the others, in that it can be recovered in
            *two* different ways:
            - As a variant of Task-Incremental learning, but where there is only one task;
            - As a variant of Domain-Incremental learning, but where there is only one task.


            - ## [MultiTaskSLSetting](sequoia/settings/sl/multi_task/setting.py)

              IID version of the Task-Incremental Setting, where the data is shuffled.

              Can be used to estimate the upper bound performance of Task-Incremental CL Methods.


================================================
FILE: sequoia/settings/__init__.py
================================================
"""
"""
import inspect
from typing import Any, Dict, Iterable, List, Set, Type

from .base.bases import Method, SettingABC
from .base.environment import Environment
from .base.objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType
from .base.results import Results
from .base.setting import Setting, SettingType
from .rl import *
from .sl import *

# # all concrete settings:
# all_settings: List[Type[Setting]] = [
#     ClassIncrementalSetting,
#     DomainIncrementalSetting,
#     TaskIncrementalSLSetting,
#     TraditionalSLSetting,
#     MultiTaskSetting,
#     ContinualRLSetting,
#     IncrementalRLSetting,
#     TaskIncrementalRLSetting,
#     RLSetting,
# ]
# Or, get All the settings:
all_settings: Set[Type[SettingABC]] = set([Setting, *Setting.children()])
# FIXME: Remove this, just checking the inspect atm.:
# import inspect
# import pprint

# print(Setting.get_tree_string())
# exit()

# print(inspect.getclasstree(all_settings, unique=True))
# assert False
# assert False, all_settings


================================================
FILE: sequoia/settings/assumptions/__init__.py
================================================
""" WIP: Mixin-style classes that define 'traits'/'assumptions' about a Setting.

IDEA: This package could define things that are to be reused in both the RL and 
the CL branches, kindof like a horizontal slice accross the tree.

The reasoning behind this is that some methods might require task labels, but
apply on both sides of the tree.
An alternative to this could also be to allow Methods to target multiple
settings, but this could get weird pretty quick.
"""
from .incremental import IncrementalAssumption

# from .task_incremental import TaskIncrementalSLSetting


================================================
FILE: sequoia/settings/assumptions/assumptions.puml
================================================
@startuml assumptions


package assumptions {
    '  TODO: How to describe relationship between gym.Env and these other 
    ' assumptions about the env?
    ' abstract class Environment {

    ' }
    ' gym.Env --|> Environment

    package "assumptions about the environment" as supervision_assumptions {
        package "effect of future actions on the environment" as active_vs_passive
        {
            interface PossiblyActiveEnvironment <<Assumption>> {
                # Actions MAY influence future observations
            }
            abstract class ActiveEnvironment <<Assumption>> extends PossiblyActiveEnvironment {
                # Actions DO influence future observations
                --
                Examples:
                Playing tennis
            }
            abstract class PassiveEnvironment <<Assumption>> extends PossiblyActiveEnvironment {
                Actions DONT influence future observations
                --
                Examples:
                + Predicting what might happen next when watching a movie.
            }
            ' Environment --|> PossiblyActiveEnvironment
        }

        package "type of feedback (rewards)" as feedback_type_assumption
        {
            interface Feedback <<Assumption>> {}
            abstract class SparseFeedback <<Assumption>> extends Feedback {
                the environment only gives back the reward associated with the action taken.
                --
                Example: When you play a game, you get a reward based on how good your action was.
            }
            abstract class DenseFeedback <<Assumption>> extends SparseFeedback {
                The environment gives the reward for all possible actions at every step.
                --
                Example: Image classification: The method is told what the image was and
                what it was not. The reward (correct vs incorrect prediction) is given
                for all the potential actions!
            }
        }
    }

    package "assumptions about the context" as context_assumption_family {
        package "discrete vs continuous" as context_continuous_vs_discrete {
            abstract class ContinuousContext <<Assumption>>  {
                The context variable is continuous: c ∈ R
                Example: Varying friction with the ground in an environment.
            }
            abstract class DiscreteContext <<Assumption>>  extends ContinuousContext {
                The context variable is discrete: c ∈ N
                Example: A list of possible tasks
            }
            abstract class FixedContext <<Assumption>> extends DiscreteContext {
                The context variable is fixed to a single value
            }
        }
        package "observability" as context_observability {
            abstract class HiddenContext <<Assumption>>  {
                Methods don't have access to the context variable.
            }
            ' abstract class BoundariesObservable <<Assumption>> extends HiddenContext {
            '     Task boundaries are given during training
            ' }
            abstract class PartiallyObservableContext <<Assumption>>  extends HiddenContext {
                Methods may have access to the context variable some of the time
                Example: Have task labels during training, but not during testing.
            }
            abstract class FullyObservableContext <<Assumption>>  extends PartiallyObservableContext {
                Methods always have access to the context variable.
                i.e., during training and testing.
            }
        }
        package "non-stationarity" as context_nonstationarity_assumption {
            abstract class Continual <<Assumption>> {
                The context may change smoothly over time.
            }
            abstract class Incremental <<Assumption>> extends Continual {
                The context can change suddenly (task boundaries)
            }
            abstract class Stationary <<Assumption>> extends Incremental {
                The context is sampled uniformly
            }
        }
        package "shared vs disjoint spaces between tasks" as action_space_assumption {
            ' NOTE: We could have this for the observation and reward spaces too!
            abstract class PossiblySharedActionSpace {
                It is possible that there is an overlap in the action space between tasks. 
            }
            abstract class SharedActionSpaces extends PossiblySharedActionSpace {
                The action space remains the same in all tasks.
            }
            abstract class DisjointActionSpaces extends PossiblySharedActionSpace {
                Each task has its own (disjoint) action space. 
            }
        }
    }
}

package cl {
    package continuous {
        abstract class ContinuousTaskAgnosticSetting <<AbstractSetting>> extends base.SettingABC {
            - clear_task_boundaries: bool = False
            ' - task_labels_at_train_time: bool = False
            ' - task_labels_at_test_time: bool = False
            ' - stationary_context: bool = False
            ' - shared_action_space: bool = False
        }
        abstract class continuous.Environment <<Environment>> extends gym.Env {}
        abstract class continuous.Observations <<Observations>> extends base.Observations {}
        abstract class continuous.Actions <<Actions>> extends base.Actions {}
        abstract class continuous.Rewards <<Rewards>> extends base.Rewards {}
        ' continuous.Environment -.- continuous.Observations: yields
        ' continuous.Environment -.- continuous.Actions: receives
        ' continuous.Environment -.- continuous.Rewards: returns
    }

    package discrete {
        abstract class DiscreteTaskAgnosticSetting <<AbstractSetting>> extends ContinuousTaskAgnosticSetting {
            == New assumptions ==

            + clear_task_boundaries: Constant[bool] = True
            ' + known_task_boundaries_at_train_time: bool = False
            ' + known_task_boundaries_at_test_time: bool = False

            == Inherited assumptions ==
            ' # task_labels_at_train_time: bool = False
            ' # task_labels_at_test_time: bool = False
            ' # stationary_context: bool = False
            ' # shared_action_space: bool = False

        }
        abstract class discrete.Environment <<Environment>> extends continuous.Environment {}
        abstract class discrete.Observations <<Observations>> extends continuous.Observations {}
        abstract class discrete.Actions <<Actions>> extends continuous.Actions {}
        abstract class discrete.Rewards <<Rewards>> extends continuous.Rewards {}
        ' discrete.Environment -.- discrete.Observations: yields
        ' discrete.Environment -.- discrete.Actions: receives
        ' discrete.Environment -.- discrete.Rewards: returns
    }
    package incremental {
        abstract class IncrementalSetting <<AbstractSetting>> extends DiscreteTaskAgnosticSetting{
            == New assumptions ==

            + known_task_boundaries_at_train_time: Constant[bool] = True
            + known_task_boundaries_at_test_time: Constant[bool] = True

            == Inherited assumptions ==

            # clear_task_boundaries: Constant[bool] = True
            ' # task_labels_at_train_time: bool = False
            ' # task_labels_at_test_time: bool = False
            ' # shared_action_space: bool = False
            ' # stationary_context: bool = False
            
        }
        abstract class incremental.Environment <<Environment>> extends discrete.Environment {}
        abstract class incremental.Observations <<Observations>> extends discrete.Observations {}
        abstract class incremental.Actions <<Actions>> extends discrete.Actions {}
        abstract class incremental.Rewards <<Rewards>> extends discrete.Rewards {}
        ' incremental.Environment -.- incremental.Observations: yields
        ' incremental.Environment -.- incremental.Actions: receives
        ' incremental.Environment -.- incremental.Rewards: returns
    }
    package class_incremental {
        abstract class ClassIncrementalSetting <<AbstractSetting>> extends IncrementalSetting {
            == New assumptions ==
            
            + shared_action_space: Constant[bool] = False

            == Inherited assumptions ==

            # clear_task_boundaries: Constant[bool] = True
            # known_task_boundaries_at_train_time: Constant[bool] = True
            # known_task_boundaries_at_test_time: Constant[bool] = True
            ' # task_labels_at_train_time: bool = False
            ' # task_labels_at_test_time: bool = False
            ' # stationary_context: bool = False
        }
        abstract class class_incremental.Environment <<Environment>> extends incremental.Environment {}
        abstract class class_incremental.Observations <<Observations>> extends incremental.Observations {}
        abstract class class_incremental.Actions <<Actions>> extends incremental.Actions {}
        abstract class class_incremental.Rewards <<Rewards>> extends incremental.Rewards {}
        ' class_incremental.Environment -.- class_incremental.Observations: yields
        ' class_incremental.Environment -.- class_incremental.Actions: receives
        ' class_incremental.Environment -.- class_incremental.Rewards: returns
    }
    package domain_incremental {
        abstract class DomainIncrementalSetting <<AbstractSetting>> extends IncrementalSetting {
            == New assumptions ==

            + shared_action_space: Constant[bool] = True

            == Inherited assumptions ==

            # clear_task_boundaries: Constant[bool] = True
            # known_task_boundaries_at_train_time: Constant[bool] = True
            # known_task_boundaries_at_test_time: Constant[bool] = True
        }
        abstract class domain_incremental.Environment <<Environment>> extends incremental.Environment {}
        abstract class domain_incremental.Observations <<Observations>> extends incremental.Observations {}
        abstract class domain_incremental.Actions <<Actions>> extends incremental.Actions {}
        abstract class domain_incremental.Rewards <<Rewards>> extends incremental.Rewards {}
        ' domain_incremental.Environment -.- domain_incremental.Observations: yields
        ' domain_incremental.Environment -.- domain_incremental.Actions: receives
        ' domain_incremental.Environment -.- domain_incremental.Rewards: returns
    }
    package task_incremental {
        abstract class TaskIncrementalSetting <<AbstractSetting>> extends IncrementalSetting {
            == New assumptions ==

            + task_labels_at_train_time: Constant[bool] = True
            + task_labels_at_test_time: Constant[bool] = True
            
            == Inherited assumptions ==

            # clear_task_boundaries: Constant[bool] = True
            # known_task_boundaries_at_train_time: Constant[bool] = True
            # known_task_boundaries_at_test_time: Constant[bool] = True
        }
        abstract class task_incremental.Environment <<Environment>> extends incremental.Environment {}
        abstract class task_incremental.Observations <<Observations>> extends incremental.Observations {}
        abstract class task_incremental.Actions <<Actions>> extends incremental.Actions {}
        abstract class task_incremental.Rewards <<Rewards>> extends incremental.Rewards {}
        ' task_incremental.Environment -.- task_incremental.Observations: yields
        ' task_incremental.Environment -.- task_incremental.Actions: receives
        ' task_incremental.Environment -.- task_incremental.Rewards: returns

    }
    package traditional{
        abstract class TraditionalSetting <<AbstractSetting>> extends IncrementalSetting {
            == New assumptions ==

            + stationary_context: Constant[bool] = True

            == Inherited assumptions ==

            # clear_task_boundaries: Constant[bool] = True
        }
        abstract class traditional.Environment <<Environment>> extends incremental.Environment {}
        abstract class traditional.Observations <<Observations>> extends incremental.Observations {}
        abstract class traditional.Actions <<Actions>> extends incremental.Actions {}
        abstract class traditional.Rewards <<Rewards>> extends incremental.Rewards {}
        ' traditional.Environment -.- traditional.Observations: yields
        ' traditional.Environment -.- traditional.Actions: receives
        ' traditional.Environment -.- traditional.Rewards: returns
    }
    package multi_task {
        abstract class MultiTaskSetting <<AbstractSetting>> extends TaskIncrementalSetting, TraditionalSetting {
            == New assumptions (compared to Traditional) ==

            + task_labels_at_train_time: Constant[bool] = True
            + task_labels_at_test_time: Constant[bool] = True

            == New assumptions (compared to TaskIncremental) ==

            + stationary_context: Context[bool] = True
            
            == Inherited assumptions ==
            # stationary_context: Context[bool] = True
            # task_labels_at_train_time: Constant[bool] = True
            # task_labels_at_test_time: Constant[bool] = True
            # clear_task_boundaries: Constant[bool] = True
            # known_task_boundaries_at_train_time: Constant[bool] = True
            # known_task_boundaries_at_test_time: Constant[bool] = True
        }
        abstract class multi_task.Environment <<Environment>> extends task_incremental.Environment, traditional.Environment {}
        abstract class multi_task.Observations <<Observations>> extends task_incremental.Observations, traditional.Observations {}
        abstract class multi_task.Actions <<Actions>> extends task_incremental.Actions, traditional.Actions {}
        abstract class multi_task.Rewards <<Rewards>> extends task_incremental.Rewards, traditional.Rewards {}
    }
}

' !include settings/base/base.puml
' remove settings.base

' !include gym.puml
remove assumptions
' remove @unlinked
remove class_incremental
remove domain_incremental
' remove <<Environment>>
' remove <<Observations>>
' remove <<Actions>>
' remove <<Rewards>>

' show context_assumption_family
' remove assumptions
' remove supervision_assumptions
' remove context_assumption_family
' remove <<Assumption>>
' remove <<AbstractSetting>>

' remove sl
' remove cl
' remove rl
' show SLSetting
' show RLSetting
' remove <<Setting>>

' hide empty fields
' hide empty methods
' ' remove gym
' remove gym.spaces
' ' remove cl
' remove class_incremental
' remove domain_incremental


@enduml

================================================
FILE: sequoia/settings/assumptions/base.py
================================================
from sequoia.settings.base.bases import SettingABC
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)

# IDEA:  (@lebrice) Exploring the idea of using metaclasses to customize the isinstance
# and subclass checks, so that it could be property-based. This is probably not worth it
# though.
# It's also quite dumb that we have to extend a metaclass from pytorch lightning!

# class AssumptionMeta(_DataModuleWrapper):
#     def __instancecheck__(self, instance: Union[SettingABC, Any]):
#         logger.debug(f"InstanceCheck on assumption {self} for instance {instance}")
#         return super().__instancecheck__(instance)


class AssumptionBase(SettingABC):
    pass


================================================
FILE: sequoia/settings/assumptions/classification.py
================================================
# TODO: Test if a `Protocol` task from the typing or typing-extensions module could be
# used as an Assumption, based on the type of action space on the Setting, etc.

# def num_classes_in_task(self, task_id: int, train: bool) -> Union[int, List[int]]:
#     """ Returns the number of classes in the given task. """
#     increment = self.increment if train else self.test_increment
#     if isinstance(increment, list):
#         return increment[task_id]
#     return increment

# def num_classes_in_current_task(self, train: bool = None) -> int:
#     """ Returns the number of classes in the current task. """
#     # TODO: Its ugly to have the 'method' tell us if we're currently in
#     # train/eval/test, no? Maybe just make a method for each?
#     return self.num_classes_in_task(self._current_task_id, train=train)

# def task_classes(self, task_id: int, train: bool) -> List[int]:
#     """ Gives back the 'true' labels present in the given task. """
#     start_index = sum(self.num_classes_in_task(i, train) for i in range(task_id))
#     end_index = start_index + self.num_classes_in_task(task_id, train)
#     if train:
#         return self.class_order[start_index:end_index]
#     else:
#         return self.test_class_order[start_index:end_index]

# def current_task_classes(self, train: bool) -> List[int]:
#     """ Gives back the labels present in the current task. """
#     return self.task_classes(self._current_task_id, train)


================================================
FILE: sequoia/settings/assumptions/context_discreteness.py
================================================
from dataclasses import dataclass

from sequoia.utils.utils import constant, flag

from .base import AssumptionBase


@dataclass
class ContinuousContextAssumption(AssumptionBase):
    # Wether we have clear boundaries between tasks, or if the transitions are smooth.
    # Equivalent to wether the context variable is discrete vs continuous.
    smooth_task_boundaries: bool = flag(True)


@dataclass
class DiscreteContextAssumption(ContinuousContextAssumption):
    # Wether we have clear boundaries between tasks, or if the transitions are smooth.
    # Equivalent to wether the context variable is discrete vs continuous.
    smooth_task_boundaries: bool = constant(False)


================================================
FILE: sequoia/settings/assumptions/context_visibility.py
================================================
from dataclasses import dataclass

from sequoia.utils.utils import constant, flag

from .base import AssumptionBase


@dataclass
class HiddenContextAssumption(AssumptionBase):
    # Wether the task labels are observable during training.
    task_labels_at_train_time: bool = flag(False)
    # Wether the task labels are observable during testing.
    task_labels_at_test_time: bool = flag(False)
    # Wether we get informed when reaching the boundary between two tasks during
    # training.
    known_task_boundaries_at_train_time: bool = flag(False)
    # Wether we get informed when reaching the boundary between two tasks during
    # testing.
    known_task_boundaries_at_test_time: bool = flag(False)


@dataclass
class PartiallyObservableContextAssumption(HiddenContextAssumption):
    # Wether the task labels are observable during training.
    task_labels_at_train_time: bool = constant(True)
    # Wether we get informed when reaching the boundary between two tasks during
    # training.
    known_task_boundaries_at_train_time: bool = constant(True)
    known_task_boundaries_at_test_time: bool = flag(True)


@dataclass
class FullyObservableContextAssumption(PartiallyObservableContextAssumption):
    # Wether the task labels are observable during testing.
    task_labels_at_test_time: bool = constant(True)
    # Wether we get informed when reaching the boundary between two tasks during
    # testing.
    known_task_boundaries_at_test_time: bool = constant(True)


================================================
FILE: sequoia/settings/assumptions/continual.py
================================================
import itertools
import json
import time
from abc import ABC, abstractmethod
from dataclasses import asdict, dataclass, field, is_dataclass
from io import StringIO
from pathlib import Path
from typing import Any, ClassVar, Dict, Optional, Type

import gym
import tqdm
from gym.vector.utils import batch_space
from simple_parsing import field
from simple_parsing.helpers.serialization.serializable import Serializable
from torch import Tensor
from wandb.wandb_run import Run

import wandb
from sequoia.common.config import Config, WandbConfig
from sequoia.common.gym_wrappers.utils import IterableWrapper
from sequoia.common.metrics import Metrics, MetricsType
from sequoia.settings.base import Actions, Method
from sequoia.settings.base.results import Results
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import add_prefix, flag

from .base import AssumptionBase
from .iid_results import TaskResults

logger = get_logger(__name__)


@dataclass
class ContinualResults(TaskResults[MetricsType]):
    _runtime: Optional[float] = None
    _online_training_performance: Dict[int, MetricsType] = field(default_factory=dict)

    @property
    def online_performance(self) -> Dict[int, MetricsType]:
        """Returns the online training performance.

        In SL, this is only recorded over the first epoch.

        Returns
        -------
        Dict[int, MetricType]
            a dictionary mapping from step number to the Metrics object produced at that
            step.
        """
        if not self._online_training_performance:
            return {}
        return self._online_training_performance

    @property
    def online_performance_metrics(self) -> MetricsType:
        return sum(self.online_performance.values(), Metrics())

    def to_log_dict(self, verbose: bool = False) -> Dict:
        log_dict = {}
        log_dict["Average Performance"] = super().to_log_dict(verbose=verbose)
        if self._online_training_performance:
            log_dict["Online Performance"] = self.online_performance_metrics.to_log_dict(
                verbose=verbose
            )
        return log_dict

    def summary(self, verbose: bool = False) -> str:
        s = StringIO()
        print(json.dumps(self.to_log_dict(verbose=verbose), indent="\t"), file=s)
        s.seek(0)
        return s.read()


@dataclass
class ContinualAssumption(AssumptionBase):
    """Assumptions for Setting where the environments change over time."""

    # Which dataset to use.
    # dataset: ClassVar[str] = ""

    known_task_boundaries_at_train_time: bool = flag(False)
    # Wether we get informed when reaching the boundary between two tasks during
    # training. Only used when `smooth_task_boundaries` is False.
    known_task_boundaries_at_test_time: bool = flag(False)
    # Wether we have sudden changes in the environments, or if the transition
    # are "smooth".
    smooth_task_boundaries: bool = flag(True)

    # Wether task labels are available at train time.
    # NOTE: Forced to True at the moment.
    task_labels_at_train_time: bool = flag(False)

    # Wether task labels are available at test time.
    task_labels_at_test_time: bool = flag(False)

    @dataclass(frozen=True)
    class Observations(AssumptionBase.Observations):
        task_labels: Optional[Tensor] = None

    @dataclass(frozen=True)
    class Actions(AssumptionBase.Actions):
        pass

    @dataclass(frozen=True)
    class Rewards(AssumptionBase.Rewards):
        pass

    # TODO: Move everything necessary to get ContinualRLSetting to work out of
    # Incremental and into this here. Makes no sense that ContinualRLSetting inherits
    # from Incremental, rather than this!

    Results: ClassVar[Type[ContinualResults]] = ContinualResults

    # Options related to Weights & Biases (wandb). Turned Off by default. Passing any of
    # its arguments will enable wandb.
    # NOTE: Adding `cmd=False` here, so we only create the args in `Experiment`.
    # TODO: Fix this up.
    wandb: Optional[WandbConfig] = field(default=None, compare=False, cmd=False)

    def main_loop(self, method: Method) -> ContinualResults:
        """Runs a continual learning training loop, wether in RL or CL."""
        # TODO: Add ways of restoring state to continue a given run.
        if self.wandb and self.wandb.project:
            # Init wandb, and then log the setting's options.
            self.wandb_run = self.setup_wandb(method)
            method.setup_wandb(self.wandb_run)

        train_env = self.train_dataloader()
        valid_env = self.val_dataloader()

        logger.info(f"Starting training")
        method.set_training()
        self._start_time = time.process_time()

        method.fit(
            train_env=train_env,
            valid_env=valid_env,
        )
        train_env.close()
        valid_env.close()

        logger.info(f"Finished Training.")

        results = self.test_loop(method)

        if self.monitor_training_performance:
            results._online_training_performance = train_env.get_online_performance()

        logger.info(f"Resulting objective of Test Loop: {results.objective}")

        self._end_time = time.process_time()
        runtime = self._end_time - self._start_time
        results._runtime = runtime

        logger.info(f"Finished main loop in {runtime} seconds.")
        self.log_results(method, results)
        return results

    def test_loop(self, method: Method) -> "IncrementalAssumption.Results":
        """WIP: Continual test loop."""
        test_env = self.test_dataloader()

        test_env: TestEnvironment

        was_training = method.training
        method.set_testing()

        try:
            # If the Method has `test` defined, use it.
            method.test(test_env)
            test_env.close()
            test_env: TestEnvironment
            # Get the metrics from the test environment
            test_results: Results = test_env.get_results()

        except NotImplementedError:
            logger.debug(
                f"Will query the method for actions at each step, "
                f"since it doesn't implement a `test` method."
            )
            obs = test_env.reset()

            # TODO: Do we always have a maximum number of steps? or of episodes?
            # Will it work the same for Supervised and Reinforcement learning?
            max_steps: int = getattr(test_env, "step_limit", None)

            # Reset on the last step is causing trouble, since the env is closed.
            pbar = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test")
            episode = 0

            for step in pbar:
                if obs is None:
                    break
                # NOTE: The env might not be closed, while `obs` is actually still there.
                # if test_env.is_closed():
                #     logger.debug(f"Env is closed")
                #     break
                # logger.debug(f"At step {step}")

                # BUG: Need to pass an action space that actually reflects the batch
                # size, even for the last batch!

                # BUG: This doesn't work if the env isn't batched.
                action_space = test_env.action_space
                batch_size = getattr(test_env, "num_envs", getattr(test_env, "batch_size", 0))
                env_is_batched = batch_size is not None and batch_size >= 1
                if env_is_batched:
                    # NOTE: Need to pass an action space that actually reflects the batch
                    # size, even for the last batch!
                    obs_batch_size = obs.x.shape[0] if obs.x.shape else None
                    action_space_batch_size = (
                        test_env.action_space.shape[0] if test_env.action_space.shape else None
                    )
                    if obs_batch_size is not None and obs_batch_size != action_space_batch_size:
                        action_space = batch_space(test_env.single_action_space, obs_batch_size)

                action = method.get_actions(obs, action_space)

                if test_env.is_closed():
                    break

                obs, reward, done, info = test_env.step(action)

                if done and not test_env.is_closed():
                    # logger.debug(f"end of test episode {episode}")
                    obs = test_env.reset()
                    episode += 1

            test_env.close()
            test_results: Results = test_env.get_results()

        if wandb.run:
            d = add_prefix(test_results.to_log_dict(), prefix="Test", sep="/")
            # d = add_prefix(test_metrics.to_log_dict(), prefix="Test", sep="/")
            # d["current_task"] = task_id
            wandb.log(d)

        # Restore 'training' mode, if it was set at the start.
        if was_training:
            method.set_training()

        return test_results
        # return test_results
        # if not self.task_labels_at_test_time:
        #     # TODO: move this wrapper to common/wrappers.
        #     test_env = RemoveTaskLabelsWrapper(test_env)

    def setup_wandb(self, method: Method) -> Run:
        """Call wandb.init, log the experiment configuration to the config dict.

        This assumes that `self.wandb` is not None. This happens when one of the wandb
        arguments is passed.

        Parameters
        ----------
        method : Method
            Method to be applied.
        """
        assert isinstance(self.wandb, WandbConfig)
        method_name: str = method.get_name()
        setting_name: str = self.get_name()

        if not self.wandb.run_name:
            # Set the default name for this run.
            run_name = f"{method_name}-{setting_name}"
            dataset = getattr(self, "dataset", None)
            if isinstance(dataset, str):
                run_name += f"-{dataset}"
            if getattr(self, "nb_tasks", 0) > 1:
                run_name += f"_{self.nb_tasks}t"  # type: ignore
            self.wandb.run_name = run_name

        run: Run = self.wandb.wandb_init()
        run.config["setting"] = setting_name
        # Add the setting's options into the config:
        setting_config_dict: Dict[str, Any] = {}
        if isinstance(self, Serializable):
            setting_config_dict = self.to_dict()
        elif is_dataclass(self):
            setting_config_dict = asdict(self)
        run.config.update({f"setting.{k}": v for k, v in setting_config_dict.items()})
        run.config["method"] = method_name
        run.config["method_full_name"] = method.get_full_name()
        run.summary["setting"] = self.get_name()
        if isinstance(self.dataset, str):
            run.summary["dataset"] = self.dataset
        run.summary["method"] = method.get_name()
        assert wandb.run is run
        return run

    def log_results(self, method: Method, results: Results, prefix: str = "") -> None:
        """
        TODO: Create the tabs we need to show up in wandb:
        1. Final
            - Average "Current/Online" performance (scalar)
            - Average "Final" performance (scalar)
            - Runtime
        2. Test
            - Task i (evolution over time (x axis is the task id, if possible))
        """
        logger.info(results.summary())

        if wandb.run:
            wandb.summary["method"] = method.get_name()
            wandb.summary["setting"] = self.get_name()
            dataset = getattr(self, "dataset", "")
            if dataset and isinstance(dataset, str):
                wandb.summary["dataset"] = dataset

            results_dict = results.to_log_dict()
            if prefix:
                results_dict = add_prefix(results_dict, prefix=prefix, sep="/")
            wandb.log(results_dict)

            # BUG: Sometimes logging a matplotlib figure causes a crash:
            # File "/home/fabrice/miniconda3/envs/sequoia/lib/python3.8/site-packages/plotly/matplotlylib/mplexporter/utils.py", line 246, in get_grid_style
            # if axis._gridOnMajor and len(gridlines) > 0:
            # AttributeError: 'XAxis' object has no attribute '_gridOnMajor'
            # Seems to be fixed by downgrading the matplotlib version to 3.2.2

            plots_dict = results.make_plots()
            if prefix:
                plots_dict = add_prefix(plots_dict, prefix=prefix, sep="/")
            wandb.log(plots_dict)
            # TODO: Finish the run here? Not sure this is right.
            # wandb.run.finish()

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        In the case of Continual and DiscreteTaskAgnostic, fit is only called once,
        with an environment that shifts between all the tasks. In Incremental, fit is
        called once per task, while in Traditional and MultiTask, fit is called once.
        """
        return 1


from gym.vector import VectorEnv

from sequoia.common.gym_wrappers.utils import EnvType


class TestEnvironment(gym.wrappers.Monitor, IterableWrapper[EnvType], ABC):
    """Wrapper around a 'test' environment, which limits the number of steps
    and keeps tracks of the performance.
    """

    def __init__(
        self,
        env: EnvType,
        directory: Path,
        step_limit: int = 1_000,  # TODO: Remove this, use a dedicated wrapper for that.
        no_rewards: bool = False,
        config: Config = None,
        *args,
        **kwargs,
    ):
        super().__init__(env, directory, *args, **kwargs)
        # TODO: Need to stop re-creating the Monitor wrappers when we already have the list of envs
        # for each task!
        logger.info(f"Creating test env (Monitor) with log directory {self.directory}")
        self.step_limit = step_limit
        self.no_rewards = no_rewards
        self._steps = 0
        self.config = config
        # if wandb.run:
        #     wandb.gym.monitor()

    def step(self, action):
        self._before_step(action)
        # NOTE: Monitor wrapper from gym doesn't call `super().step`, so we have to
        # overwrite it here.
        observation, reward, done, info = IterableWrapper.step(self, action)
        done = self._after_step(observation, reward, done, info)
        return observation, reward, done, info

    def reset(self, **kwargs):
        self._before_reset()
        observation = IterableWrapper.reset(self, **kwargs)
        self._after_reset(observation)
        return observation

    @abstractmethod
    def get_results(self) -> Results:
        """Return how well the Method was applied on this environment.

        In RL, this would be based on the mean rewards, while in supervised
        learning it could be the average accuracy, for instance.

        Returns
        -------
        Results
            [description]
        """
        # TODO: Total reward over a number of steps? Over a number of episodes?
        # Average reward? What's the metric we care about in RL?
        rewards = self.get_episode_rewards()
        lengths = self.get_episode_lengths()
        total_steps = self.get_total_steps()
        return sum(rewards) / total_steps

    def step(self, action):
        # TODO: Its A bit uncomfortable that we have to 'unwrap' these here..
        # logger.debug(f"Step {self._steps}")
        action_for_stats = action.y_pred if isinstance(action, Actions) else action

        self._before_step(action_for_stats)

        if isinstance(action, Tensor):
            action = action.cpu().numpy()
        observation, reward, done, info = self.env.step(action)
        observation_for_stats = observation.x
        reward_for_stats = reward.y

        # TODO: Always render when debugging? or only when the corresponding
        # flag is set in self.config?
        try:
            if self.config and self.config.render and self.config.debug:
                self.render("human")
        except NotImplementedError:
            pass

        if isinstance(self.env.unwrapped, VectorEnv):
            done = all(done)
        else:
            done = bool(done)

        done = self._after_step(observation_for_stats, reward_for_stats, done, info)

        if self.get_total_steps() >= self.step_limit:
            done = True
            self.close()

        # Remove the rewards if they aren't allowed.
        if self.no_rewards:
            reward = None

        return observation, reward, done, info


TestEnvironment.__test__ = False


================================================
FILE: sequoia/settings/assumptions/discrete_results.py
================================================
import json
from dataclasses import dataclass
from io import StringIO
from typing import ClassVar, Dict, Generic, List

import matplotlib.pyplot as plt
from simple_parsing.helpers import list_field

from sequoia.common.metrics import Metrics
from sequoia.settings.base.results import Results

from .iid_results import MetricType, TaskResults


@dataclass
class TaskSequenceResults(Results, Generic[MetricType]):
    """Results obtained when evaluated on a sequence of (discrete) Tasks."""

    task_results: List[TaskResults[MetricType]] = list_field()

    # For now, all the 'concrete' objectives (mean reward / episode in RL, accuracy in
    # SL) have higher => better
    lower_is_better: ClassVar[bool] = False

    def __post_init__(self):
        if self.task_results and isinstance(self.task_results[0], dict):
            self.task_results = [
                TaskResults.from_dict(task_result, drop_extra_fields=False)
                for task_result in self.task_results
            ]

    @property
    def objective_name(self) -> str:
        return self.average_metrics.objective_name

    @property
    def num_tasks(self) -> int:
        """Returns the number of tasks.

        Returns
        -------
        int
            Number of tasks.
        """
        return len(self.task_results)

    @property
    def average_metrics(self) -> MetricType:
        return sum(self.average_metrics_per_task, Metrics())

    @property
    def average_metrics_per_task(self) -> List[MetricType]:
        return [task_result.average_metrics for task_result in self.task_results]

    @property
    def objective(self) -> float:
        return self.average_metrics.objective

    def to_log_dict(self, verbose: bool = False) -> Dict:
        result = {}
        for task_id, task_results in enumerate(self.task_results):
            result[f"Task {task_id}"] = task_results.to_log_dict(verbose=verbose)
        result["Average"] = self.average_metrics.to_log_dict(verbose=verbose)
        return result

    def summary(self, verbose: bool = False):
        s = StringIO()
        print(json.dumps(self.to_log_dict(verbose=verbose), indent="\t"), file=s)
        s.seek(0)
        return s.read()

    def make_plots(self) -> Dict[str, plt.Figure]:
        result = {}
        for task_id, task_results in enumerate(self.task_results):
            result[f"Task {task_id}"] = task_results.make_plots()
        return result


================================================
FILE: sequoia/settings/assumptions/iid.py
================================================
""" IDEA: create the simple train loop for an IID setting (RL or CL).
"""

from dataclasses import dataclass

from sequoia.utils.utils import constant

from .task_incremental import TaskIncrementalAssumption

# TODO: Import and use the `TaskResults` here.


@dataclass
class TraditionalSetting(TaskIncrementalAssumption):
    """Assumption (mixin) for Settings where the data is stationary (only one
    task).
    """

    nb_tasks: int = constant(1)

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        Defaults to the number of tasks, but may be different, for instance in so-called
        Multi-Task Settings, this is set to 1.
        """
        return 1


================================================
FILE: sequoia/settings/assumptions/iid_results.py
================================================
""" Results for an IID experiment. """
from dataclasses import dataclass, field
from typing import ClassVar, Dict, Generic, List, TypeVar

import matplotlib.pyplot as plt

from sequoia.common.metrics import Metrics
from sequoia.settings.base.results import Results

MetricType = TypeVar("MetricType", bound=Metrics)


@dataclass
class TaskResults(Results, Generic[MetricType]):
    """Results within a given Task.

    This is just a List of a given Metrics type, with additional methods.
    """

    # For now, all the 'concrete' objectives (mean reward / episode in RL, accuracy in
    # SL) have higher => better
    lower_is_better: ClassVar[bool] = False

    metrics: List[MetricType] = field(default_factory=list)
    plots_dict: Dict[str, plt.Figure] = field(default_factory=dict)

    def __post_init__(self):
        if self.metrics and isinstance(self.metrics[0], dict):
            self.metrics = [
                Metrics.from_dict(metrics, drop_extra_fields=False) for metrics in self.metrics
            ]

    def __str__(self) -> str:
        return f"{type(self).__name__}(average(metrics)={self.average_metrics})"

    def __repr__(self) -> str:
        return f"{type(self).__name__}(average(metrics)={self.average_metrics})"

    @property
    def average_metrics(self) -> MetricType:
        """Returns the average 'Metrics' object for this task."""
        return sum(self.metrics, Metrics())

    @property
    def objective(self) -> float:
        """Returns the main 'objective' value (a float) for this task.

        This value could be the average accuracy in SL, or the mean reward / episode in
        RL, depending on the type of Metrics stored in `self`.

        Returns
        -------
        float
            A single float that describes how 'good' these results are.
        """
        return self.average_metrics.objective

    @property
    def objective_name(self) -> str:
        # TODO: Add this objective_name attribute on Metrics
        return self.average_metrics.objective_name

    def __str__(self):
        return f"{type(self).__name__}({self.average_metrics})"

    def to_log_dict(self, verbose: bool = False) -> Dict:
        """Produce a dictionary that describes the results / metrics etc.

        Can be logged to console or to wandb using `wandb.log(results.to_log_dict())`.

        Parameters
        ----------
        verbose : bool, optional
            Wether to include very detailed information. Defaults to `False`.

        Returns
        -------
        Dict
            A dict mapping from str keys to either values or nested dicts of the same
            form.
        """
        return self.average_metrics.to_log_dict(verbose=verbose)

    def summary(self) -> str:
        return str(self.to_log_dict())

    def make_plots(self) -> Dict[str, plt.Figure]:
        """Produce a set of plots using the Metrics stored in this object.

        Returns
        -------
        Dict[str, plt.Figure]
            Dict mapping from strings to matplotlib plots.
        """
        # Could actually create plots here too.
        return self.plots_dict


================================================
FILE: sequoia/settings/assumptions/incremental.py
================================================
import itertools
import time
from abc import abstractmethod
from dataclasses import dataclass
from typing import ClassVar, Optional, Sequence, Type, Union

import tqdm
from gym.vector.utils.spaces import batch_space
from simple_parsing import field
from torch import Tensor
from wandb.wandb_run import Run

import wandb
from sequoia.common.gym_wrappers.step_callback_wrapper import StepCallbackWrapper
from sequoia.settings.base import Actions, Environment, Method, Results, Rewards, Setting
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import add_prefix, constant, flag

from .continual import ContinualAssumption, TestEnvironment
from .incremental_results import IncrementalResults, TaskSequenceResults

logger = get_logger(__name__)


@dataclass
class IncrementalAssumption(ContinualAssumption):
    """Mixin that defines methods that are common to all 'incremental'
    settings, where the data is separated into tasks, and where you may not
    always get the task labels.

    Concretely, this holds the train and test loops that are common to the
    ClassIncrementalSetting (highest node on the Passive side) and ContinualRL
    (highest node on the Active side), therefore this setting, while abstract,
    is quite important.

    """

    # Which dataset to use.
    # dataset: str

    Results: ClassVar[Type[Results]] = IncrementalResults

    @dataclass(frozen=True)
    class Observations(Setting.Observations):
        """Observations produced by an Incremental setting.

        Adds the 'task labels' to the base Observation.
        """

        task_labels: Union[Optional[Tensor], Sequence[Optional[Tensor]]] = None

    # Wether we have clear boundaries between tasks, or if the transition is
    # smooth.
    smooth_task_boundaries: bool = constant(False)  # constant for now.

    # Wether task labels are available at train time.
    # NOTE: Forced to True at the moment.
    task_labels_at_train_time: bool = flag(default=True)
    # Wether task labels are available at test time.
    task_labels_at_test_time: bool = flag(default=False)
    # Wether we get informed when reaching the boundary between two tasks during
    # training. Only used when `smooth_task_boundaries` is False.

    # TODO: Setting constant for now, but we could add task boundary detection
    # later on!
    known_task_boundaries_at_train_time: bool = constant(True)
    # Wether we get informed when reaching the boundary between two tasks during
    # training. Only used when `smooth_task_boundaries` is False.
    known_task_boundaries_at_test_time: bool = True

    # The number of tasks. By default 0, which means that it will be set
    # depending on other fields in __post_init__, or eventually be just 1.
    nb_tasks: int = field(5, alias=["n_tasks", "num_tasks"])

    # Attributes (not parsed through the command-line):
    _current_task_id: int = field(default=0, init=False)

    def __post_init__(self):
        super().__post_init__()

        self.train_env: Environment = None  # type: ignore
        self.val_env: Environment = None  # type: ignore
        self.test_env: TestEnvironment = None  # type: ignore

        self.wandb_run: Optional[Run] = None

        self._start_time: Optional[float] = None
        self._end_time: Optional[float] = None
        self._setting_logged_to_wandb: bool = False

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        Defaults to the number of tasks, but may be different, for instance in so-called
        Multi-Task Settings, this is set to 1.
        """
        return self.nb_tasks

    @property
    def current_task_id(self) -> Optional[int]:
        """Get the current task id.

        TODO: Do we want to return None if the task labels aren't currently
        available? (at either Train or Test time?) Or if we 'detect' if
        this is being called from the method?

        TODO: This property doesn't really make sense in the Multi-Task SL or RL
        settings.
        """
        return self._current_task_id

    @current_task_id.setter
    def current_task_id(self, value: int) -> None:
        """Sets the current task id."""
        self._current_task_id = value

    def task_boundary_reached(self, method: Method, task_id: int, training: bool):
        known_task_boundaries = (
            self.known_task_boundaries_at_train_time
            if training
            else self.known_task_boundaries_at_test_time
        )
        task_labels_available = (
            self.task_labels_at_train_time if training else self.task_labels_at_test_time
        )

        if known_task_boundaries:
            # Inform the model of a task boundary. If the task labels are
            # available, then also give the id of the new task to the
            # method.
            # TODO: Should we also inform the method of wether or not the
            # task switch is occuring during training or testing?
            if not hasattr(method, "on_task_switch"):
                logger.warning(
                    UserWarning(
                        f"On a task boundary, but since your method doesn't "
                        f"have an `on_task_switch` method, it won't know about "
                        f"it! "
                    )
                )
            elif not task_labels_available:
                method.on_task_switch(None)
            elif self.phases == 1:
                # NOTE: on_task_switch won't be called if there is only one task.
                pass
            else:
                method.on_task_switch(task_id)

    def main_loop(self, method: Method) -> IncrementalResults:
        """Runs an incremental training loop, wether in RL or CL."""
        # TODO: Add ways of restoring state to continue a given run?
        # For each training task, for each test task, a list of the Metrics obtained
        # during testing on that task.
        # NOTE: We could also just store a single metric for each test task, but then
        # we'd lose the ability to create a plots to show the performance within a test
        # task.
        # IDEA: We could use a list of IIDResults! (but that might cause some circular
        # import issues)
        results = self.Results()
        if self.monitor_training_performance:
            results._online_training_performance = []

        if self.wandb and self.wandb.project:
            # Init wandb, and then log the setting's options.
            self.wandb_run = self.setup_wandb(method)
            method.setup_wandb(self.wandb_run)

        # TODO: Fix this up, need to set the '_objective_scaling_factor' to a different
        # value depending on the 'dataset' / environment.
        results._objective_scaling_factor = self._get_objective_scaling_factor()

        method.set_training()

        self._start_time = time.process_time()

        for task_id in range(self.phases):
            logger.info(
                f"Starting training" + (f" on task {task_id}." if self.nb_tasks > 1 else ".")
            )
            self.current_task_id = task_id
            self.task_boundary_reached(method, task_id=task_id, training=True)

            # Creating the dataloaders ourselves (rather than passing 'self' as
            # the datamodule):
            task_train_env = self.train_dataloader()
            task_valid_env = self.val_dataloader()

            method.fit(
                train_env=task_train_env,
                valid_env=task_valid_env,
            )
            task_train_env.close()
            task_valid_env.close()

            if self.monitor_training_performance:
                results._online_training_performance.append(task_train_env.get_online_performance())

            logger.info(f"Finished Training on task {task_id}.")
            test_metrics: TaskSequenceResults = self.test_loop(method)

            # Add a row to the transfer matrix.
            results.task_sequence_results.append(test_metrics)
            logger.info(f"Resulting objective of Test Loop: {test_metrics.objective}")

            if wandb.run:
                d = add_prefix(test_metrics.to_log_dict(), prefix="Test", sep="/")
                # d = add_prefix(test_metrics.to_log_dict(), prefix="Test", sep="/")
                d["current_task"] = task_id
                wandb.log(d)

        self._end_time = time.process_time()
        runtime = self._end_time - self._start_time
        results._runtime = runtime
        logger.info(f"Finished main loop in {runtime} seconds.")
        self.log_results(method, results)
        return results

    def test_loop(self, method: Method) -> "IncrementalAssumption.Results":
        """(WIP): Runs an incremental test loop and returns the Results.

        The idea is that this loop should be exactly the same, regardless of if
        you're on the RL or the CL side of the tree.

        NOTE: If `self.known_task_boundaries_at_test_time` is `True` and the
        method has the `on_task_switch` callback defined, then a callback
        wrapper is added that will invoke the method's `on_task_switch` and pass
        it the task id (or `None` if `not self.task_labels_available_at_test_time`)
        when a task boundary is encountered.

        This `on_task_switch` 'callback' wrapper gets added the same way for
        Supervised or Reinforcement learning settings.
        """
        test_env = self.test_dataloader()

        test_env: TestEnvironment

        was_training = method.training
        method.set_testing()

        if self.known_task_boundaries_at_test_time and self.nb_tasks > 1:

            def _on_task_switch(step: int, *arg) -> None:
                # TODO: This attribute isn't on IncrementalAssumption itself, it's defined
                # on ContinualRLSetting.
                if step not in test_env.boundary_steps:
                    return
                if not hasattr(method, "on_task_switch"):
                    logger.warning(
                        UserWarning(
                            f"On a task boundary, but since your method doesn't "
                            f"have an `on_task_switch` method, it won't know about "
                            f"it! "
                        )
                    )
                    return

                if self.task_labels_at_test_time:
                    # TODO: Should this 'test boundary' step depend on the batch size?
                    task_steps = sorted(test_env.boundary_steps)
                    # TODO: If the ordering of tasks were different (shuffled
                    # tasks for example), then this wouldn't work, we'd need a
                    # list of the task ids or something like that.
                    task_id = task_steps.index(step)
                    logger.debug(
                        f"Calling `method.on_task_switch({task_id})` "
                        f"since task labels are available at test-time."
                    )
                    method.on_task_switch(task_id)
                else:
                    logger.debug(
                        f"Calling `method.on_task_switch(None)` "
                        f"since task labels aren't available at "
                        f"test-time, but task boundaries are known."
                    )
                    method.on_task_switch(None)

            test_env = StepCallbackWrapper(test_env, callbacks=[_on_task_switch])

        # If the Method has `test` defined, use it.
        method.test(test_env)
        test_env: TestEnvironment
        # Get the metrics from the test environment
        test_results: TaskSequenceResults = test_env.get_results()

        # Restore 'training' mode, if it was set at the start.
        if was_training:
            method.set_training()

        return test_results
        # return test_results
        # if not self.task_labels_at_test_time:
        #     # TODO: move this wrapper to common/wrappers.
        #     test_env = RemoveTaskLabelsWrapper(test_env)

    @abstractmethod
    def train_dataloader(
        self, *args, **kwargs
    ) -> Environment["IncrementalAssumption.Observations", Actions, Rewards]:
        """Returns the DataLoader/Environment for the current train task."""
        return super().train_dataloader(*args, **kwargs)

    @abstractmethod
    def val_dataloader(
        self, *args, **kwargs
    ) -> Environment["IncrementalAssumption.Observations", Actions, Rewards]:
        """Returns the DataLoader/Environment used for validation on the
        current task.
        """
        return super().val_dataloader(*args, **kwargs)

    @abstractmethod
    def test_dataloader(
        self, *args, **kwargs
    ) -> Environment["IncrementalAssumption.Observations", Actions, Rewards]:
        """Returns the Test Environment (for all the tasks)."""
        return super().test_dataloader(*args, **kwargs)

    def _get_objective_scaling_factor(self) -> float:
        return 1.0


================================================
FILE: sequoia/settings/assumptions/incremental_results.py
================================================
""" Results of an Incremental setting. """
import json
import warnings
from dataclasses import dataclass
from io import StringIO
from typing import ClassVar, Dict, Generic, List, Optional, Union

import matplotlib.pyplot as plt
import numpy as np
from gym.utils import colorize
from simple_parsing.helpers import list_field
from simple_parsing.helpers.serialization import encode

import wandb
from sequoia.common.metrics import Metrics
from sequoia.settings.base.results import Results

from .discrete_results import TaskSequenceResults
from .iid_results import MetricType, TaskResults


@dataclass
class IncrementalResults(Results, Generic[MetricType]):
    """Results for a whole train loop (transfer matrix).

    This class is basically just a 2d list of TaskResults objects, with some convenience
    methods and properties.
    We get one TaskSequenceResults (a 1d list of TaskResults objects) as a result of
    every test loop, which, in the Incremental Settings, happens after training on each
    task, hence why we get a nb_tasks x nb_tasks matrix of results.
    """

    task_sequence_results: List[TaskSequenceResults[MetricType]] = list_field()

    min_runtime_hours: ClassVar[float] = 0.0
    max_runtime_hours: ClassVar[float] = 12.0

    def __post_init__(self):
        self._runtime: Optional[float] = None
        self._online_training_performance: Optional[List[Dict[int, Metrics]]] = None
        # Factor used to scale the 'objective' to a 'score' between 0 and 1.
        self._objective_scaling_factor: float = 1.0

    @property
    def runtime_minutes(self) -> Optional[float]:
        return self._runtime / 60 if self._runtime is not None else None

    @property
    def runtime_hours(self) -> Optional[float]:
        return self._runtime / 3600 if self._runtime is not None else None

    @property
    def transfer_matrix(self) -> List[List[TaskResults]]:
        return [
            task_sequence_result.task_results for task_sequence_result in self.task_sequence_results
        ]

    @property
    def metrics_matrix(self) -> List[List[MetricType]]:
        """Returns the 'transfer matrix' but with the average metrics for each task
        in each cell.

        NOTE: This is different from `transfer_matrix` since it returns the matrix of
        `TaskResults` objects (which are themselves lists of Metrics)

        Returns
        -------
        List[List[MetricType]]
            2d grid of average metrics for each task.
        """
        return [
            [task_results.average_metrics for task_results in task_sequence_result]
            for task_sequence_result in self
        ]

    @property
    def objective_matrix(self) -> List[List[float]]:
        """Return transfer matrix containing the value of the 'objective' for each task.

        The value at the index (i, j) gives the test performance on task j after having
        learned tasks 0-i.

        Returns
        -------
        List[List[float]]
            The 2d matrix of objectives (floats).
        """
        return [
            [task_result.objective for task_result in task_sequence_result]
            for task_sequence_result in self.transfer_matrix
        ]

    @property
    def cl_score(self) -> float:
        """CL Score, as a weigted sum of three objectives:
        - The average final performance over all tasks
        - The average 'online' performance over all tasks
        - Runtime

        TODO: @optimass Determine the weights for each factor.

        Returns
        -------
        float
            [description]
        """
        # TODO: Determine the function to use to get a runtime score between 0 and 1.
        score = (
            +0.30 * self._online_performance_score()
            + 0.40 * self._final_performance_score()
            + 0.30 * self._runtime_score()
        )
        return score

    def _runtime_score(self) -> float:
        # TODO: function that takes the total runtime in seconds and returns a
        # normalized float score between 0 and 1.
        runtime_seconds = self._runtime
        if self._runtime is None:
            warnings.warn(
                RuntimeWarning(
                    colorize(
                        "Runtime is None! Returning runtime score of 0.\n (Make sure the "
                        "Setting had its `monitor_training_performance` attr set to True!",
                        color="red",
                    )
                )
            )
            return 0
        runtime_hours = runtime_seconds / 3600

        # Get the maximum runtime for this type of Results (and Setting)
        min_runtime_hours = type(self).min_runtime_hours
        max_runtime_hours = type(self).max_runtime_hours

        assert 0 <= min_runtime_hours < max_runtime_hours
        assert 0 < runtime_hours
        if runtime_hours <= min_runtime_hours:
            return 1.0
        if max_runtime_hours <= runtime_hours:
            return 0.0
        return 1 - ((runtime_hours - min_runtime_hours) / (max_runtime_hours - min_runtime_hours))

    def _online_performance_score(self) -> float:
        """Function that takes the 'objective' of the Metrics from the average online
        performance, and returns a normalized float score between 0 and 1.
        """
        objectives: List[float] = [
            task_online_metric.objective for task_online_metric in self.online_performance_metrics
        ]
        return self._objective_scaling_factor * np.mean(objectives)
        # return self._objective_scaling_factor * self.average_online_performance.objective

    def _final_performance_score(self) -> float:
        """Function that takes the 'objective' of the Metrics from the average
        final performance, and returns a normalized float score between 0 and 1.
        """
        objectives: List[float] = [
            task_metric.objective for task_metric in self.final_performance_metrics
        ]
        return self._objective_scaling_factor * np.mean(objectives)
        # return self._objective_scaling_factor * self.average_final_performance.objective

    @property
    def objective(self) -> float:
        # return self.cl_score
        return self.average_final_performance.objective

    @property
    def num_tasks(self) -> int:
        return len(self.task_sequence_results)

    @property
    def online_performance(self) -> List[Dict[int, MetricType]]:
        """Returns the online training performance for each task. i.e. the diagonal of
        the transfer matrix.

        In SL, this is only recorded over the first epoch.

        Returns
        -------
        List[Dict[int, MetricType]]
            A List containing, for each task, a dictionary mapping from step number to
            the Metrics object produced at that step.
        """
        if not self._online_training_performance:
            return [{} for _ in range(self.num_tasks)]
        return self._online_training_performance

        # return [self[i][i] for i in range(self.num_tasks)]

    @property
    def online_performance_metrics(self) -> List[MetricType]:
        return [
            sum(online_performance_dict.values(), Metrics())
            for online_performance_dict in self.online_performance
        ]

    @property
    def final_performance(self) -> List[TaskResults[MetricType]]:
        return self.transfer_matrix[-1]

    @property
    def final_performance_metrics(self) -> List[MetricType]:
        return [task_result.average_metrics for task_result in self.final_performance]

    @property
    def average_online_performance(self) -> MetricType:
        return sum(self.online_performance_metrics, Metrics())

    @property
    def average_final_performance(self) -> MetricType:
        return sum(self.final_performance_metrics, Metrics())

    def to_log_dict(self, verbose: bool = False) -> Dict:
        log_dict = {}
        # TODO: This assumes that the metrics were stored in the right index for their
        # corresponding task.
        for task_id, task_sequence_result in enumerate(self.task_sequence_results):
            log_dict[f"Task {task_id}"] = task_sequence_result.to_log_dict(verbose=verbose)

        if self._online_training_performance:
            log_dict["Online Performance"] = {
                f"Task {task_id}": task_online_metrics.to_log_dict(verbose=verbose)
                for task_id, task_online_metrics in enumerate(self.online_performance_metrics)
            }

        log_dict.update(
            {
                "Final/Average Online Performance": self._online_performance_score(),
                "Final/Average Final Performance": self._final_performance_score(),
                "Final/Runtime (seconds)": self._runtime,
                "Final/CL Score": self.cl_score,
            }
        )
        return log_dict

    def summary(self, verbose: bool = False):
        s = StringIO()
        log_dict = self.to_log_dict(verbose=verbose)
        log_dict_json = json.dumps(log_dict, indent="\t", default=encode)
        print(log_dict_json, file=s)
        s.seek(0)
        return s.read()

    def make_plots(self) -> Dict[str, Union[plt.Figure, Dict]]:
        plots = {
            f"Task {task_id}": task_sequence_result.make_plots()
            for task_id, task_sequence_result in enumerate(self.task_sequence_results)
        }
        axis_labels = [f"Task {task_id}" for task_id in range(self.num_tasks)]
        if wandb.run:
            plots["Transfer matrix"] = wandb.plots.HeatMap(
                x_labels=axis_labels,
                y_labels=axis_labels,
                matrix_values=self.objective_matrix,
                show_text=True,
            )
            objective_array = np.asfarray(self.objective_matrix)
            perf_per_step = objective_array.mean(-1)
            table = wandb.Table(
                data=[[i + 1, perf] for i, perf in enumerate(perf_per_step)],
                columns=["# of learned tasks", "Average Test performance on all tasks"],
            )
            plots["Test Performance"] = wandb.plot.line(
                table,
                x="# of learned tasks",
                y="Average Test performance on all tasks",
                title="Test Performance vs # of Learned tasks",
            )
        return plots

    def __str__(self) -> str:
        return self.summary()


================================================
FILE: sequoia/settings/assumptions/incremental_test.py
================================================
from typing import List, Optional

import gym
import numpy as np
from gym import Space
from gym.vector.utils.spaces import batch_space

from sequoia.methods import Method
from sequoia.settings import Actions, Environment, Observations

from .incremental import IncrementalAssumption, TestEnvironment


class DummyMethod(Method, target_setting=IncrementalAssumption):
    """Dummy method used to check that the Setting calls `on_task_switch` with the
    right arguments.
    """

    def __init__(self):
        self.n_task_switches = 0
        self.n_fit_calls = 0
        self.received_task_ids: List[Optional[int]] = []
        self.received_while_training: List[bool] = []
        self.train_steps_per_task: List[int] = []
        self.train_episodes_per_task: List[int] = []

    def fit(self, train_env: gym.Env = None, valid_env: gym.Env = None):
        self.n_fit_calls += 1
        self.train_steps_per_task.append(0)
        self.train_episodes_per_task.append(0)
        obs = train_env.reset()
        for i in range(100):
            obs, reward, done, info = train_env.step(train_env.action_space.sample())
            self.train_steps_per_task[-1] += 1
            if done:
                self.train_episodes_per_task[-1] += 1
                break

    def test(self, test_env: TestEnvironment):
        while not test_env.is_closed():
            done = False
            obs = test_env.reset()
            while not done:
                actions = test_env.action_space.sample()
                obs, _, done, info = test_env.step(actions)

    def get_actions(
        self, observations: IncrementalAssumption.Observations, action_space: gym.Space
    ):
        return np.ones(action_space.shape)

    def on_task_switch(self, task_id: int = None):
        self.n_task_switches += 1
        self.received_task_ids.append(task_id)
        self.received_while_training.append(self.training)


class OtherDummyMethod(Method, target_setting=IncrementalAssumption):
    def __init__(self):
        self.batch_sizes: List[int] = []

    def fit(self, train_env: Environment, valid_env: Environment):
        for i, batch in enumerate(train_env):
            if isinstance(batch, Observations):
                observations, rewards = batch, None
            else:
                assert isinstance(batch, tuple) and len(batch) == 2
                observations, rewards = batch

            y_preds = train_env.action_space.sample()
            if rewards is None:
                action_space = train_env.action_space
                if train_env.action_space.shape:
                    # This is a bit complicated, but it's needed because the last batch
                    # might have a different batch dimension than the env's action
                    # space, (only happens on the last batch in supervised learning).
                    # TODO: Should we perhaps drop the last batch?
                    action_space = train_env.action_space
                    batch_size = getattr(train_env, "num_envs", getattr(train_env, "batch_size", 0))
                    env_is_batched = batch_size is not None and batch_size >= 1
                    if env_is_batched:
                        # NOTE: Need to pass an action space that actually reflects the batch
                        # size, even for the last batch!
                        obs_batch_size = observations.x.shape[0] if observations.x.shape else None
                        action_space_batch_size = (
                            train_env.action_space.shape[0]
                            if train_env.action_space.shape
                            else None
                        )
                        if obs_batch_size is not None and obs_batch_size != action_space_batch_size:
                            action_space = batch_space(
                                train_env.single_action_space, obs_batch_size
                            )

                y_preds = action_space.sample()
                rewards = train_env.send(Actions(y_pred=y_preds))

    def get_actions(self, observations: Observations, action_space: Space) -> Actions:
        # This won't work on weirder spaces.
        if action_space.shape:
            assert observations.x.shape[0] == action_space.shape[0]
        if getattr(observations.x, "shape", None):
            batch_size = 1
            if observations.x.ndim > 1:
                batch_size = observations.x.shape[0]
            self.batch_sizes.append(batch_size)
        else:
            self.batch_sizes.append(0)  # X isn't batched.
        return action_space.sample()


================================================
FILE: sequoia/settings/assumptions/task_incremental.py
================================================
from dataclasses import dataclass

from sequoia.utils.utils import constant

from .context_visibility import FullyObservableContextAssumption
from .incremental import IncrementalAssumption


@dataclass
class TaskIncrementalAssumption(FullyObservableContextAssumption, IncrementalAssumption):
    """Assumption (mixin) for Settings where the task labels are available at
    both train and test time.
    """

    task_labels_at_train_time: bool = constant(True)
    task_labels_at_test_time: bool = constant(True)


================================================
FILE: sequoia/settings/assumptions/task_type.py
================================================
from dataclasses import dataclass
from typing import Union

from torch import LongTensor, Tensor

from sequoia.settings.base import Actions


@dataclass(frozen=True)
class ClassificationActions(Actions):
    """Typed dict-like class that represents the 'forward pass'/output of a
    classification head, which correspond to the 'actions' to be sent to the
    environment, in the general formulation.
    """

    y_pred: Union[LongTensor, Tensor]
    logits: Tensor

    @property
    def action(self) -> LongTensor:
        return self.y_pred

    @property
    def y_pred_log_prob(self) -> Tensor:
        """returns the log probabilities for the chosen actions/predictions."""
        return self.logits[:, self.y_pred]

    @property
    def y_pred_prob(self) -> Tensor:
        """returns the log probabilities for the chosen actions/predictions."""
        return self.probabilities[self.y_pred]

    @property
    def probabilities(self) -> Tensor:
        """Returns the normalized probabilies for each class, i.e. the
        softmax-ed version of `self.logits`.
        """
        return self.logits.softmax(-1)


================================================
FILE: sequoia/settings/base/__init__.py
================================================
from .bases import Method, SettingABC
from .environment import Environment
from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType
from .results import Results
from .setting import Setting, SettingType


================================================
FILE: sequoia/settings/base/base.puml
================================================
@startuml base
!include gym.puml
remove gym.spaces
remove Wrapper
hide empty members

package sequoia as settings.base {
    ' namespace base.objects {
    together {
        together {
            abstract class Observations extends Batch {
                + x: Tensor
            }
            abstract class Actions extends Batch {
                + y_pred: Tensor
            }
            abstract class Rewards extends Batch {
                + y: Tensor
            }
        }
        
        Environment --* Observations: yields
        Environment --* Actions: receives
        Environment --* Rewards: returns

        interface Environment extends gym.Env, torch.DataLoader {
            + observation_space: Space<Observations>
            + action_space: Space<Actions>
            + reward_space: Space<Rewards>
            + step(Actions actions) -> Tuple[Observations, Rewards, bool, Dict] 
            + reset() -> Observations
        }

        abstract class Results {
            + objective: float
        }

        interface SettingABC {
            -- static (class) attributes --

            + {static} Results: Type[Results] 
            + {static} Observations: Type[Observations] 
            + {static} Actions: Type[Actions] 
            + {static} Rewards: Type[Rewards] 
            --
            {abstract} + apply(Method): Results
        }
        ' TODO: Here we just show the most basic interface.
        abstract class Setting extends SettingABC, pytorch_lightning.LightningDataModule {
            -- static (class) attributes --

            + {static} Results: Type[Results] 
            + {static} Observations: Type[Observations] 
            + {static} Actions: Type[Actions] 
            + {static} Rewards: Type[Rewards] 

            ' TODO: should we move this to `Setting` rather than SettingABC?
            -- inherited from LightningDataModule --
            {abstract} + prepare_data()
            {abstract} + setup()
            {abstract} + train_dataloader() -> Environment
            {abstract} + val_dataloader() -> Environment
            {abstract} + test_dataloader() -> Environment

            == Abstract Method ==
            
            {abstract} + apply(Method) -> Results
        }


    ' NOTE: Choose either of the following code blocks:
    ' -------------

    remove Setting
    remove pytorch_lightning
    SettingABC -.left-> Environment : creates
    SettingABC -.-> Results : produces
    SettingABC -.-> Method : applies
    SettingABC <-.- Method  : targets

    ' ----- OR -----

    ' remove SettingABC
    ' Setting -.left-> Environment : creates
    ' Setting -.-> Results : produces
    ' Setting -.-> Method : applies
    ' Setting <-.- Method  : targets

    ' -------------
    
    }

    Method <-.-> Environment : interacts with

    abstract class Method <S extends Setting> {
        ..  abstract static attributes ..

        {static} {abstract} target_setting: Type[S]

        ..  abstract (required) methods ..

        {abstract} + fit(train_env: Environment, valid_env: Environment)
        {abstract} + get_actions(observations: Observations, action_space: Space)
        
        .. optional methods ..

        + configure(setting: S)
        + on_task_switch(task_id: Optional[int])
        + test(test_env: Environment)

        ' - is_applicable(setting: SettingABC): bool
    }

    abstract class Model {
        + forward(input: Observations) -> Actions
    }
    Method -.- Model : ( can use ) 
}
remove Batch

@enduml

================================================
FILE: sequoia/settings/base/bases.py
================================================
""" This module defines the base classes for Settings and Methods.
"""
import json
import traceback
import typing
from abc import ABC, abstractmethod
from functools import partial
from io import StringIO
from pathlib import Path
from typing import (
    Any,
    ClassVar,
    Dict,
    Generic,
    Iterable,
    List,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    TypeVar,
    Union,
)

import gym
from gym.utils import colorize
from pytorch_lightning import LightningDataModule
from wandb.wandb_run import Run

import wandb

if typing.TYPE_CHECKING:
    from sequoia.common.config.config import Config

from sequoia.settings.base.environment import Environment
from sequoia.settings.base.objects import Actions, Observations, Rewards
from sequoia.settings.base.results import Results
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.parseable import Parseable
from sequoia.utils.utils import (
    camel_case,
    compute_identity,
    flatten_dict,
    get_path_to_source_file,
    remove_suffix,
)

logger = get_logger(__name__)


class SettingABC:
    """Abstract base class for a Setting.

    This just shows the minimal API. For more info, see the `Setting` class,
    which is the concrete implementation of this class, and the 'root' of the
    tree.

    Abstract (required) methods:
    - **apply** Applies a given Method on this setting to produce Results.

    "Abstract"-ish (required) class attributes:
    - `Results`: The class of Results that are created when applying a Method on
      this setting.
    - `Observations`: The type of Observations that will be produced  in this
        setting.
    - `Actions`: The type of Actions that are expected from this setting.
    - `Rewards`: The type of Rewards that this setting will (potentially) return
      upon receiving an action from the method.
    """

    Results: ClassVar[Type[Results]] = Results
    Observations: ClassVar[Type[Observations]] = Observations
    Actions: ClassVar[Type[Actions]] = Actions
    Rewards: ClassVar[Type[Rewards]] = Rewards

    @abstractmethod
    def apply(self, method: "Method", config: "Config" = None) -> "SettingABC.Results":
        """Applies a Method on this experimental Setting to produce Results.

        Defines the training/evaluation procedure specific to this Setting.

        The training/evaluation loop can be defined however you want, as long as
        it respects the following constraints:

        1.  This method should always return either a float or a Results object
            that indicates the "performance" of this method on this setting.

        2. More importantly: You **have** to make sure that you do not break
            compatibility with more general methods targetting a parent setting!
            It should always be the case that all methods designed for any of
            this Setting's parents should also be applicable via polymorphism,
            i.e., anything that is defined to work on the class `Animal` should
            also work on the class `Cat`!

        3. While not enforced, it is strongly encourged that you define your
            training/evaluation routines at a pretty high level, so that Methods
            that get applied to your Setting can make use of pytorch-lightning's
            `Trainer` & `LightningDataModule` API to be neat and fast.

        Parameters
        ----------
        method : Method
            A Method to apply on this Setting.

        config : Optional[Config]
            Optional configuration object with things like the log dir, the data
            dir, cuda, wandb config, etc. When None, will be parsed from the
            current command-line arguments.

        Returns
        -------
        Results
            An object that is used to measure or quantify the performance of the
            Method on this experimental Setting.
        """
        raise NotImplementedError()

    @abstractmethod
    def prepare_data(self, *args, **kwargs):
        pass

    @abstractmethod
    def setup(self, stage: Optional[str] = None):
        pass

    @abstractmethod
    def train_dataloader(self, *args, **kwargs) -> Environment[Observations, Actions, Rewards]:
        pass

    @abstractmethod
    def val_dataloader(self, *args, **kwargs) -> Environment[Observations, Actions, Rewards]:
        pass

    @abstractmethod
    def test_dataloader(self, *args, **kwargs) -> Environment[Observations, Actions, Rewards]:
        pass

    @classmethod
    @abstractmethod
    def get_available_datasets(cls) -> Iterable[str]:
        """Returns an iterable of the names of available datasets."""

    # --- Below this are some class attributes and methods related to the Tree. ---

    # These are some "private" class attributes.
    # For any new Setting subclass, it's parent setting.
    _parent: ClassVar[Type["SettingABC"]] = None
    # A list of all the direct children of this setting.
    _children: ClassVar[Set[Type["SettingABC"]]] = set()
    # List of all methods that directly target this Setting.
    _targeted_methods: ClassVar[Set[Type["Method"]]] = set()

    def __init_subclass__(cls, **kwargs):
        """Called whenever a new subclass of `Setting` is declared."""
        # logger.debug(f"Registering a new setting: {cls.get_name()}")

        # Exceptionally, create this new empty list that will hold all the
        # forthcoming subclasses of this particular new setting.
        cls._children = set()
        cls._targeted_methods = set()
        # Inform the immediate parents in the tree that they have a new child.
        for immediate_parent in cls.get_immediate_parents():
            immediate_parent._children.add(cls)
        super().__init_subclass__(**kwargs)

    @classmethod
    def get_applicable_methods(cls) -> List[Type["Method"]]:
        """Returns all the Methods applicable on this Setting."""
        applicable_methods: List[Method] = []
        from sequoia.methods import get_all_methods

        for method_type in get_all_methods():
            if method_type.is_applicable(cls):
                applicable_methods.append(method_type)
        return applicable_methods

    @classmethod
    def register_method(cls, method: Type["Method"]):
        """Register a method as being Applicable on this type of Setting."""
        cls._targeted_methods.add(method)

    @classmethod
    def get_name(cls) -> str:
        """Gets the name of this Setting."""
        # LightningDataModule has a `name` class attribute of `...`!
        if getattr(cls, "name", None) != Ellipsis:
            return cls.name
        name = camel_case(cls.__qualname__)
        return remove_suffix(name, "_setting")

    @classmethod
    def immediate_children(cls) -> Iterable[Type["SettingABC"]]:
        """Returns the immediate children of this Setting in the hierarchy.
        In most cases, this will be a list with only one value.
        """
        yield from cls._children

    @classmethod
    def get_immediate_children(cls) -> List[Type["SettingABC"]]:
        """Returns a list of the immediate children of this Setting."""
        return list(cls.immediate_children())

    @classmethod
    def children(cls) -> Iterable[Type["SettingABC"]]:
        """Returns an Iterator over all the children of this Setting, in-order."""
        # Yield the immediate children.
        for child in cls._children:
            yield child
            # Yield from the children themselves.
            yield from child.children()

    @classmethod
    def get_children(cls) -> List[Type["SettingABC"]]:
        return list(cls.children())

    @classmethod
    def immediate_parents(cls) -> List[Type["SettingABC"]]:
        """Returns the immediate parent(s) Setting(s).
        In most cases, this will be a list with only one value.
        """
        return [parent for parent in cls.__bases__ if issubclass(parent, SettingABC)]

    @classmethod
    def get_immediate_parents(cls) -> List[Type["SettingABC"]]:
        """Returns the immediate parent(s) Setting(s).
        In most cases, this will be a list with only one value.
        """
        return cls.immediate_parents()

    @classmethod
    def parents(cls) -> Iterable[Type["SettingABC"]]:
        """yields the lineage, from bottom to top.

        NOTE: In the case of Settings having multiple parents (such as TraditionalSLSetting),
        this is still just a list that reflects the method resolution order for that
        setting.
        """
        return [
            parent_class for parent_class in cls.mro()[1:] if issubclass(parent_class, SettingABC)
        ]

    @classmethod
    def get_parents(cls) -> List[Type["SettingABC"]]:
        return list(cls.parents())

    @classmethod
    def get_path_to_source_file(cls: Type) -> Path:
        from sequoia.utils.utils import get_path_to_source_file

        return get_path_to_source_file(cls)

    @classmethod
    def get_tree_string(
        cls,
        formatting: str = "command_line",
        with_methods: bool = False,
        with_assumptions: bool = False,
        with_docstrings: bool = False,
    ) -> str:
        """Returns a string representation of the tree starting at this node downwards."""
        from sequoia.utils.readme import get_tree_string, get_tree_string_markdown

        formatting_functions = {
            "command_line": get_tree_string,
            "markdown": get_tree_string_markdown,
        }
        if formatting not in formatting_functions.keys():
            raise RuntimeError(
                f"formatting must be one of {','.join(formatting_functions)}, " f"got {formatting}"
            )
        return formatting_functions[formatting](
            cls,
            with_methods=with_methods,
            with_assumptions=with_assumptions,
            with_docstrings=with_docstrings,
        )


SettingType = TypeVar("SettingType", bound=SettingABC)


class Method(Generic[SettingType], Parseable, ABC):
    """ABC for a Method, which is a solution to a research problem (a Setting)."""

    # Class attribute that holds the setting this method was designed to target.
    # Needs to either be passed to the class statement or set as a class
    # attribute.
    target_setting: ClassVar[Type[SettingType]] = None

    _training: bool

    def configure(self, setting: SettingType) -> None:
        """Configures this method before it gets applied on the given Setting.

        Args:
            setting (SettingType): The setting the method will be evaluated on.
        """

    @abstractmethod
    def get_actions(
        self, observations: Observations, action_space: gym.Space
    ) -> Union[Actions, Any]:
        """Get a batch of predictions (actions) for the given observations.
        returned actions must fit the action space.
        """

    @abstractmethod
    def fit(
        self,
        train_env: Environment[Observations, Actions, Rewards],
        valid_env: Environment[Observations, Actions, Rewards],
    ):
        """Called by the Setting to give the method data to train with.

        Might be called more than once before training is 'complete'.
        """

    def test(self, test_env: Environment[Observations, Actions, Optional[Rewards]]):
        """(WIP) Optional method which could be called by the setting to give
        your Method more flexibility about how it wants to arrange the test env.

        Parameters
        ----------
        test_env : Environment[Observations, Actions, Optional[Rewards]]
            Test environment which monitors your actions, and in which you are
            only allowed a limited number of steps.
        """
        import tqdm

        pbar = tqdm.tqdm(desc="Testing")
        postfix = {}
        steps = 0
        episodes = 0
        while not test_env.is_closed():
            observations = test_env.reset()
            done = False
            episode_steps = 0
            while not (done or test_env.is_closed()):
                actions = self.get_actions(observations, action_space=test_env.action_space)
                observations, rewards, done, info = test_env.step(actions)
                steps += 1
                episode_steps += 1
                postfix.update(steps=steps, episode_steps=episode_steps)
                pbar.set_postfix(postfix)
            pbar.update()
            episodes += 1
            postfix.update(episodes=episodes)
        pbar.close()

    def receive_results(self, setting: SettingType, results: Results) -> None:
        """Receive the Results of applying this method on the given Setting.

        This method is optional.

        This will be called after the method has been successfully applied to
        a Setting, and could be used to log or persist the results somehow.

        Parameters
        ----------
        results : Results
            The `Results` object constructed by `setting`, as a result of applying
            this Method to it.
        """

        run_name = ""
        # Set the default name for this run.
        # run_name = f"{method_name}-{setting_name}"
        # dataset = getattr(self, "dataset", None)
        # if isinstance(dataset, str):
        #     run_name += f"-{dataset}"
        # if getattr(self, "nb_tasks", 0) > 1:
        #     run_name += f"_{self.nb_tasks}t"

        setting_name = setting.get_name()
        method_name = self.get_name()
        base_results_dir: Path = setting.config.log_dir / setting_name / method_name

        dataset_name = getattr(setting, "dataset", None)
        if isinstance(dataset_name, str):
            base_results_dir /= dataset_name

        if wandb.run and wandb.run.id:
            # if setting.wandb and setting.wandb.project:
            run_id = wandb.run.id
            assert isinstance(run_id, str)
            # results_dir = base_results_dir / run_id
            # TODO: Fix this:
            results_dir = wandb.run.dir
        else:
            for suffix in [f"run_{i}" for i in range(100)]:
                results_dir = base_results_dir / suffix
                try:
                    results_dir.mkdir(exist_ok=False, parents=True)
                except FileExistsError:
                    pass
                else:
                    break
            else:
                raise RuntimeError(
                    f"Unable to create a unique results dir under {base_results_dir} "
                )
        results_dir = Path(results_dir)
        logger.info(f"Saving results in directory {results_dir}")
        results_json_path = results_dir / "results.json"
        try:
            with open(results_json_path, "w") as f:
                json.dump(results.to_log_dict(), f)
        except Exception as e:
            print(f"Unable to save the results: {e}")

        setting_path = results_dir / "setting.yaml"
        try:
            setting.save(setting_path)
        except Exception as e:
            print(f"Unable to save the Setting: {e}")

        method_path = results_dir / "method.yaml"
        try:
            self.save(method_path)
        except Exception as e:
            print(f"Unable to save the Method: {e}")

        if wandb.run:
            wandb.save(str(results_json_path))
            if setting_path.exists():
                wandb.save(str(setting_path))
            if method_path.exists():
                wandb.save(str(method_path))

    def setup_wandb(self, run: Run) -> None:
        """Called by the Setting when using Weights & Biases, after `wandb.init`.

        This method is here to provide Methods with the opportunity to log some of their
        configuration options or hyper-parameters to wandb.

        NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
        this point.

        Parameters
        ----------
        run : wandb.Run
            Current wandb Run.
        """

    def set_training(self) -> None:
        """Called by the Setting to let the Method know it is in the "training" phase.

        By default, this will try to to look for any nn.Module attributes on `self`, and
        call their `train()` method.
        """
        self._training = True
        try:
            from torch import nn

            for attribute, value in vars(self).items():
                if isinstance(value, nn.Module):
                    logger.debug(f"Calling 'train()' on the Method's {attribute} attribute.")
                    value.train()
        except Exception as exc:
            logger.warning(f"Unable to call `train()` on nn.Modules of the Method: {exc}")

    def set_testing(self) -> None:
        """Called by the Setting to let the Method know when it is in "testing" phase.

        By default, this will try to to look for any nn.Module attributes on `self`, and
        call their `eval()` method.
        """
        self._training = False
        try:
            from torch import nn

            for attribute, value in vars(self).items():
                if isinstance(value, nn.Module):
                    logger.debug(f"Calling 'eval()' on the Method's {attribute} attribute.")
                    value.eval()
        except Exception as exc:
            logger.warning(f"Unable to call `eval()` on nn.Modules of the Method: {exc}")

    @property
    def training(self) -> bool:
        """Wether we're currently in the 'training' phase.

        Returns
        -------
        bool
            Wether we're in the 'training' phase or not.
        """
        return getattr(self, "_training", True)

    @property
    def testing(self) -> bool:
        """Wether we're currently in the 'testing' phase.

        Returns
        -------
        bool
            Wether we're in the 'testing' phase or not.
        """
        return not self.training

    # --------
    # Below this are some class attributes and methods related to the Tree
    # structure and for launching Experiments using this method.
    # --------

    @classmethod
    def main(cls, argv: Optional[Union[str, List[str]]] = None) -> Results:
        """Run an Experiment from the command-line using this method.

        (TODO: @lebrice Finish writing a good docstring here that explains how this works
        and how to use it.)
        You can then select which setting, dataset, etc. this method will be
        applied to using the --setting <setting_name>, and the rest of the
        arguments will be passed to the Setting's from_args method.
        """

        from sequoia.main import Experiment

        experiment: Experiment
        # Create the Method object from the command-line:
        method = cls.from_args(argv, strict=False)
        # Then create the 'Experiment' from the command-line, which makes it
        # possible to choose between all the settings.
        experiment = Experiment.from_args(argv, strict=False)
        # Set the method attribute to be the one parsed above.
        experiment.method = method
        results: Results = experiment.launch(argv)
        return results

    @classmethod
    def is_applicable(cls, setting: Union[SettingType, Type[SettingType]]) -> bool:
        """Returns wether this Method is applicable to the given setting.

        A method is applicable on a given setting if and only if the setting is
        the method's target setting, or if it is a descendant of the method's
        target setting (below the target setting in the tree).

        Concretely, since the tree is implemented as an inheritance hierarchy,
        a method is applicable to any setting which is an instance (or subclass)
        of its target setting.

        Args:
            setting (SettingABC): a Setting.

        Returns:
            bool: Wether or not this method is applicable on the given setting.
        """

        # if given an object, get it's type.
        if isinstance(setting, LightningDataModule):
            setting = type(setting)

        if not issubclass(setting, SettingABC) and issubclass(setting, LightningDataModule):
            # TODO: If we're trying to check if this method would be compatible
            # with a LightningDataModule, rather than a Setting, then we treat
            # that LightningModule the same way we would an TraditionalSLSetting.
            # i.e., if we're trying to apply a Method on something that isn't in
            # the tree, then we consider that datamodule as the TraditionalSLSetting node.
            from sequoia.settings import TraditionalSLSetting

            setting = TraditionalSLSetting

        return issubclass(setting, cls.target_setting)

    @classmethod
    def get_applicable_settings(cls) -> List[Type[SettingType]]:
        """Returns all settings on which this method is applicable.
        NOTE: This only returns 'concrete' Settings.
        """
        from sequoia.settings import all_settings

        return list(filter(cls.is_applicable, all_settings))
        # This would return ALL the setting:
        # return list([cls.target_setting, *cls.target_setting.children()])

    @classmethod
    def all_evaluation_settings(cls, **kwargs) -> Iterable[SettingType]:
        """Generator over all the combinations of Settings/datasets on which
        this method is applicable.

        If keyword arguments are passed, they will be passed to the constructor
        of each setting.
        """
        for setting_type in cls.get_applicable_settings():
            for dataset in setting_type.get_available_datasets():
                setting = setting_type(dataset=dataset, **kwargs)
                yield setting

    @classmethod
    def get_name(cls) -> str:
        """Gets the name of this method class."""
        name = getattr(cls, "name", None)
        if name is None:
            name = camel_case(cls.__qualname__)
            name = remove_suffix(name, "_method")
        return name

    @classmethod
    def get_family(cls) -> Optional[str]:
        """Gets the name of the 'family' of Methods which contains this method class.

        This is used to differentiate methods with the same name, for instance
        sb3/DQN versus pl_bolts/DQN, sequoia/EWC vs avalanche/EWC, etc.
        """
        return getattr(cls, "family", None)

    @classmethod
    def get_full_name(cls) -> str:
        """Gets the 'full name' of a method, which is the "{family}.{name}" if the
        family is set, and just the name otherwise.

        The full name is used as the option on the command-line.
        """
        name = cls.get_name()
        family = cls.get_family()
        return f"{family}.{name}" if family is not None else name

    def __init_subclass__(cls, target_setting: Type[SettingType] = None, **kwargs) -> None:
        """Called when creating a new subclass of Method.

        Args:
            target_setting (Type[Setting], optional): The target setting.
                Defaults to None, in which case the method will inherit the
                target setting of it's parent class.
        """
        if target_setting:
            cls.target_setting = target_setting
        elif getattr(cls, "target_setting", None):
            target_setting = cls.target_setting
        else:
            raise RuntimeError(
                f"You must either pass a `target_setting` argument to the "
                f"class statement or have a `target_setting` class variable "
                f"when creating a new subclass of {__class__}."
            )
        # Register this new method on the Setting.
        target_setting.register_method(cls)
        return super().__init_subclass__(**kwargs)

    @classmethod
    def get_path_to_source_file(cls) -> Path:
        return get_path_to_source_file(cls)

    def get_experiment_name(self, setting: SettingABC, experiment_id: str = None) -> str:
        """Gets a unique name for the experiment where `self` is applied to `setting`.

        This experiment name will be passed to `orion` when performing a run of
        Hyper-Parameter Optimization.

        Parameters
        ----------
        - setting : Setting

            The `Setting` onto which this method will be applied. This method will be used when

        - experiment_id: str, optional

            A custom hash to append to the experiment name. When `None` (default), a
            unique hash will be created based on the values of the Setting's fields.

        Returns
        -------
        str
            The name for the experiment.
        """
        if not experiment_id:
            setting_dict = setting.to_dict()
            # BUG: Some settings have non-string keys/value or something?
            d = flatten_dict(setting_dict)
            experiment_id = compute_identity(size=5, **d)
        assert isinstance(setting.dataset, str), "assuming that dataset is a str for now."
        return f"{self.get_name()}-{setting.get_name()}_{setting.dataset}_{experiment_id}"

    def get_search_space(self, setting: SettingABC) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        raise NotImplementedError(
            "You need to provide an implementation for the `get_search_space` method "
            "in order to enable HPO sweeps."
        )

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        NOTE: It is very strongly recommended that you always re-create your model and
        any modules / components that depend on these hyper-parameters inside the
        `configure` method! (Otherwise these new hyper-parameters will not be used in
        the next run)

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        raise NotImplementedError(
            "You need to provide an implementation for the `adapt_to_new_hparams` "
            "method in order to enable HPO sweeps."
        )

    def hparam_sweep(
        self,
        setting: SettingABC,
        search_space: Dict[str, Union[str, Dict]] = None,
        experiment_id: str = None,
        database_path: Union[str, Path] = None,
        max_runs: int = None,
        hpo_algorithm: Union[str, Dict] = "BayesianOptimizer",
        debug: bool = False,
    ) -> Tuple[Dict, float]:
        """Performs a Hyper-Parameter Optimization sweep using orion.

        Changes the values in `self.hparams` iteratively, returning the best hparams
        found so far.

        Parameters
        ----------
        setting : Setting
            Setting to run the sweep on.

        search_space : Dict[str, Union[str, Dict]], optional
            Search space of the hyper-parameter optimization algorithm. Defaults to
            `None`, in which case the result of the `get_search_space` method is used.

        experiment_id : str, optional
            Unique Id to use when creating the experiment in Orion. Defaults to `None`,
            in which case a hash of the `setting`'s fields is used.

        database_path : Union[str, Path], optional
            Path to a pickle file to be used by Orion to store the hyper-parameters and
            their corresponding values. Default to `None`, in which case the database is
            created at path `./orion_db.pkl`.

        max_runs : int, optional
            Maximum number of runs to perform. Defaults to `None`, in which case the run
            lasts until the search space is exhausted.

        hpo_algorithm : Union[str, Dict], optional
            The hyper-parameter optimization algorithms to use.

        debug : bool, optional
            Wether to run Orion in debug-mode, where the database is an EphemeralDb,
            meaning it gets created for the sweep and destroyed at the end of the sweep.

        Returns
        -------
        Tuple[BaseModel.HParams, float]
            Best HParams, and the corresponding performance.
        """
        try:
            from orion.client import build_experiment
            from orion.core.worker.trial import Trial
        except ImportError as e:
            raise RuntimeError(
                f"Need to install the optional dependencies for HPO, using "
                f"`pip install -e .[hpo]` (error: {e})"
            ) from e

        search_space = search_space or self.get_search_space(setting)
        logger.info("HPO Search space:\n" + json.dumps(search_space, indent="\t"))

        database_path: Path = Path(database_path or "./orion_db.pkl")
        logger.info(f"Will use database at path '{database_path}'.")
        experiment_name = self.get_experiment_name(setting, experiment_id=experiment_id)

        experiment = build_experiment(
            name=experiment_name,
            space=search_space,
            debug=debug,
            algorithms=hpo_algorithm,
            max_trials=max_runs,
            storage={
                "type": "legacy",
                "database": {"type": "pickleddb", "host": str(database_path)},
            },
        )

        previous_trials: List[Trial] = experiment.fetch_trials_by_status("completed")
        # Since Orion works in a 'lower is better' fashion, so if the `objective` of the
        # Results class for the given Setting have "higher is better", we negate the
        # objectives when extracting them and again before submitting them to Orion.
        lower_is_better = setting.Results.lower_is_better
        sign = 1 if lower_is_better else -1
        if previous_trials:
            logger.info(
                f"Using existing Experiment {experiment} which has "
                f"{len(previous_trials)} existing trials."
            )
        else:
            logger.info(f"Created new experiment with name {experiment_name}")

        trials_performed = 0
        failed_trials = 0

        red = partial(colorize, color="red")
        green = partial(colorize, color="green")

        while not (experiment.is_done or failed_trials == 3):
            # Get a new suggestion of hparams to try:
            trial: Trial = experiment.suggest()

            # ---------
            # (Re)create the Model with the suggested Hparams values.
            # ---------

            new_hparams: Dict = trial.params
            # Inner function, just used to make the code below a bit simpler.
            # TODO: We should probably also change some values in the Config (e.g.
            # log_dir, checkpoint_dir, etc) between runs.
            logger.info("Suggested values for this run:\n" + json.dumps(new_hparams, indent="\t"))
            self.adapt_to_new_hparams(new_hparams)

            # ---------
            # Evaluate the (adapted) method on the setting:
            # ---------
            try:
                result: Results = setting.apply(self)
            except Exception:

                logger.error(red("Encountered an error, this trial will be dropped:"))
                logger.error(red("-" * 60))
                with StringIO() as s:
                    traceback.print_exc(file=s)
                    s.seek(0)
                    logger.error(red(s.read()))
                logger.error(red("-" * 60))
                failed_trials += 1
                logger.error(red(f"({failed_trials} failed trials so far). "))

                experiment.release(trial)
            else:
                # Report the results to Orion:
                orion_result = dict(
                    name=result.objective_name,
                    type="objective",
                    value=sign * result.objective,
                )
                experiment.observe(trial, [orion_result])
                trials_performed += 1
                logger.info(
                    green(
                        f"Trial #{trials_performed}: {result.objective_name} = {result.objective}"
                    )
                )
                # Receive the results, maybe log to wandb, whatever you wanna do.
                self.receive_results(setting, result)

        logger.info(
            "Experiment statistics: \n"
            + "\n".join(f"\t{key}: {value}" for key, value in experiment.stats.items())
        )
        logger.info(f"Number of previous trials: {len(previous_trials)}")
        logger.info(f"Trials successfully completed by this worker: {trials_performed}")
        logger.info(f"Failed Trials attempted by this worker: {failed_trials}")

        if "best_trials_id" not in experiment.stats:
            raise RuntimeError("Can't find the best trial, experiment might be broken!")

        best_trial: Trial = experiment.get_trial(uid=experiment.stats["best_trials_id"])
        best_hparams = best_trial.params
        best_objective = best_trial.objective
        return best_hparams, best_objective


================================================
FILE: sequoia/settings/base/environment.py
================================================
"""Defines the Abstract Base class for an "Environment".

NOTE (@lebrice): This 'Environment' abstraction isn't super useful at the moment
because there's only the `ActiveDataLoader` that fits this interface (since we
can't send anything to the usual DataLoader).
"""
from abc import ABC
from typing import Generic

import gym

from sequoia.utils.logging_utils import get_logger

from .objects import ActionType, ObservationType, RewardType

logger = get_logger(__name__)

from abc import abstractmethod


class Environment(
    gym.Env,
    Generic[ObservationType, ActionType, RewardType],
    ABC,
):
    """ABC for a learning 'environment' in *both* Supervised and Reinforcement Learning.

    Different settings can implement this interface however they want.
    """

    reward_space: gym.Space

    # @abstractmethod
    def is_closed(self) -> bool:
        """Returns wether this environment is closed."""
        if hasattr(self, "env") and hasattr(self.env, "is_closed"):
            return self.env.is_closed()
        raise NotImplementedError(self)


================================================
FILE: sequoia/settings/base/objects.py
================================================
from dataclasses import dataclass
from typing import Generic, TypeVar

import numpy as np
from torch import Tensor

from sequoia.common import Batch


@dataclass(frozen=True)
class Observations(Batch):
    """A batch of "observations" coming from an Environment."""

    x: Tensor

    @property
    def state(self) -> Tensor:
        return self.x

    def __len__(self) -> int:
        return self.batch_size


@dataclass(frozen=True)
class Actions(Batch):
    """A batch of "actions" coming from an Environment.

    For example, in a supervised setting, this would be the predicted labels,
    while in an RL setting, this would be the next 'actions' to take in the
    Environment.
    """

    y_pred: Tensor

    @property
    def actions(self) -> Tensor:
        return self.y_pred

    @property
    def actions_np(self) -> np.ndarray:
        """Returns the prediction/action as a numpy array."""
        if isinstance(self.y_pred, Tensor):
            return self.y_pred.detach().cpu().numpy()
        return np.asarray(self.y_pred)

    @property
    def predictions(self) -> Tensor:
        return self.y_pred


T = TypeVar("T")


@dataclass(frozen=True)
class Rewards(Batch, Generic[T]):
    """A batch of "rewards" coming from an Environment.

    For example, in a supervised setting, this would be the true labels, while
    in an RL setting, this would be the 'reward' for a state-action pair.

    TODO: Maybe add the task labels as a part of the 'Reward', to help with the
    training of task-inference methods later on when we add those.
    """

    # TODO: Rename this to 'reward', and add a 'y' field in the 'DenseRewards' class.
    y: T

    @property
    def labels(self) -> T:
        return self.y

    @property
    def reward(self) -> T:
        return self.y


ObservationType = TypeVar("ObservationType", bound=Observations)
ActionType = TypeVar("ActionType", bound=Actions)
RewardType = TypeVar("RewardType", bound=Rewards)


================================================
FILE: sequoia/settings/base/results.py
================================================
"""In the current setup, `Results` objects are created by a Setting when a
method is applied to them. Each setting can define its own type of `Results` to
customize what the ‘objective’ is in that particular setting.
For instance, the TaskIncrementalSLSetting class also defines a
TaskIncrementalResults class, where the average accuracy across all tasks is the
objective.

We currently have a unit testing setup that, for a given Method class, performs
a quick run of training / testing (using the --fast_dev_run option from
Pytorch-Lightning).
In those tests, there is also a `validate_results` function, which is basically
used to make sure that the results make sense, for the given method and setting.

For instance, when testing a RandomBaselineMethod on an TraditionalSLSetting, the accuracy
should be close to chance level. Likewise, in the `baseline_test.py` file, we
make sure that the BaseMethod (just a classifier, no CL adjustments) also
exhibits catastrophic forgetting when applied on a Class or Task Incremental
Setting.
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass
from functools import total_ordering
from pathlib import Path
from typing import Any, ClassVar, Dict, TypeVar, Union

import matplotlib.pyplot as plt
from simple_parsing import Serializable

from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


@dataclass
@total_ordering
class Results(Serializable, ABC):
    """Represents the results of an experiment.

    Here you can define what the quantity to maximize/minize is. This class
    should also be used to create the plots that will be helpful to understand
    and compare different results.

    TODO: Add wandb logging here somehow.
    """

    lower_is_better: ClassVar[bool] = False
    # Name for the 'objective'.
    objective_name: ClassVar[str] = "Objective"

    @property
    @abstractmethod
    def objective(self) -> float:
        """Returns a float value that indicating how "good" this result is.

        If the `lower_is_better` class variable is set to `False` (default),
        then this
        """
        raise NotImplementedError("Each Result subclass should implement this.")

    @abstractmethod
    def summary(self) -> str:
        """Gives a string describing the results, in a way that is easy to understand.

        :return: A summary of the results.
        :rtype: str
        """

    @abstractmethod
    def make_plots(self) -> Dict[str, plt.Figure]:
        """Generates the plots that are useful for understanding/interpreting or
        comparing this kind of results.

        :return: A dictionary mapping from plot name to the matplotlib figure.
        :rtype: Dict[str, plt.Figure]
        """

    @abstractmethod
    def to_log_dict(self, verbose: bool = False) -> Dict[str, Any]:
        """Create a dict version of the results, to be logged to wandb"""
        return {self.objective_name: self.objective}

    def save(self, path: Union[str, Path], dump_fn=None, **kwargs) -> None:
        path = Path(path)
        path.parent.mkdir(exist_ok=True, parents=True)
        return super().save(path, dump_fn=dump_fn, **kwargs)

    def save_to_dir(self, save_dir: Union[str, Path], filename: str = "results.json") -> None:
        save_dir = Path(save_dir)
        save_dir.mkdir(exist_ok=True, parents=True)

        print(f"Results summary:")
        self.summary

        results_dump_file = save_dir / filename
        self.save(results_dump_file)
        print(f"Saved a copy of the results to {results_dump_file}")

        plots: Dict[str, plt.Figure] = self.make_plots()
        plot_paths: Dict[str, Path] = {}
        for fig_name, figure in plots.items():
            print(f"fig_name: {fig_name}")
            # figure.show()
            # plt.waitforbuttonpress(10)
            path = (save_dir / fig_name).with_suffix(".jpg")
            path.parent.mkdir(exist_ok=True, parents=True)
            figure.savefig(path)
            # print(f"Saved figure at path {path}")
            plot_paths[fig_name] = path
        print(f"\nSaved Plots to: {plot_paths}\n")

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, Results):
            return self.objective == other.objective
        elif isinstance(other, float):
            return self.objective == other
        return NotImplemented

    def __gt__(self, other: Any) -> bool:
        if isinstance(other, Results):
            return self.objective > other.objective
        elif isinstance(other, float):
            return self.objective > other
        return NotImplemented


ResultsType = TypeVar("ResultsType", bound=Results)


================================================
FILE: sequoia/settings/base/setting.py
================================================
""" This module defines the `Setting` class, an ML "problem" to solve.

The `Setting` class is an abstract base class which should represent the most
general learning setting imaginable, i.e. with the fewest assumptions about the
data, the environment, the agent, etc.


The Setting class is currently loosely based on the `LightningDataModule` class
from pytorch-lightning, with the goal of having an `TraditionalSLSetting` node somewhere
in the tree, which would be totally interchangeable with existing datamodules
from pytorch-lightning.

The hope is that by staying close to that API, we can make it easier for people
to adopt the repo, and also, if possible, directly reuse existing models from
pytorch-lightning.

See: [Pytorch-Lightning](https://pytorch-lightning.readthedocs.io/en/latest/)
See: [LightningDataModule](https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html)

"""
import itertools
import sys
import typing
from abc import abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import Any, ClassVar, Dict, Generic, Iterable, List, Optional, Type, TypeVar, Union

import gym
import numpy as np
import torch
from gym import spaces
from pytorch_lightning import LightningDataModule
from simple_parsing import Serializable, field
from torch import Tensor

from sequoia.common.config import Config, WandbConfig
from sequoia.common.metrics import Metrics

if typing.TYPE_CHECKING:
    from sequoia.common.transforms import Compose
from sequoia.common.transforms.transform_enum import Transforms

from sequoia.settings.base.bases import Method, SettingABC
from sequoia.settings.base.environment import Environment
from sequoia.settings.base.objects import Actions, Observations, Rewards
from sequoia.settings.base.results import Results, ResultsType
from sequoia.settings.base.setting_meta import SettingMeta
from sequoia.settings.presets import setting_presets
from sequoia.utils import Parseable, get_logger
from sequoia.utils.utils import take

logger = get_logger(__name__)

SettingType = TypeVar("SettingType", bound="Setting")
EnvironmentType = TypeVar("EnvironmentType", bound=Environment)


@dataclass
class Setting(
    SettingABC,
    Parseable,
    Serializable,
    LightningDataModule,
    Generic[EnvironmentType],
    metaclass=SettingMeta,
):
    """Base class for all research settings in ML: Root node of the tree.

    A 'setting' is loosely defined here as a learning problem with a specific
    set of assumptions, restrictions, and an evaluation procedure.

    For example, Reinforcement Learning is a type of Setting in which we assume
    that an Agent is able to observe an environment, take actions upon it, and
    receive rewards back from the environment. Some of the assumptions include
    that the reward is dependant on the action taken, and that the actions have
    an impact on the environment's state (and on the next observations the agent
    will receive). The evaluation procedure consists in trying to maximize the
    reward obtained from an environment over a given number of steps.

    This 'Setting' class should ideally represent the most general learning
    problem imaginable, with almost no assumptions about the data or evaluation
    procedure.

    This is a dataclass. Its attributes are can also be used as command-line
    arguments using `simple_parsing`.

    Abstract (required) methods:
    - **apply** Applies a given Method on this setting to produce Results.
    - **prepare_data** (things to do on 1 GPU/TPU not on every GPU/TPU in distributed mode).
    - **setup**  (things to do on every accelerator in distributed mode).
    - **train_dataloader** the training environment/dataloader.
    - **val_dataloader** the val environments/dataloader(s).
    - **test_dataloader** the test environments/dataloader(s).

    "Abstract"-ish (required) class attributes:
    - `Results`: The class of Results that are created when applying a Method on
      this setting.
    - `Observations`: The type of Observations that will be produced  in this
        setting.
    - `Actions`: The type of Actions that are expected from this setting.
    - `Rewards`: The type of Rewards that this setting will (potentially) return
      upon receiving an action from the method.
    """

    # ---------- Class Variables -------------
    # Fields in this block are class attributes. They don't create command-line
    # arguments.

    # Type of Observations that the dataloaders (a.k.a. "environments") will
    # produce for this type of Setting.
    Observations: ClassVar[Type[Observations]] = Observations
    # Type of Actions that the dataloaders (a.k.a. "environments") will receive
    # through their `send` method, for this type of Setting.
    Actions: ClassVar[Type[Actions]] = Actions
    # Type of Rewards that the dataloaders (a.k.a. "environments") will return
    # after receiving an action, for this type of Setting.
    Rewards: ClassVar[Type[Rewards]] = Rewards

    # The type of Results that are given back when a method is applied on this
    # Setting. The `Results` class basically defines the 'evaluation metric' for
    # a given type of setting. See the `Results` class for more info.
    Results: ClassVar[Type[Results]] = Results

    available_datasets: ClassVar[Dict[str, Any]] = {}

    # Transforms to be applied to the observatons of the train/valid/test
    # environments.
    transforms: Optional[List[Transforms]] = None

    # Transforms to be applied to the training datasets.
    train_transforms: Optional[List[Transforms]] = None
    # Transforms to be applied to the validation datasets.
    val_transforms: Optional[List[Transforms]] = None
    # Transforms to be applied to the testing datasets.
    test_transforms: Optional[List[Transforms]] = None

    # Fraction of training data to use to create the validation set.
    # (Only applicable in Passive settings.)
    val_fraction: float = 0.2

    # TODO: Still not sure where exactly we should be adding the 'batch_size'
    # and 'num_workers' arguments. Adding it here for now with cmd=False, so
    # that they can be passed to the constructor of the Setting.
    batch_size: Optional[int] = field(default=None, cmd=False)
    num_workers: Optional[int] = field(default=None, cmd=False)

    # # TODO: Add support for semi-supervised training.
    # # Fraction of the dataset that is labeled.
    # labeled_data_fraction: int = 1.0
    # # Number of labeled examples.
    # n_labeled_examples: Optional[int] = None

    # Options related to Weights & Biases (wandb). Turned Off by default. Passing any of
    # its arguments will enable wandb.
    # NOTE: Adding `cmd=False` here, so we only create the args in `Experiment`.
    # TODO: Fix this up.
    wandb: Optional[WandbConfig] = field(default=None, compare=False, cmd=False)

    # Group of configuration options like log_dir, data dir, etc.
    # TODO: It's a bit confusing to also have a `config` attribute on the
    # Setting. Might want to change this a bit.
    config: Optional[Config] = field(default=None, cmd=False)

    def __post_init__(
        self,
        observation_space: gym.Space = None,
        action_space: gym.Space = None,
        reward_space: gym.Space = None,
    ):
        """Initializes the fields of the setting that weren't set from the
        command-line.
        """
        from sequoia.common.transforms import Compose

        logger.debug("__post_init__ of Setting")
        # BUG: simple-parsing sometimes parses a list with a single item, itself the
        # list of transforms. Not sure if this still happens.

        def is_list_of_list(v: Any) -> bool:
            return isinstance(v, list) and len(v) == 1 and isinstance(v[0], list)

        if is_list_of_list(self.train_transforms):
            self.train_transforms = self.train_transforms[0]
        if is_list_of_list(self.val_transforms):
            self.val_transforms = self.val_transforms[0]
        if is_list_of_list(self.test_transforms):
            self.test_transforms = self.test_transforms[0]

        # if all(
        #     t is None
        #     for t in [
        #         self.transforms,
        #         self.train_transforms,
        #         self.val_transforms,
        #         self.test_transforms,
        #     ]
        # ):
        #     # Use these two transforms by default if no transforms are passed at all.
        #     # TODO: Remove this after the competition perhaps.
        #     self.transforms = Compose([Transforms.to_tensor, Transforms.three_channels])

        # TODO: Should change this, so that these transform fields are only the
        # additional transforms compared to `self.transforms` (the 'base' transforms)
        # If the constructor is called with just the `transforms` argument, like this:
        # <SomeSetting>(dataset="bob", transforms=foo_transform)
        # Then we use this value as the default for the train, val and test transforms.
        if self.transforms and not any(
            [self.train_transforms, self.val_transforms, self.test_transforms]
        ):
            if not isinstance(self.transforms, list):
                self.transforms = Compose([self.transforms])
            self.train_transforms = self.transforms.copy()
            self.val_transforms = self.transforms.copy()
            self.test_transforms = self.transforms.copy()

        if self.train_transforms is not None and not isinstance(self.train_transforms, list):
            self.train_transforms = [self.train_transforms]

        if self.val_transforms is not None and not isinstance(self.val_transforms, list):
            self.val_transforms = [self.val_transforms]

        if self.test_transforms is not None and not isinstance(self.test_transforms, list):
            self.test_transforms = [self.test_transforms]

        # Actually compose the list of Transforms or callables into a single transform.
        self.train_transforms = Compose(self.train_transforms or [])
        self.val_transforms = Compose(self.val_transforms or [])
        self.test_transforms = Compose(self.test_transforms or [])

        LightningDataModule.__init__(
            self,
            train_transforms=self.train_transforms,
            val_transforms=self.val_transforms,
            test_transforms=self.test_transforms,
        )

        self._observation_space = observation_space
        self._action_space = action_space
        self._reward_space = reward_space

        self.train_env: Environment = None  # type: ignore
        self.val_env: Environment = None  # type: ignore
        self.test_env: Environment = None  # type: ignore

    @abstractmethod
    def apply(self, method: Method, config: Config = None) -> "Setting.Results":
        # NOTE: The actual train/test loop should be defined in a more specific
        # setting. This is just here as an illustration of what that could look
        # like.
        raise NotImplementedError("this is just here for illustration purposes. ")

        method.fit(
            train_env=self.train_dataloader(),
            valid_env=self.val_dataloader(),
        )

        # Test loop:
        test_env = self.test_dataloader()
        test_metrics = []
        # Number of episodes to test on:
        n_test_episodes = 1

        # Perform a set number of episodes in the test environment.
        for episode in range(n_test_episodes):
            # Get initial observations.
            observations = test_env.reset()

            for i in itertools.count():
                # Get the predictions/actions for a batch of observations.
                actions = method.get_actions(observations, test_env.action_space)
                observations, rewards, done, info = test_env.step(actions)
                # Calculate the 'metrics' (TODO: This should be done be in the env!)
                batch_metrics = ...
                test_metrics.append(batch_metrics)
                if done:
                    break

        return self.Results(test_metrics=test_metrics)

    def get_metrics(self, actions: Actions, rewards: Rewards) -> Union[float, Metrics]:
        """Calculate the "metric" from the model predictions (actions) and the true labels (rewards).

        In this example, we return a 'Metrics' object:
        - `ClassificationMetrics` for classification problems,
        - `RegressionMetrics` for regression problems.

        We use these objects because they are awesome (they basically simplify
        making plots, wandb logging, and serialization), but you can also just
        return floats if you want, no problem.

        TODO: This is duplicated from Incremental. Need to fix this.
        """
        from sequoia.common.metrics import get_metrics

        # In this particular setting, we only use the y_pred from actions and
        # the y from the rewards.
        if isinstance(actions, Actions):
            actions = torch.as_tensor(actions.y_pred)
        if isinstance(rewards, Rewards):
            rewards = torch.as_tensor(rewards.y)
        # TODO: At the moment there's this problem, ClassificationMetrics wants
        # to create a confusion matrix, which requires 'logits' (so it knows how
        # many classes.
        if isinstance(actions, Tensor):
            actions = actions.cpu().numpy()
        if isinstance(rewards, Tensor):
            rewards = rewards.cpu().numpy()

        if isinstance(self.action_space, spaces.Discrete):
            batch_size = rewards.shape[0]
            actions = torch.as_tensor(actions)
            if len(actions.shape) == 1 or (actions.shape[-1] == 1 and self.action_space.n != 2):
                fake_logits = torch.zeros([batch_size, self.action_space.n], dtype=int)
                # FIXME: There must be a smarter way to do this indexing.
                for i, action in enumerate(actions):
                    fake_logits[i, action] = 1
                actions = fake_logits

        return get_metrics(y_pred=actions, y=rewards)

    @property
    def image_space(self) -> Optional[gym.Space]:
        if isinstance(self.observation_space, spaces.Box):
            return self.observation_space
        if isinstance(self.observation_space, spaces.Tuple):
            assert isinstance(self.observation_space["x"], spaces.Box)
            return self.observation_space["x"]
        if isinstance(self.observation_space, spaces.Dict):
            return self.observation_space.spaces["x"]
        logger.warning(
            f"Don't know what the image space is. "
            f"(self.observation_space={self.observation_space})"
        )
        return None

    @property
    def observation_space(self) -> gym.Space:
        return self._observation_space

    @observation_space.setter
    def observation_space(self, value: gym.Space) -> None:
        """Sets a the observation space.

        NOTE: This also changes the value of the `dims` attribute and the result
        of the `size()` method from LightningDataModule.
        """
        if not isinstance(value, gym.Space):
            raise RuntimeError(f"Value must be a `gym.Space` (got {value})")
        if not self._dims:
            if isinstance(value, spaces.Box):
                self.dims = value.shape
            elif isinstance(value, spaces.Tuple):
                self.dims = tuple(space.shape for space in value.spaces)
            elif isinstance(value, spaces.Dict) and "x" in value.spaces:
                self.dims = value.spaces["x"].shape
            else:
                raise NotImplementedError(
                    f"Don't know how to set the 'dims' attribute using "
                    f"observation space {value}"
                )
        self._observation_space = value

    @property
    def action_space(self) -> gym.Space:
        return self._action_space

    @action_space.setter
    def action_space(self, value: gym.Space) -> None:
        self._action_space = value

    @property
    def reward_space(self) -> gym.Space:
        return self._reward_space

    @reward_space.setter
    def reward_space(self, value: gym.Space) -> None:
        self._reward_space = value

    @classmethod
    def get_available_datasets(cls) -> Iterable[str]:
        """Returns an iterable of strings which represent the names of datasets."""
        return cls.available_datasets

    def _setup_config(self, method: Method) -> Config:
        config: Config
        if isinstance(getattr(method, "config", None), Config):
            config = method.config
            logger.debug(f"Using Config from the Method: {config}")
        elif isinstance(getattr(self, "config", None), Config):
            config = self.config
            logger.debug(f"Using Config from the Setting: {config}")
        else:
            argv = self._argv
            if argv:
                logger.debug(f"Parsing the Config from the command-line arguments ({argv})")
            else:
                logger.debug(f"Parsing the config from the current command-line arguments.")
            config = Config.from_args(argv, strict=False)
        return config

    @classmethod
    def main(cls, argv: Optional[Union[str, List[str]]] = None) -> Results:
        from sequoia.main import Experiment

        experiment: Experiment
        # Create the Setting object from the command-line:
        setting = cls.from_args(argv)
        # Then create the 'Experiment' from the command-line, which makes it
        # possible to choose between all the methods.
        experiment = Experiment.from_args(argv)
        # fix the setting attribute to be the one parsed above.
        experiment.setting = setting
        results: ResultsType = experiment.launch(argv)
        return results

    def apply_all(self, argv: Union[str, List[str]] = None) -> Dict[Type["Method"], Results]:
        applicable_methods = self.get_applicable_methods()
        from sequoia.methods import Method

        all_results: Dict[Type[Method], Results] = {}
        config = Config.from_args(argv)
        for method_type in applicable_methods:
            method = method_type.from_args(argv)
            results = self.apply(method, config)
            all_results[method_type] = results
        logger.info(f"All results for setting of type {type(self)}:")
        logger.info(
            {
                method.get_name(): (results.get_metric() if results else "crashed")
                for method, results in all_results.items()
            }
        )
        return all_results

    def _check_environments(self):
        """Do a quick check to make sure that interacting with the envs/dataloaders
        works correctly.
        """
        # Check that the env's spaces are batched versions of the settings'.
        from gym.vector.utils import batch_space

        from sequoia.settings.sl import PassiveEnvironment

        batch_size = self.batch_size
        for loader_method in [
            self.train_dataloader,
            self.val_dataloader,
            self.test_dataloader,
        ]:
            print(f"\n\nChecking loader method {loader_method.__name__}\n\n")
            env = loader_method(batch_size=batch_size)

            batch_size = env.batch_size

            # We could compare the spaces directly, but that's a bit messy, and
            # would be depends on the type of spaces for each. Instead, we could
            # check samples from such spaces on how the spaces are batched.
            if batch_size:
                expected_observation_space = batch_space(self.observation_space, n=batch_size)
                expected_action_space = batch_space(self.action_space, n=batch_size)
                expected_reward_space = batch_space(self.reward_space, n=batch_size)
            else:
                expected_observation_space = self.observation_space
                expected_action_space = self.action_space
                expected_reward_space = self.reward_space

            # TODO: Batching the 'Sparse' makes it really ugly, so just
            # comparing the 'image' portion of the space for now.
            assert env.observation_space["x"].shape == expected_observation_space[0].shape, (
                env.observation_space["x"],
                expected_observation_space[0],
            )

            assert env.action_space == expected_action_space, (
                env.action_space,
                expected_action_space,
            )
            assert env.reward_space == expected_reward_space, (
                env.reward_space,
                expected_reward_space,
            )

            # Check that the 'gym API' interaction is working correctly.
            reset_obs: Observations = env.reset()
            self._check_observations(env, reset_obs)

            for i in range(5):
                actions = env.action_space.sample()
                self._check_actions(env, actions)
                step_observations, step_rewards, done, info = env.step(actions)
                self._check_observations(env, step_observations)
                self._check_rewards(env, step_rewards)
                if batch_size:
                    assert not any(done)
                else:
                    assert not done
                # assert not (done if isinstance(done, bool) else any(done))

            for batch in take(env, 5):
                observations: Observations
                rewards: Optional[Rewards]

                if isinstance(env, PassiveEnvironment):
                    observations, rewards = batch
                else:
                    # in RL atm, the 'dataset' gives back only the observations.
                    # Coul
                    observations, rewards = batch, None

                self._check_observations(env, observations)
                if rewards is not None:
                    self._check_rewards(env, rewards)

                if batch_size:
                    actions = tuple(self.action_space.sample() for _ in range(batch_size))
                else:
                    actions = self.action_space.sample()
                # actions = self.Actions(torch.as_tensor(actions))
                rewards = env.send(actions)
                self._check_rewards(env, rewards)

            env.close()

    def _check_observations(self, env: Environment, observations: Any):
        """Check that the given observation makes sense for the given environment.

        TODO: This should probably not be in this file here. It's more used for
        testing than anything else.
        """
        assert isinstance(observations, self.Observations), observations
        images = observations.x
        assert isinstance(images, (torch.Tensor, np.ndarray))
        if isinstance(images, Tensor):
            images = images.cpu().numpy()

        # Find the 'image' space:
        if isinstance(env.observation_space, spaces.Box):
            image_space = env.observation_space
        elif isinstance(env.observation_space, spaces.Tuple):
            image_space = env.observation_space["x"]
        else:
            raise RuntimeError(
                f"Don't know how to find the image space in the "
                f"env's obs space ({env.observation_space})."
            )
        assert images in image_space

    def _check_actions(self, env: Environment, actions: Any):
        if isinstance(actions, Actions):
            assert isinstance(actions, self.Actions)
            actions = actions.y_pred.cpu().numpy()
        elif isinstance(actions, Tensor):
            actions = actions.cpu().numpy()
        elif isinstance(actions, np.ndarray):
            actions = actions
        assert actions in env.action_space

    def _check_rewards(self, env: Environment, rewards: Any):
        if isinstance(rewards, Rewards):
            assert isinstance(rewards, self.Rewards)
            rewards = rewards.y
        if isinstance(rewards, Tensor):
            rewards = rewards.cpu().numpy()
        if isinstance(rewards, np.ndarray):
            rewards = rewards
        if isinstance(rewards, (int, float)):
            rewards = np.asarray(rewards)
        assert rewards in env.reward_space, (rewards, env.reward_space)

    # Just to make type hinters stop throwing errors when using the constructor
    # to create a Setting.
    def __new__(cls, *args, **kwargs):
        return super().__new__(cls, *args, **kwargs)

    @classmethod
    def load_benchmark(cls: Type[SettingType], benchmark: Union[str, Path]) -> SettingType:
        """Load the given "benchmark" (pre-configured Setting) of this type.

        Parameters
        ----------
        cls : Type[SettingType]
            Type of Setting to create.
        benchmark : Union[str, Path]
            Either the name of a benchmark (e.g. "cartpole_state", "monsterkong", etc.)
            or a path to a json/yaml file.

        Returns
        -------
        SettingType
            Setting of type `cls`, appropriately populated according to the chosen
            benchmark.

        Raises
        ------
        RuntimeError
            If `benchmark` isn't an existing file or a known preset.
        RuntimeError
            If any command-line arguments are present in sys.argv which would be ignored
            when creating this setting.
        """
        # If the provided benchmark isn't a path, try to get the value from
        # the `setting_presets` dict. If it isn't in the dict, raise an
        # error.
        if not Path(benchmark).is_file():
            if benchmark in setting_presets:
                benchmark = setting_presets[benchmark]
            else:
                raise RuntimeError(
                    f"Could not find benchmark '{benchmark}': it "
                    f"is neither a path to a file or a key of the "
                    f"`setting_presets` dictionary. \n"
                    f"(Available presets: {setting_presets}) "
                )
        # Creating an experiment for the given setting, loaded from the
        # config file.
        # TODO: IDEA: Do the same thing for loading the Method?
        logger.info(
            f"Will load the options for setting {cls} from the file " f"at path {benchmark}."
        )

        # Raise an error if any of the args in sys.argv would have been used
        # up by the Setting, just to prevent any ambiguities.
        _, unused_args = cls.from_known_args()
        consumed_args = list(set(sys.argv[1:]) - set(unused_args))
        if consumed_args:
            # TODO: This could also be trigerred if there were arguments
            # in the method with the same name as some from the Setting.
            raise RuntimeError(
                f"Cannot pass command-line arguments for the Setting when "
                f"loading a benchmark, since these arguments whould have been "
                f"ignored when creating the setting of type {cls} "
                f"anyway: {consumed_args}"
            )

        drop_extras = False
        # Actually load the setting from the file.
        setting = cls.load(path=benchmark, drop_extra_fields=drop_extras)
        return setting


================================================
FILE: sequoia/settings/base/setting_meta.py
================================================
"""

"""
import dataclasses
from dataclasses import Field
from typing import Dict, List, Type

from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


class SettingMeta(Type["Setting"]):
    """Metaclass for the nodes in the Setting inheritance tree.

    Might remove this. Was experimenting with using this to create class
    properties for each Setting.

    What this currently does is to remove any keyword argument passed to the
    constructor if its value is marked as a 'constant'.

    TODO: A little while back I noticed some strange behaviour when trying
    to create a Setting class (either manually or through the command-line), and
    I attributed it to PL adding a `_DataModuleWrapper` metaclass to
    `LightningDataModule`, which seemed to be causing problems related to
    calling __init__ when using dataclasses. I don't quite recall exactly what
    was happening and was causing an issue, so it would be a good idea to try
    removing this metaclass and writing a test to make sure there was a problem
    to begin with, and also to make sure that adding back this class fixes it.
    """

    def __call__(cls, *args, **kwargs):
        # This is used to filter the arguments passed to the constructor
        # of the Setting and only keep the ones that are fields with init=True.
        fields: Dict[str, Field] = {field.name: field for field in dataclasses.fields(cls)}
        init_fields: List[str] = [name for name, f in fields.items() if f.init]

        for key in list(kwargs.keys()):
            value = kwargs[key]
            if key not in fields:
                # We let this through, so that if there is a problem, it is
                # raised when calling the constructor below.
                continue
            # elif key in fields and key not in init_fields:
            #     # We let this through, so that if there is a problem, it is
            #     # raised when calling the constructor below.
            #     logger.warning(RuntimeWarning(
            #         f"Constructor Argument {key} is a field with init=False but"
            #         f"but is being passed to the constructor."
            #     ))
            #     continue
            # Alternative: Raise a custom Exception directly:
            # raise RuntimeError((
            # Other idea: go up two stackframes so that it looks like
            # `cls(blabla=123)` is what's causing the exception?

            field = fields[key]
            _missing = object()
            constant_value = field.metadata.get("constant", _missing)
            if constant_value is not _missing and value != constant_value:
                logger.warning(
                    UserWarning(
                        f"Ignoring argument {key}={value} when creating class "
                        f"{cls}, since it has that field marked as constant with a "
                        f"value of {constant_value}."
                    )
                )
                kwargs.pop(key)
        return super().__call__(*args, **kwargs)

    def __instancecheck__(self, instance):
        from sequoia.client import SettingProxy

        if isinstance(instance, SettingProxy) or hasattr(instance, "_setting_type"):
            # If the setting is a proxy, then we check if its a proxy to a setting of
            # this type.
            return issubclass(instance._setting_type, self)
        return super().__instancecheck__(instance)


================================================
FILE: sequoia/settings/base/setting_test.py
================================================
import functools
import inspect
from dataclasses import dataclass
from typing import Union

import pytest

from sequoia.methods import Method
from sequoia.utils.utils import constant

from .setting import Setting


@dataclass
class Setting1(Setting):
    foo: int = 1
    bar: int = 2

    def __post_init__(self):
        print(f"Setting1 __init__ ({self})")
        super().__post_init__()


@dataclass
class Setting2(Setting1):
    bar: int = constant(1)

    def __post_init__(self):
        print(f"Setting2 __init__ ({self})")
        super().__post_init__()


@pytest.mark.xfail(reason="Changed this.")
def test_settings_override_with_constant_take_init():
    """Test that when a value for one of the constant fields is passed to the
    constructor, its value is ignored and getting that attribute on the object
    gives back the constant value.
    If the field isn't constant, the value should be set on the object as usual.
    """
    bob1 = Setting1(foo=3, bar=7)
    assert bob1.foo == 3
    assert bob1.bar == 7
    bob2 = Setting2(foo=4, bar=4)
    assert bob2.bar == 1.0
    assert bob2.foo == 4


def test_loading_benchmark_doesnt_overwrite_constant():
    setting1 = Setting1.loads_json('{"foo":1, "bar":2}')
    assert setting1.foo == 1
    assert setting1.bar == 2

    setting2 = Setting2.loads_json('{"foo":1, "bar":2}')
    assert setting2.foo == 1
    assert setting2.bar == 1


def test_init_still_works():
    setting = Setting(val_fraction=0.01)
    assert setting.val_fraction == 0.01


def test_passing_unexpected_arg_raises_typeerror():
    with pytest.raises(TypeError):
        bob2 = Setting2(foo=4, bar=4, baz=123123)


@dataclass
class SettingA(Setting):
    pass


@dataclass
class SettingA1(SettingA):
    pass


@dataclass
class SettingA2(SettingA):
    pass


@dataclass
class SettingB(Setting):
    pass


class MethodA(Method, target_setting=SettingA):
    pass


class MethodB(Method, target_setting=SettingB):
    pass


class CoolGeneralMethod(Method, target_setting=Setting):
    pass


def test_that_transforms_can_be_set_through_command_line():
    from sequoia.common.transforms import Compose, Transforms

    setting = Setting(train_transforms=[])
    assert setting.train_transforms == []

    setting = Setting.from_args("--train_transforms channels_first")
    assert setting.train_transforms == [Transforms.channels_first]
    assert isinstance(setting.train_transforms, Compose)

    setting = Setting.from_args("--train_transforms channels_first")
    assert setting.train_transforms == [Transforms.channels_first]
    assert isinstance(setting.train_transforms, Compose)


from typing import Any, ClassVar, Dict, Type

from sequoia.common.config import Config
from sequoia.methods.random_baseline import RandomBaselineMethod

from .setting import Setting


class SettingTests:
    """Class that groups all the tests for a given setting.

    You should create a test class for your new setting, ideally in a file placed next to the class
    under test, named with the "_test.py" suffix.

    The test class can be created in one of two ways:
    - Either using a 'Setting' class attribute:

    ```python
    from sequoia.settings.base.setting_test import SettingTests
    class TestMySetting(SettingTests):
        Setting = MySetting

        def test_something(self):
            setting = self.Setting(...)
            ...
    ```

    - OR, by passing the `setting` keyword argument to the class statement:

    ```python
    class TestMySetting(SettingTests, setting=MySetting):
        def test_something(self):
            setting = self.Setting(...)
            ...
    ```

    If your setting is based on something more concrete than just the `Setting` class, then you
    should use the associated test class as a base for your new test class:

    ```python
    # (Taking ContinualRLSetting here as an example)
    # *Important*: Remember to rename the test class if needed so that pytest doesn't also run them
    # when testing your module:
    from sequoia.settings.rl.continual.setting_test import TestContinualRLSetting as ContinualRLSettingTests

    from .my_custom_setting import MyCustomSetting

    class TestMyCustomSetting(ContinualRLSettingTests, setting=MyCustomSetting):
        def my_custom_test(self):
            ...
    # OR
    class TestMyCustomSetting(ContinualRLSettingTests):
        Setting = MyCustomSetting
    ```

    This also generates a `dataset` fixture.
    """

    Setting: ClassVar[Type[Setting]]

    # Autogenerated fixture that will yield each entry from the available dataset of the setting
    # class under test.
    dataset: pytest.fixture

    # The kwargs to be passed to the Setting when we want to create a 'short' setting.
    fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = {}

    def __init_subclass__(cls, setting: Type[Setting] = None):
        """Autogenerates fixtures on the class under test."""
        super().__init_subclass__()
        if not setting and not hasattr(cls, "Setting"):
            raise RuntimeError(
                "Need to either pass `setting` when subclassing or set "
                "a 'Sethod' class attribute."
            )
        if setting is not None:
            # Make the setting accessible to tests as either self.Setting or cls.Setting for
            # classmethods.
            cls.Setting = setting
        cls.dataset: pytest.fixture = make_dataset_fixture(cls.Setting)

    def assert_chance_level(self, setting: Setting, results: Setting.Results):
        """Called during testing. Use this to assert that the results you get
        from applying your method on the given setting match your expectations.

        Args:
            setting
            results (Results): A given Results object.
        """
        assert results is not None
        assert results.objective > 0
        print(f"Objective when applied to a setting of type {type(setting)}: {results.objective}")

    @pytest.mark.timeout(60)
    def test_random_baseline(self, config: Config):
        """
        Test that applies a random baseline to the Setting, and checks that the results
        are around chance level.
        """
        # Create the Setting
        setting_type = self.Setting
        # if issubclass(setting_type, ContinualRLSetting):
        #     kwargs.update(max_steps=100, test_steps_per_task=100)
        # if issubclass(setting_type, IncrementalRLSetting):
        #     kwargs.update(nb_tasks=2)
        # if issubclass(setting_type, ClassIncrementalSetting):
        #     kwargs = dict(nb_tasks=5)
        # if issubclass(setting_type, (TraditionalSLSetting, RLSetting)):
        #     kwargs.pop("nb_tasks", None)
        # if isinstance(setting, SLSetting):
        #     method.batch_size = 64
        # elif isinstance(setting, RLSetting):
        #     method.batch_size = None
        #     setting.train_max_steps = 100

        setting: Setting = setting_type(**self.fast_dev_run_kwargs)
        method = RandomBaselineMethod()

        results = setting.apply(method, config=config)
        self.assert_chance_level(setting, results=results)


def make_dataset_fixture(setting_type: Union[Type[Setting], functools.partial]):
    """Create a parametrized fixture that will go through all the available datasets
    for a given setting."""

    def dataset(_, request):
        dataset = request.param
        return dataset

    if isinstance(setting_type, functools.partial):
        setting_type = setting_type.args[0]
        assert inspect.isclass(setting_type) and issubclass(setting_type, Setting)

    datasets = set(setting_type.available_datasets.keys())
    datasets_to_remove = set(["MT10", "MT50", "CW10", "CW20"])
    # NOTE: Need deterministic ordering for the datasets for tests to be parallelizable
    # with pytest-xdist.
    datasets = sorted(list(datasets - datasets_to_remove))

    return pytest.fixture(
        params=datasets,
        scope="module",
    )(dataset)


================================================
FILE: sequoia/settings/offline_rl/setting.py
================================================
from dataclasses import dataclass
from typing import Any, ClassVar, Dict, List

import gym
from gym.wrappers import RecordEpisodeStatistics
from matplotlib import pyplot as plt
from simple_parsing.helpers import choice
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

from sequoia import Results
from sequoia.settings.base import Setting

try:
    import d3rlpy
except ImportError as err:
    raise RuntimeError(f"You need to have `d3rlpy` installed to use these methods.") from err


@dataclass
class OfflineRLResults(Results):

    # TODO: Write these methods
    def summary(self) -> str:
        return f"Offline RL results: {self.objective_name} = {self.objective}"

    def make_plots(self) -> Dict[str, plt.Figure]:
        return {}

    def to_log_dict(self, verbose: bool = False) -> Dict[str, Any]:
        return {self.objective_name: self.objective}

    # Metrics from online testing
    test_rewards: list
    test_episode_length: list
    test_episode_count: list

    objective_name: ClassVar[str] = "Average Reward"

    @property
    def objective(self):
        return sum(self.test_rewards) / len(self.test_rewards)


# Offline datasets from d3rlpy (not including atari)
offline_datasets_from_d3rlpy = {
    "cartpole-replay",
    "cartpole-random",
    "pendulum-replay",
    "pendulum-random",
    "hopper",
    "halfcheetah",
    "walker",
    "ant",
}

# Offline atari datasets from d3rlpy
offline_atari_datasets_from_d3rlpy = set(d3rlpy.datasets.ATARI_GAMES)


@dataclass
class OfflineRLSetting(Setting):

    # A list of available offline rl datasets
    available_datasets: ClassVar[List[str]] = list(offline_datasets_from_d3rlpy) + list(
        offline_atari_datasets_from_d3rlpy
    )

    # choice of dataset for the current setting
    dataset: str = choice(available_datasets, default="cartpole-replay")

    # size of validation set
    val_size: float = 0.2

    # mask for control bootstrapping
    create_mask: bool = False
    mask_size: int = 1

    def __post_init__(self):
        # Load d3rlpy offline dataset
        if (
            self.dataset in offline_datasets_from_d3rlpy
            or self.dataset in offline_atari_datasets_from_d3rlpy
        ):
            mdp_dataset, self.env = d3rlpy.datasets.get_dataset(
                self.dataset, self.create_mask, self.mask_size
            )
            self.train_dataset, self.valid_dataset = train_test_split(
                mdp_dataset, test_size=self.val_size
            )

        # Load other dataset types here
        else:
            raise NotImplementedError

    def train_dataloader(self, batch_size: int = None) -> DataLoader:
        return DataLoader(self.train_dataset, batch_size=batch_size)

    def val_dataloader(self, batch_size: int = None) -> DataLoader:
        return DataLoader(self.valid_dataset, batch_size=batch_size)

    def test(self, method, test_env: gym.Env):
        """
        Test self.algo on given test_env for self.test_steps iterations
        """
        test_env = RecordEpisodeStatistics(test_env)

        obs = test_env.reset()
        for _ in range(method.test_steps):
            obs, reward, done, info = test_env.step(
                method.get_actions(obs, action_space=test_env.action_space)
            )
            if done:
                break
        test_env.close()

        return test_env.episode_returns, test_env.episode_lengths, test_env.episode_count

    def apply(self, method) -> OfflineRLResults:
        method.configure(self)

        method.fit(train_env=self.train_dataset, valid_env=self.valid_dataset)

        # Test
        test_rewards, test_episode_length, test_episode_count = self.test(method, self.env)
        return OfflineRLResults(
            test_rewards=test_rewards,
            test_episode_length=test_episode_length,
            test_episode_count=test_episode_count,
        )


================================================
FILE: sequoia/settings/presets/__init__.py
================================================
import os
from pathlib import Path
from typing import Dict

presets_dir = Path(os.path.dirname(__file__))

setting_presets: Dict[str, Path] = {file.stem: file for file in presets_dir.rglob("*.yaml")}


================================================
FILE: sequoia/settings/presets/cartpole_pixels.yaml
================================================
dataset: PixelCartPole-v0
max_episodes: null
nb_tasks: 3
train_max_steps: 3000
steps_per_task: 1000
test_max_steps: 3000
test_steps_per_task: 1000
train_task_schedule:
  0:
    gravity: 10
    length: 0.2
  1000:
    gravity: 100
    length: 1.2
  2000:
    gravity: 10
    length: 0.2
val_task_schedule:
  0:
    gravity: 10
    length: 0.2
  1000:
    gravity: 100
    length: 1.2
  2000:
    gravity: 10
    length: 0.2
test_task_schedule:
  0:
    gravity: 10
    length: 0.2
  1000:
    gravity: 100
    length: 1.2
  2000:
    gravity: 10
    length: 0.2


================================================
FILE: sequoia/settings/presets/cartpole_state.yaml
================================================
dataset: CartPole-v0
max_episodes: null
nb_tasks: 2
train_max_steps: 4000
test_max_steps: 1000
test_steps_per_task: 500
# TODO: Need to fix these task schedules: They probably won't work the same with
# 'Continual' settings vs in the IncremementalRL Settings. Also need to decide what
# happens with the last key in MultiTask RL.
train_task_schedule:
  0:
    gravity: 10
    length: 0.3
  2000:
    gravity: 10
    length: 0.8
val_task_schedule:
  0:
    gravity: 10
    length: 0.3
  2000:
    gravity: 10
    length: 0.8


================================================
FILE: sequoia/settings/presets/cifar10.yaml
================================================
dataset: cifar10


================================================
FILE: sequoia/settings/presets/cifar100.yaml
================================================
dataset: cifar100


================================================
FILE: sequoia/settings/presets/classic_control/cartpole.yaml
================================================
dataset: cartpole
monitor_training_performance: true
nb_tasks: 8
steps_per_task: 20_000
test_steps_per_task: 10_000
train_task_schedule:
  0:
    force_mag: 10.0
    gravity: 9.8
    length: 0.5
    masscart: 1.0
    masspole: 0.1
    tau: 0.02
  1:
    force_mag: 8.666898797953921
    gravity: 7.760853554007704
    length: 0.5217446765844818
    masscart: 0.8908045485782948
    masspole: 0.15674543117467288
    tau: 0.0220635245382657
  2:
    force_mag: 7.458618324495651
    gravity: 9.400984342498948
    length: 0.6462064142932058
    masscart: 1.3539692996769968
    masspole: 0.133507111769919
    tau: 0.021147855257131764
  3:
    force_mag: 8.5574863595876
    gravity: 6.7285307726150085
    length: 0.38294798778813294
    masscart: 0.8574588708166866
    masspole: 0.0615236260048324
    tau: 0.02307661947728138
  4:
    force_mag: 8.02716944821746
    gravity: 11.150504602382693
    length: 0.4854716271338247
    masscart: 1.0456215435706913
    masspole: 0.10899768542795317
    tau: 0.019865776370441367
  5:
    force_mag: 11.700513704843809
    gravity: 6.312815408929171
    length: 0.45130592348981863
    masscart: 1.0380878429865934
    masspole: 0.07187238299019481
    tau: 0.014052652786485233
  6:
    force_mag: 13.934001347849406
    gravity: 10.133200774940446
    length: 0.4905968584092335
    masscart: 0.9859796874461285
    masspole: 0.08510387732488867
    tau: 0.01695718912603805
  7:
    force_mag: 10.523014205764852
    gravity: 9.174287955179715
    length: 0.560680060936186
    masscart: 0.9513630929456718
    masspole: 0.07683588323840541
    tau: 0.016089633251709107

================================================
FILE: sequoia/settings/presets/classic_control/mountaincar_continuous.yaml
================================================
dataset: MountainCarContinuous-v0
monitor_training_performance: true
nb_tasks: 8
train_max_steps: 160_000
train_steps_per_task: 20_000
test_max_steps: 80_000
test_steps_per_task: 10_000
train_task_schedule:
  0:
    goal_position: 0.45
    goal_velocity: 0
  1:
    goal_position: 0.4565062937130897
    goal_velocity: 0
  2:
    goal_position: 0.526503904898121
    goal_velocity: 0
  3:
    goal_position: 0.37901356007820275
    goal_velocity: 0
  4:
    goal_position: 0.5132810016616194
    goal_velocity: 0
  5:
    goal_position: 0.5023364056388072
    goal_velocity: 0
  6:
    goal_position: 0.47315246637784114
    goal_velocity: 0
  7:
    goal_position: 0.45239346485932264
    goal_velocity: 0


================================================
FILE: sequoia/settings/presets/fashion_mnist.yaml
================================================
dataset: fashion_mnist
# Two classes per task:
increment: 2
test_increment: 2


================================================
FILE: sequoia/settings/presets/mnist.yaml
================================================
dataset: mnist

================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_3each.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
  0:
    level: 0
  1:
    level: 1
  2:
    level: 2
  3:
    level: 10
  4:
    level: 11
  5:
    level: 12
  6:
    level: 20
  7:
    level: 21
  8:
    level: 22


================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_4each.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
  0:
    level: 0
  1:
    level: 1
  2:
    level: 2
  3:
    level: 3
  4:
    level: 10
  5:
    level: 11
  6:
    level: 12
  7:
    level: 13
  8:
    level: 20
  9:
    level: 21
  10:
    level: 22
  11:
    level: 23


================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_5each.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
  0:
    level: 0
  1:
    level: 1
  2:
    level: 2
  3:
    level: 3
  4:
    level: 4
  5:
    level: 10
  6:
    level: 11
  7:
    level: 12
  8:
    level: 13
  9:
    level: 14
  10:
    level: 20
  11:
    level: 21
  12:
    level: 22
  13:
    level: 23
  14:
    level: 24


================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_all.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
  0:
    level: 0
  1:
    level: 1
  2:
    level: 2
  3:
    level: 3
  4:
    level: 4
  5:
    level: 5
  6:
    level: 6
  7:
    level: 7
  8:
    level: 8
  9:
    level: 9
  10:
    level: 10
  11:
    level: 11
  12:
    level: 12
  13:
    level: 13
  14:
    level: 14
  15:
    level: 15
  16:
    level: 16
  17:
    level: 17
  18:
    level: 18
  19:
    level: 19
  20:
    level: 20
  21:
    level: 21
  22:
    level: 22
  23:
    level: 23
  24:
    level: 24
  25:
    level: 25
  26:
    level: 26
  27:
    level: 27
  28:
    level: 28
  29:
    level: 29

================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_jumps.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
  0:
    level: 0
  1:
    level: 1
  2:
    level: 2
  3:
    level: 3
  4:
    level: 4
  5:
    level: 5
  6:
    level: 6
  7:
    level: 7
  8:
    level: 8
  9:
    level: 9

================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_jumps_and_ladders.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
  0:
    level: 20
  1:
    level: 21
  2:
    level: 22
  3:
    level: 23
  4:
    level: 24
  5:
    level: 25
  6:
    level: 26
  7:
    level: 27
  8:
    level: 28
  9:
    level: 29

================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_ladders.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
  0:
    level: 10
  1:
    level: 11
  2:
    level: 12
  3:
    level: 13
  4:
    level: 14
  5:
    level: 15
  6:
    level: 16
  7:
    level: 17
  8:
    level: 18
  9:
    level: 19

================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_mix.yaml
================================================
dataset: monsterkong
monitor_training_performance: true
force_pixel_observations: true
nb_tasks: 8
train_max_steps: 1_600_000
train_steps_per_task: 200_000
test_steps_per_task: 10_000
test_max_steps: 80_000
train_task_schedule:
  0:
    level: 0
  1:
    level: 1
  2:
    level: 10
  3:
    level: 11
  4:
    level: 20
  5:
    level: 21
  6:
    level: 30
  7:
    level: 31


================================================
FILE: sequoia/settings/presets/mujoco/half_cheetah.yaml
================================================
dataset: ContinualHalfCheetah-v2
monitor_training_performance: true
nb_tasks: 8
train_steps_per_task: 200_000
test_steps_per_task: 10_000
train_task_schedule:
  0:
    gravity: -9.81
  1:
    gravity: -7.3087968946619615
  2:
    gravity: -5.615716866871361
  3:
    gravity: -12.45890973547683
  4:
    gravity: -7.6875976238634465
  5:
    gravity: -5.807262467656652
  6:
    gravity: -8.448144726367474
  7:
    gravity: -7.750512896029625


================================================
FILE: sequoia/settings/presets/rl_track.yaml
================================================
dataset: monsterkong
known_task_boundaries_at_train_time: true
known_task_boundaries_at_test_time: false
task_labels_at_train_time: true
task_labels_at_test_time: false
monitor_training_performance: true
steps_per_task: 200_000
test_steps_per_task: 10_000
train_task_schedule:
  0:
    level: 0
  1:
    level: 1
  2:
    level: 10
  3:
    level: 11
  4:
    level: 20
  5:
    level: 21
  6:
    level: 30
  7:
    level: 31


================================================
FILE: sequoia/settings/presets/sl_track.yaml
================================================
dataset: synbols
nb_tasks: 12
known_task_boundaries_at_train_time: true
known_task_boundaries_at_test_time: false
task_labels_at_train_time: true
task_labels_at_test_time: false
monitor_training_performance: true


================================================
FILE: sequoia/settings/rl/__init__.py
================================================
from .environment import RLEnvironment
from .setting import RLSetting

ActiveEnvironment = RLEnvironment
from .continual import ContinualRLSetting, make_continuous_task
from .discrete import DiscreteTaskAgnosticRLSetting, make_discrete_task
from .incremental import IncrementalRLSetting, make_incremental_task

# TODO: Properly Add the multi-task RL setting.
from .multi_task import MultiTaskRLSetting
from .task_incremental import TaskIncrementalRLSetting
from .traditional import TraditionalRLSetting


================================================
FILE: sequoia/settings/rl/continual/__init__.py
================================================
from .environment import GymDataLoader
from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType
from .results import ContinualRLResults
from .setting import ContinualRLSetting
from .tasks import make_continuous_task

ContinualRLEnvironment = GymDataLoader
Results = ContinualRLResults


================================================
FILE: sequoia/settings/rl/continual/environment.py
================================================
""" Dataloader for a Gym Environment. Uses multiple parallel environments.

TODO: @lebrice: We need to decide which of these two behaviours we want to
    support in the GymDataLoader, (if not both):

- Either iterate over the dataset and get the usual 4-item tuples like gym,
    by using a policy to generate the actions,
OR
- Give back 3-item tuples (without the reward) and give the reward when
    users send back an action for the current observation. Users would either
    be required to send actions back after each observation or to provide a
    policy to "fill-in-the-gaps" and select the action when the model doesn't
    send one back.

The traditional supervised dataloader can be easily recovered in this second
case: since the reward doesn't depend on the action, we can just send back a
random or None action to the dataloader, and group the returned reward with
the batch of observations, before yielding the (observations, rewards)
batch.

In either case, we can easily keep the `step` API from gym available.
Need to talk more about this for sure.
"""
import warnings
from typing import Any, Iterable, Iterator, Optional, TypeVar, Union

import gym
import numpy as np
from gym import Wrapper, spaces
from gym.utils.colorize import colorize
from gym.vector import AsyncVectorEnv, VectorEnv
from gym.vector.utils import batch_space
from torch import Tensor
from torch.utils.data import IterableDataset

from sequoia.common.gym_wrappers import EnvDataset, IterableWrapper
from sequoia.common.gym_wrappers.policy_env import PolicyEnv
from sequoia.common.gym_wrappers.utils import StepResult
from sequoia.settings.base.objects import Actions
from sequoia.settings.rl.environment import ActiveEnvironment
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)
T = TypeVar("T")


# TODO: The typing information from sequoia.settings.base.environment isn't quite
# accurate here... The observations are bound by Tensors or numpy arrays, not
# 'Batch' objects.

# from sequoia.settings.base.environment import ObservationType, ActionType, RewardType
ObservationType = TypeVar("ObservationType")
ActionType = TypeVar("ActionType")
RewardType = TypeVar("RewardType")


class GymDataLoader(
    ActiveEnvironment[ObservationType, ActionType, RewardType], IterableWrapper, Iterable
):
    """Environment for RL settings.

    Exposes **both** the `gym.Env` as well as the "Active" DataLoader APIs.

    This is useful because it makes it easy to adapt a method originally made for SL so
    that it can also work in a reinforcement learning context, where the rewards (e.g.
    image labels, or correct/incorrect prediction, etc.) are only given *after* the
    action (e.g. y_pred) has been received by the environment.

    meaning you
    can use this in two different ways:

    1. Gym-style using `step`:
        1. Agent   --------- action ----------------> Env
        2. Agent   <---(state, reward, done, info)--- Env

    2. ActiveDataLoader style, using `iter` and `send`:
        1. Agent   <--- (state, done, info) --- Env
        2. Agent   ---------- action ---------> Env
        3. Agent   <--------- reward ---------- Env


    This would look something like this in code:

    ```python
    env = GymDataLoader("CartPole-v0", batch_size=32)
    for states, done, infos in env:
        actions = actor(states)
        rewards = env.send(actions)
        loss = loss_function(...)

    # OR:

    state = env.reset()
    for i in range(max_steps):
        action = self.actor(state)
        states, reward, done, info = env.step(action)
        loss = loss_function(...)
    ```

    """

    def __init__(
        self,
        env: Union[EnvDataset, PolicyEnv] = None,
        dataset: Union[EnvDataset, PolicyEnv] = None,
        batch_size: int = None,
        num_workers: int = None,
        **kwargs,
    ):
        assert not (
            env is None and dataset is None
        ), "One of the `dataset` or `env` arguments must be passed."
        assert not (
            env is not None and dataset is not None
        ), "Only one of the `dataset` and `env` arguments can be used."

        if not isinstance(env, IterableDataset):
            raise RuntimeError(
                f"The env {env} isn't an interable dataset! (You can use the "
                f"EnvDataset or PolicyEnv wrappers to make an IterableDataset "
                f"from a gym environment."
            )

        if isinstance(env.unwrapped, VectorEnv):
            if batch_size is not None and batch_size != env.num_envs:
                logger.warning(
                    UserWarning(
                        f"The provided batch size {batch_size} will be ignored, since "
                        f"the provided env is vectorized with a batch_size of "
                        f"{env.unwrapped.num_envs}."
                    )
                )
            batch_size = env.num_envs

        if isinstance(env.unwrapped, AsyncVectorEnv):
            num_workers = env.num_envs
        else:
            num_workers = 0

        self.env = env
        # NOTE: The batch_size and num_workers attributes reflect the values from the
        # iterator (the VectorEnv), not those of the dataloader.
        # This is done in order to avoid pytorch workers being ever created, and also so
        # that pytorch-lightning stops warning us that the num_workers is too low.
        self._batch_size = batch_size
        self._num_workers = num_workers
        super().__init__(
            dataset=self.env,
            # The batch size is None, because the VecEnv takes care of
            # doing the batching for us.
            batch_size=None,
            num_workers=0,
            collate_fn=None,
            **kwargs,
        )
        Wrapper.__init__(self, env=self.env)
        assert not isinstance(self.env, GymDataLoader), "Something very wrong is happening."
        # self.max_epochs: int = max_epochs
        self.observation_space: gym.Space = self.env.observation_space
        self.action_space: gym.Space = self.env.action_space
        self.reward_space: gym.Space
        if isinstance(env.unwrapped, VectorEnv):
            env: VectorEnv
            batch_size = env.num_envs
            # TODO: Overwriting the action space to be the 'batched' version of
            # the single action space, rather than a Tuple(Discrete, ...) as is
            # done in the gym.vector.VectorEnv.
            self.action_space = batch_space(env.single_action_space, batch_size)

        if not hasattr(self.env, "reward_space"):
            self.reward_space = spaces.Box(
                low=self.env.reward_range[0],
                high=self.env.reward_range[1],
                shape=(),
                dtype=np.float64,
            )
            if isinstance(self.env.unwrapped, VectorEnv):
                # Same here, we use a 'batched' space rather than Tuple.
                self.reward_space = batch_space(self.reward_space, batch_size)

        # BUG: Fix this bug: the observation / action spaces don't accept Tensors as
        # valid samples, even though they should.
        # self.observation_space = add_tensor_support(self.observation_space)
        # self.action_space = add_tensor_support(self.action_space)
        # self.reward_space = add_tensor_support(self.reward_space)
        # assert has_tensor_support(self.observation_space)

    @property
    def num_workers(self) -> Optional[int]:
        return self._num_workers

    @num_workers.setter
    def num_workers(self, value: Any) -> Optional[int]:
        if value and value != self._num_workers:
            warnings.warn(
                RuntimeWarning(
                    f"Can't set num_workers to {value}, it's hard-set to {self._num_workers}"
                )
            )

    @property
    def batch_size(self) -> Optional[int]:
        return self._batch_size

    @batch_size.setter
    def batch_size(self, value: Any) -> Optional[int]:
        if value != self._batch_size:
            warnings.warn(
                RuntimeWarning(
                    f"Can't set batch size to {value}, it's hard-set to {self._batch_size}"
                )
            )

    def __next__(self) -> ObservationType:
        if self._iterator is None:
            self._iterator = self.__iter__()
        return next(self._iterator)

    # def __len__(self):
    #     if isinstance(self.env, EnvDataset):
    #         return self.env.max_steps
    #     raise NotImplementedError(f"TODO: Can't tell the length of the env {self.env}.")

    def _obs_have_done_signal(self) -> bool:
        """Try to determine if the observations contain the 'done' signal or not."""
        if (
            isinstance(self.observation_space, spaces.Dict)
            and "done" in self.observation_space.spaces
        ):
            return True
        return False

    def __iter__(self) -> Iterator:
        # TODO: Pretty sure this could be greatly simplified by just always using the loop from EnvDataset.
        # return super().__iter__()
        # assert False, self.env.__iter__()
        if self.is_vectorized:
            # elif isinstance(self.observation_space, spaces.Tuple)
            if not self._obs_have_done_signal():
                warnings.warn(
                    RuntimeWarning(
                        colorize(
                            f"You are iterating over a vectorized env, but the observations "
                            f"don't seem to contain the 'done' signal! You should definitely "
                            f"consider applying something like an `AddDoneToObservation` "
                            f"wrapper to each individual env before vectorization. ",
                            "red",
                        )
                    )
                )
        return self.env.__iter__()
        # yield from IterableWrapper.__iter__(self)

        # self.observation_ = self.reset()
        # self.done_ = False
        # self.action_ = None
        # self.reward_ = None

        # # Yield the first observation_.
        # # TODO: Maybe add something like 't' on the observations to make sure they
        # # line up with the rewards we get?
        # yield self.observation_

        # if self.action_ is None:
        #     raise RuntimeError(
        #         f"You have to send an action using send() between every "
        #         f"observation. (env = {self})"
        #     )
        # def done_is_true(done: Union[bool, np.ndarray, Sequence[bool]]) -> bool:
        #     return done if isinstance(done, bool) or not done.shape else all(done)

        # while not any([done_is_true(self.done_), self.is_closed()]):
        #     # logger.debug(f"step {self.n_steps_}/{self.max_steps},  (episode {self.n_episodes_})")

        #     # Set those to None to force the user to call .send()
        #     self.action_ = None
        #     self.reward_ = None
        #     yield self.observation_

        #     if self.action_ is None:
        #         raise RuntimeError(
        #             f"You have to send an action using send() between every "
        #             f"observation. (env = {self})"
        #         )

    # def __iter__(self) -> Iterable[ObservationType]:
    #     # This would give back a single-process dataloader iterator over the
    #     # 'dataset' which in this case is the environment:
    #     # return super().__iter__()

    #     # This, on the other hand, completely bypasses the dataloader iterator,
    #     # and instead just yields the samples from the dataset directly, which
    #     # is actually what we want!
    #     # BUG: Somehow this doesn't batch the samples correctly..
    #     return self.env.__iter__()

    #     # TODO: BUG: Wrappers applied on top of the GymDataLoader won't have an
    #     # effect on the values yielded by this iterator. Currently trying to fix
    #     # this inside the IterableWrapper base class, but it's not that simple.

    #     # return type(self.env).__iter__(self)
    #     # if has_wrapper(self.env, EnvDataset):
    #     #     return EnvDataset.__iter__(self)
    #     # elif has_wrapper(self.env, PolicyEnv):
    #     #     return PolicyEnv.__iter__(self)
    #     # return type(self.env).__iter__(self)
    #     # return  iter(self.env)
    #     # yield from self._iterator

    #     # Could increment the number of epochs here also, if we wanted to keep
    #     # count.

    # def random_actions(self):
    #     return self.env.random_actions()

    def step(self, action: Union[ActionType, Any]) -> StepResult:
        # logger.debug(f"Calling step on self.env")
        return super().step(action)

    def send(self, action: Union[ActionType, Any]) -> RewardType:
        # TODO: Remove this unwrapping code, and instead only unwrap stuff if necessary
        # for the environment.
        if isinstance(action, Actions):
            action = action.y_pred
        if isinstance(action, Tensor):
            action = action.detach().cpu().numpy()
        if isinstance(action, np.ndarray) and not action.shape:
            action = action.item()
        if isinstance(self.env.action_space, spaces.Tuple) and isinstance(action, np.ndarray):
            action = action.tolist()
        assert action in self.env.action_space, (action, self.env.action_space)
        return super().send(action)
        # self.action_ = action
        # self.observation_, self.reward_, self.done_, self.info_ = su(action)
        # return self.reward_
        # return self.env.send(action)


================================================
FILE: sequoia/settings/rl/continual/environment_test.py
================================================
from typing import ClassVar, Optional, Type

import gym
import numpy as np
import pytest
import torch
from gym import spaces
from gym.vector.utils import batch_space
from torch import Tensor

from sequoia.common.gym_wrappers import EnvDataset, PixelObservationWrapper
from sequoia.conftest import param_requires_atari_py
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import take

from .environment import GymDataLoader
from .make_env import make_batched_env

logger = get_logger(__name__)


class TestGymDataLoader:
    # Grouping tests into a class so we can inherit from it in another test module, for
    # instance in the tests for EnvironmentProxy class.
    GymDataLoader: ClassVar[Type[GymDataLoader]] = GymDataLoader

    @pytest.mark.parametrize("batch_size", [1, 2, 5])
    @pytest.mark.parametrize(
        "env_name", ["CartPole-v0", param_requires_atari_py("ALE/Breakout-v5")]
    )
    def test_spaces(self, env_name: str, batch_size: int):
        dataset = EnvDataset(make_batched_env(env_name, batch_size=batch_size))

        batched_obs_space = dataset.observation_space
        # NOTE: the VectorEnv class creates the 'batched' action space by creating a
        # Tuple of the single action space, of length 'N', which seems a bit weird.
        # batched_action_space = vector_env.action_space
        batched_action_space = batch_space(dataset.single_action_space, batch_size)

        dataloader_env = self.GymDataLoader(dataset, batch_size=batch_size)
        assert dataloader_env.observation_space == batched_obs_space
        assert dataloader_env.action_space == batched_action_space

        dataloader_env.reset()
        for observation_batch in take(dataloader_env, 3):
            if isinstance(observation_batch, Tensor):
                observation_batch = observation_batch.cpu().numpy()
            assert observation_batch in batched_obs_space

            actions = dataloader_env.action_space.sample()
            assert len(actions) == batch_size
            assert actions in batched_action_space

            rewards = dataloader_env.send(actions)
            # BUG: rewards has dtype np.float64, while the space has np.float32.
            assert len(rewards) == batch_size
            assert rewards in dataloader_env.reward_space

    @pytest.mark.parametrize("batch_size", [None, 1, 2, 5])
    @pytest.mark.parametrize(
        "env_name", ["CartPole-v0", param_requires_atari_py("ALE/Breakout-v5")]
    )
    def test_max_steps_is_respected(self, env_name: str, batch_size: int):
        max_steps = 5
        env_name = "CartPole-v0"
        env = make_batched_env(env_name, batch_size=batch_size)
        dataset = EnvDataset(env)
        from sequoia.common.gym_wrappers.action_limit import ActionLimit

        dataset = ActionLimit(dataset, max_steps=max_steps * (batch_size or 1))
        env: GymDataLoader = self.GymDataLoader(dataset)
        env.reset()
        i = 0
        for i, obs in enumerate(env):
            assert obs in env.observation_space
            assert i < max_steps, f"Max steps should have been respected: {i}"
            env.send(env.action_space.sample())
        assert i == max_steps - 1
        env.close()

    @pytest.mark.parametrize("batch_size", [None, 1, 2, 5])
    @pytest.mark.parametrize("seed", [None, 123, 456])
    # @pytest.mark.parametrize(
    #     "env_name", ["CartPole-v0", param_requires_atari_py("ALE/Breakout-v5")]
    # )
    def test_multiple_epochs_works(self, batch_size: Optional[int], seed: Optional[int]):
        epochs = 3
        max_steps_per_episode = 10
        from gym.wrappers import TimeLimit

        from sequoia.common.gym_wrappers import AddDoneToObservation
        from sequoia.conftest import DummyEnvironment

        def env_fn():
            # FIXME: Using the DummyEnvironment for now since it's easier to debug with.
            # env = gym.make(env_name)
            env = DummyEnvironment()
            env = AddDoneToObservation(env)
            env = TimeLimit(env, max_episode_steps=max_steps_per_episode)
            return env

        # assert False, [env_fn(i).unwrapped for i in range(4)]
        # env = gym.vector.make(env_name, num_envs=(batch_size or 1))
        env = make_batched_env(env_fn, batch_size=batch_size)

        batched_env = env
        # from sequoia.common.gym_wrappers.episode_limit import EpisodeLimit
        # env = EpisodeLimit(env, max_episodes=epochs)
        from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors

        env = ConvertToFromTensors(env)

        env = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)

        env: GymDataLoader = self.GymDataLoader(env)
        # BUG: Seems to be a little bug in the shape of the items yielded by the env due
        # to the concat_fn of the DataLoader.
        # if batch_size and batch_size >= 1:
        #     assert False, (env.reset().shape, env.observation_space, next(iter(env)).shape)
        env.seed(seed)

        all_rewards = []
        with env:
            for epoch in range(epochs):
                for step, obs in enumerate(env):
                    print(f"'epoch' {epoch}, step {step}:, obs: {obs}")
                    assert obs in env.observation_space, obs.shape
                    assert (  # BUG: This isn't working: (sometimes!)
                        step < max_steps_per_episode
                    ), "Max steps per episode should have been respected."
                    rewards = env.send(env.action_space.sample())

                    if batch_size is None:
                        all_rewards.append(rewards)
                    else:
                        all_rewards.extend(rewards)

                # Since in the VectorEnv, 'episodes' are infinite, we must have
                # reached the limit of the number of steps, while in a single
                # environment, the episode might have been shorter.
                assert step <= max_steps_per_episode - 1

            assert epoch == epochs - 1

        if batch_size in [None, 1]:
            # Some episodes might last shorter than the max number of steps per episode,
            # therefore the total should be at most this much:
            assert len(all_rewards) <= epochs * max_steps_per_episode
        else:
            # The maximum number of steps per episode is set, but the env is vectorized,
            # so the number of 'total' rewards we get from all envs should be *exactly*
            # this much:
            assert len(all_rewards) == epochs * max_steps_per_episode * batch_size

    @pytest.mark.parametrize("batch_size", [1, 2, 5])
    @pytest.mark.parametrize("env_name", [param_requires_atari_py("ALE/Breakout-v5")])
    def test_reward_isnt_always_one(self, env_name: str, batch_size: int):
        epochs = 3
        max_steps_per_episode = 100

        env = make_batched_env(env_name, batch_size=batch_size)
        dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)

        env: GymDataLoader = self.GymDataLoader(env=dataset)
        all_rewards = []
        with env:
            env.reset()
            for epoch in range(epochs):
                for i, batch in enumerate(env):
                    rewards = env.send(env.action_space.sample())
                    all_rewards.extend(rewards)

        assert all_rewards != np.ones(len(all_rewards)).tolist()

    @pytest.mark.parametrize("env_name", ["CartPole-v0"])
    @pytest.mark.parametrize("batch_size", [1, 2, 5, 10])
    def test_batched_state(self, env_name: str, batch_size: int):
        max_steps_per_episode = 10

        env = make_batched_env(env_name, batch_size=batch_size)
        dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)

        env: GymDataLoader = GymDataLoader(
            dataset,
            batch_size=batch_size,
        )
        with gym.make(env_name) as temp_env:
            state_shape = temp_env.observation_space.shape
            action_shape = temp_env.action_space.shape

        state_shape = (batch_size, *state_shape)
        action_shape = (batch_size, *action_shape)
        reward_shape = (batch_size,)

        state = env.reset()
        assert state.shape == state_shape
        env.seed(123)
        i = 0
        for obs_batch in take(env, 5):
            assert obs_batch.shape == state_shape

            random_actions = env.action_space.sample()
            assert torch.as_tensor(random_actions).shape == action_shape
            assert temp_env.action_space.contains(random_actions[0])

            reward = env.send(random_actions)
            assert reward.shape == reward_shape
            i += 1
        assert i == 5

    @pytest.mark.parametrize("env_name", ["CartPole-v0"])
    @pytest.mark.parametrize("batch_size", [1, 2, 5, 10])
    def test_batched_pixels(self, env_name: str, batch_size: int):
        max_steps_per_episode = 10
        pyglet = pytest.importorskip("pyglet")
        wrappers = [PixelObservationWrapper]
        env = make_batched_env(env_name, wrappers=wrappers, batch_size=batch_size)
        dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)

        with gym.make(env_name) as temp_env:
            for wrapper in wrappers:
                temp_env = wrapper(temp_env)

            state_shape = temp_env.observation_space.shape
            action_shape = temp_env.action_space.shape

        state_shape = (batch_size, *state_shape)
        action_shape = (batch_size, *action_shape)
        reward_shape = (batch_size,)

        env = self.GymDataLoader(
            dataset,
            batch_size=batch_size,
        )
        assert isinstance(env.observation_space, spaces.Box)
        assert len(env.observation_space.shape) == 4
        assert env.observation_space.shape[0] == batch_size

        env.seed(1234)
        for i, batch in enumerate(env):
            assert len(batch) == batch_size

            if isinstance(batch, Tensor):
                batch = batch.cpu().numpy()
            assert batch in env.observation_space

            random_actions = env.action_space.sample()
            assert torch.as_tensor(random_actions).shape == action_shape
            assert temp_env.action_space.contains(random_actions[0])

            reward = env.send(random_actions)
            assert reward.shape == reward_shape


================================================
FILE: sequoia/settings/rl/continual/make_env.py
================================================
"""Creates an IterableDataset from a gym env by applying different wrappers.
"""
import multiprocessing as mp
import warnings
from functools import partial
from typing import Callable, Dict, Iterable, List, Optional, Tuple, Type, TypeVar, Union

import gym
from gym import Wrapper
from gym.vector import AsyncVectorEnv, SyncVectorEnv, VectorEnv

from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)

W = TypeVar("W", bound=Union[gym.Env, gym.Wrapper])

WrapperAndKwargs = Tuple[Type[gym.Wrapper], Dict]


def make_batched_env(
    base_env: Union[str, Callable],
    batch_size: int = 10,
    wrappers: Iterable[Union[Type[Wrapper], WrapperAndKwargs]] = None,
    shared_memory: bool = True,
    num_workers: Optional[int] = None,
    **kwargs,
) -> VectorEnv:
    """Create a vectorized environment from multiple copies of an environment.

    NOTE: This function does pretty much the same as `gym.vector.make`, but with
    a bit more flexibility:
    - Allows passing an env factory to start with, rather than only taking ids.
    - Allows passing wrappers to be added to the env on
        each worker, as well as wrappers to add on top of the returned (batched) env.
    - Allows passing tuples of (Type[Wrapper, kwargs])

    Parameters
    ----------
    base_env : str
        The environment ID (or an environment factory). This must be a valid ID
        from the registry.

    batch_size : int
        Number of copies of the environment (as well as batch size).

    num_workers : Optional[int]
        Number of workers to use. When `None` (default), uses as many workers as
        there are CPUs on this machine. When 0, the returned environment will be
        a `SyncVectorEnv`. When `num_workers` == `batch_size`, returns an
        AsyncVectorEnv. When `num_workers` != `batch_size`, returns a
        `BatchVectorEnv`.

    wrappers : Callable or Iterable of Callables (default: `None`)
        If not `None`, then apply the wrappers to each internal environment
        during creation.

    **kwargs : Dict
        Keyword arguments to be passed to `gym.make` when `base_env` is an id.

    Returns
    -------
    env : `gym.vector.VectorEnv` instance
        The vectorized environment.

    Example
    -------
    >>> import gym
    >>> env = gym.vector.make('CartPole-v1', 3)
    >>> env.seed([123, 456, 789])
    >>> env.reset()
    array([[ 0.01823519, -0.0446179 , -0.02796401, -0.03156282],
           [-0.00303268, -0.00523447, -0.03759432,  0.025485  ],
           [-0.04084033, -0.0285856 ,  0.01318461, -0.03327109]],
          dtype=float32)
    """
    # Get the default wrappers, if needed.
    wrappers = wrappers or []

    base_env_factory: Callable[[], gym.Env]
    if isinstance(base_env, str):
        base_env_factory = partial(gym.make, base_env)
    elif callable(base_env):
        base_env_factory = base_env
    else:
        raise NotImplementedError(
            f"Unsupported base env: {base_env}. Must be " f"either a string or a callable for now."
        )

    def pre_batch_env_factory():
        env = base_env_factory(**kwargs)
        for wrapper in wrappers:
            if isinstance(wrapper, tuple):
                assert len(wrapper) == 2 and isinstance(wrapper[1], dict)
                wrapper = partial(wrapper[0], **wrapper[1])
            env = wrapper(env)
        return env

    if batch_size is None:
        return pre_batch_env_factory()

    env_fns = [pre_batch_env_factory for _ in range(batch_size)]

    if num_workers is None:
        if batch_size == 1:
            num_workers = 0
        else:
            num_workers = min(mp.cpu_count(), batch_size)

    if num_workers == 0:
        if batch_size > 1:
            warnings.warn(
                UserWarning(
                    f"Running {batch_size} environments in series, which might be "
                    f"slow. Consider setting the `num_workers` argument, perhaps to "
                    f"the number of CPUs on your machine."
                )
            )
        return SyncVectorEnv(env_fns)

    if num_workers == batch_size:
        return AsyncVectorEnv(env_fns, shared_memory=shared_memory)

    raise RuntimeError(f"Need num_workers to match batch_size for now.")
    return AsyncVectorEnv(env_fns, shared_memory=shared_memory, n_workers=num_workers)


def wrap(env: gym.Env, wrappers: Iterable[Union[Type[Wrapper], WrapperAndKwargs]]) -> Wrapper:
    wrappers = list(wrappers)
    # Convert the list of wrapper types or (wrapper_type, kwargs) tuples into
    # a list of callables that we can apply successively to the env.
    wrapper_fns = _make_wrapper_fns(wrappers)
    for wrapper_fn in wrapper_fns:
        env = wrapper_fn(env)
    return env


def _make_wrapper_fns(
    wrappers_and_args: Iterable[Union[Type[Wrapper], Tuple[Type[Wrapper], Dict]]]
) -> List[Callable[[Wrapper], Wrapper]]:
    """Given a list of either wrapper classes or (wrapper, kwargs) tuples,
    returns a list of callables, each of which just takes an env and wraps
    it using the wrapper and the kwargs, if present.
    """
    wrappers_and_args = list(wrappers_and_args or [])
    wrapper_functions: List[Callable[[gym.Wrapper], gym.Wrapper]] = []
    for wrapper_and_args in wrappers_and_args:
        if isinstance(wrapper_and_args, (tuple, list)):
            # List element was a tuple with (wrapper, (args?), kwargs).
            wrapper, *args, kwargs = wrapper_and_args
            logger.debug(f"Wrapper: {wrapper}, args: {args}, kwargs: {kwargs}")
            wrapper_fn = partial(wrapper, *args, **kwargs)
        else:
            # list element is a type of Wrapper or some kind of callable.
            wrapper_fn = wrapper_and_args
        wrapper_functions.append(wrapper_fn)
    return wrapper_functions


================================================
FILE: sequoia/settings/rl/continual/make_env_test.py
================================================
"""
Tests that check that combining wrappers works fine in combination.
"""

from typing import Union

import gym
import pytest
import torch
from gym.vector import AsyncVectorEnv, SyncVectorEnv

from sequoia.conftest import requires_pyglet, slow_param

from .make_env import make_batched_env


@pytest.mark.parametrize("env_name", ["CartPole-v0"])
@pytest.mark.parametrize("batch_size", [1, 5, slow_param(10)])
def test_make_batched_env(env_name: str, batch_size: int):
    env = make_batched_env(base_env=env_name, batch_size=batch_size)
    start_state = env.reset()
    assert start_state.shape == (batch_size, 4)

    for i in range(10):
        action = env.action_space.sample()
        assert torch.as_tensor(action).shape == (batch_size,)
        obs, reward, done, info = env.step(action)
        assert obs.shape == (batch_size, 4)
        assert reward.shape == (batch_size,)


@pytest.mark.xfail(
    reason="Not sure that the 'id' function gives an 'absolute' memory adress, or if "
    "the address is process-relative, in which case it might be an explanation as to "
    "why these tests don't work."
)
@pytest.mark.parametrize("env_name", ["CartPole-v0"])
@pytest.mark.parametrize("batch_size", [4])
@pytest.mark.parametrize("num_workers", [0, 4])
def test_make_batched_env_envs_have_distinct_ids(env_name: str, batch_size: int, num_workers: int):
    # NOTE: We get a SyncVectorEnv if num_workers == 0, else we get an AsyncVectorEnv if
    # num_workers == batch_size, else we get a BatchVectorEnv.
    from gym.wrappers import TimeLimit

    def base_env_fn():
        env = gym.make(env_name)
        return TimeLimit(env, max_episode_steps=10)

    env: Union[SyncVectorEnv, AsyncVectorEnv] = make_batched_env(
        base_env=base_env_fn, batch_size=batch_size, num_workers=num_workers
    )
    if isinstance(env, SyncVectorEnv):
        envs = env.envs
        # Assert that the wrappers are distinct objects
        assert len(set(id(env) for env in envs)) == batch_size
        # Assert that the unwrapped envs are distinct objects
        assert len(set(id(env.unwrapped) for env in envs)) == batch_size
    else:
        assert isinstance(env, AsyncVectorEnv)
        ids = env.apply(id)
        assert len(set(ids)) == batch_size
        unwrapped_ids = env.apply(get_unwrapped_id)
        assert len(set(unwrapped_ids)) == batch_size


def get_unwrapped_id(env):
    return id(env.unwrapped)


@requires_pyglet
@pytest.mark.parametrize("env_name", ["CartPole-v0"])
@pytest.mark.parametrize("batch_size", [1, 5, slow_param(10)])
def test_make_env_with_wrapper(env_name: str, batch_size: int):
    env = make_batched_env(
        base_env=env_name,
        batch_size=batch_size,
        wrappers=[PixelObservationWrapper],
    )
    start_state = env.reset()
    expected_state_shape = (batch_size, 400, 600, 3)
    assert start_state.shape == expected_state_shape

    for i in range(10):
        action = env.action_space.sample()
        assert torch.as_tensor(action).shape == (batch_size,)
        obs, reward, done, info = env.step(action)
        assert obs.shape == expected_state_shape
        assert reward.shape == (batch_size,)


from gym.vector import AsyncVectorEnv

from sequoia.common.gym_wrappers import MultiTaskEnvironment, PixelObservationWrapper


@pytest.mark.xfail(reason="TODO: Check if gym supports remote getattr now.")
@pytest.mark.parametrize("env_name", ["CartPole-v0"])
@pytest.mark.parametrize("batch_size", [1, 5, slow_param(10)])
def test_make_env_with_wrapper_and_kwargs(env_name: str, batch_size: int):
    # NOTE: Since BatchVectorEnv and our subclasses of the vectorenvs in gym got removed, we lost
    # the ability to use the remote getattr feature.
    task_schedule = {0: dict(length=0.5), 50: dict(length=1.5)}
    env = make_batched_env(
        base_env=env_name,
        batch_size=batch_size,
        wrappers=[
            PixelObservationWrapper,
            lambda env: MultiTaskEnvironment(env, task_schedule=task_schedule),
        ],
        # For now, setting the number of workers to the batch size, just so we
        # get an AsyncVectorEnv rather than the BatchedVectorEnv (so the remote_getattr works).
        num_workers=batch_size,
    )
    start_state = env.reset()
    expected_state_shape = (batch_size, 400, 600, 3)
    assert start_state.shape == expected_state_shape

    for i in range(100):
        action = env.action_space.sample()
        assert torch.as_tensor(action).shape == (batch_size,)

        assert env.length == [2.0 for i in range(batch_size)]

        obs, reward, done, info = env.step(action)
        assert obs.shape == expected_state_shape
        assert reward.shape == (batch_size,)


================================================
FILE: sequoia/settings/rl/continual/objects.py
================================================
from dataclasses import dataclass
from typing import Optional, Sequence, TypeVar, Union

from torch import Tensor

from sequoia.settings.assumptions.continual import ContinualAssumption
from sequoia.settings.rl import RLSetting


@dataclass(frozen=True)
class Observations(RLSetting.Observations, ContinualAssumption.Observations):
    """Observations from a Continual Reinforcement Learning environment."""

    x: Tensor
    task_labels: Optional[Tensor] = None
    # The 'done' that is normally returned by the 'step' method.
    # We add this here in case a method were to iterate on the environments in the
    # dataloader-style so they also have access to those (i.e. for the BaseMethod).
    done: Optional[Union[bool, Sequence[bool]]] = None


@dataclass(frozen=True)
class Actions(RLSetting.Actions, ContinualAssumption.Actions):
    """Actions to be sent to a Continual Reinforcement Learning environment."""

    y_pred: Tensor


@dataclass(frozen=True)
class Rewards(RLSetting.Rewards, ContinualAssumption.Rewards):
    """Rewards obtained from a Continual Reinforcement Learning environment."""

    y: Tensor


ObservationType = TypeVar("ObservationType", bound=Observations)
ActionType = TypeVar("ActionType", bound=Actions)
RewardType = TypeVar("RewardType", bound=Rewards)


================================================
FILE: sequoia/settings/rl/continual/results.py
================================================
from typing import ClassVar, Generic, TypeVar

from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings.assumptions.continual import ContinualResults
from sequoia.utils.plotting import autolabel, plt

MetricType = TypeVar("MetricType", bound=EpisodeMetrics)


class ContinualRLResults(ContinualResults, Generic[MetricType]):
    """Results for a ContinualRLSetting."""

    # Higher mean reward / episode => better
    lower_is_better: ClassVar[bool] = False

    objective_name: ClassVar[str] = "Mean reward per episode"

    # Minimum runtime considered (in hours).
    # (No extra points are obtained for going faster than this.)
    min_runtime_hours: ClassVar[float] = 1.5
    # Maximum runtime allowed (in hours).
    max_runtime_hours: ClassVar[float] = 12.0

    def mean_reward_plot(self):
        raise NotImplementedError("TODO")
        figure: plt.Figure
        axes: plt.Axes
        figure, axes = plt.subplots()
        x = list(range(self.num_tasks))
        y = [metrics.accuracy for metrics in self.average_metrics_per_task]
        rects = axes.bar(x, y)
        axes.set_title("Task Accuracy")
        axes.set_xlabel("Task")
        axes.set_ylabel("Accuracy")
        axes.set_ylim(0, 1.0)
        autolabel(axes, rects)
        return figure


================================================
FILE: sequoia/settings/rl/continual/setting.py
================================================
""" Current most general Setting in the Reinforcement Learning side of the tree.
"""
import difflib
import json
import textwrap
import warnings
from dataclasses import dataclass, fields
from functools import partial
from pathlib import Path
from typing import Any, Callable, ClassVar, Dict, List, Optional, Type, Union

import gym
import numpy as np
from gym import spaces
from gym.envs.registration import EnvSpec, registry
from gym.utils import colorize
from gym.wrappers import TimeLimit
from simple_parsing import choice, field, list_field
from simple_parsing.helpers import dict_field

try:
    from stable_baselines3.common.atari_wrappers import AtariWrapper as SB3AtariWrapper
except ImportError:

    class SB3AtariWrapper:
        pass


from gym.wrappers.atari_preprocessing import AtariPreprocessing as GymAtariWrapper

import wandb
from sequoia.common import Config
from sequoia.common.gym_wrappers import (
    AddDoneToObservation,
    MultiTaskEnvironment,
    RenderEnvWrapper,
    SmoothTransitions,
    TransformObservation,
    TransformReward,
)
from sequoia.common.gym_wrappers.action_limit import ActionLimit
from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support
from sequoia.common.gym_wrappers.env_dataset import EnvDataset
from sequoia.common.gym_wrappers.episode_limit import EpisodeLimit
from sequoia.common.gym_wrappers.pixel_observation import ImageObservations
from sequoia.common.gym_wrappers.utils import is_atari_env
from sequoia.common.spaces import Sparse, TypedDictSpace
from sequoia.common.transforms import Transforms
from sequoia.settings.assumptions.continual import ContinualAssumption
from sequoia.settings.base import Method
from sequoia.settings.rl import ActiveEnvironment, RLSetting
from sequoia.settings.rl.wrappers import (
    HideTaskLabelsWrapper,
    MeasureRLPerformanceWrapper,
    TypedObjectsWrapper,
)
from sequoia.utils import get_logger
from sequoia.utils.generic_functions import move
from sequoia.utils.utils import flag, pairwise

from .environment import GymDataLoader
from .make_env import make_batched_env
from .objects import Actions, Observations, Rewards  # type: ignore
from .results import ContinualRLResults
from .tasks import ContinuousTask, TaskSchedule, is_supported, make_continuous_task, names_match
from .test_environment import ContinualRLTestEnvironment

logger = get_logger(__name__)


# Type alias for the Environment returned by `train/val/test_dataloader`.
Environment = ActiveEnvironment[
    "ContinualRLSetting.Observations",
    "ContinualRLSetting.Observations",
    "ContinualRLSetting.Rewards",
]


# NOTE: Takes about 0.2 seconds to check for all compatible envs (with loading), and
# only happens once.
supported_envs: Dict[str, EnvSpec] = {
    spec.id: spec for env_id, spec in registry.env_specs.items() if is_supported(env_id)
}
available_datasets: Dict[str, str] = {env_id: env_id for env_id in supported_envs}
# available_datasets.update(
#     {camel_case(env_id.split("-v")[0]): env_id for env_id in supported_envs}
# )


@dataclass
class ContinualRLSetting(RLSetting, ContinualAssumption):
    """Reinforcement Learning Setting where the environment changes over time.

    This is an Active setting which uses gym environments as sources of data.
    These environments' attributes could change over time following a task
    schedule. An example of this could be that the gravity increases over time
    in cartpole, making the task progressively harder as the agent interacts with
    the environment.
    """

    # (NOTE: commenting out SLSetting.Observations as it is the same class
    # as Setting.Observations, and we want a consistent method resolution order.
    Observations: ClassVar[Type[Observations]] = Observations
    Actions: ClassVar[Type[Actions]] = Actions
    Rewards: ClassVar[Type[Rewards]] = Rewards

    # The type of results returned by an RL experiment.
    Results: ClassVar[Type[Results]] = ContinualRLResults
    # The type wrapper used to wrap the test environment, and which produces the
    # results.
    TestEnvironment: ClassVar[Type[TestEnvironment]] = ContinualRLTestEnvironment

    # Dict of all available options for the 'dataset' field below.
    available_datasets: ClassVar[Dict[str, Union[str, Any]]] = available_datasets
    # The function used to create the tasks for the chosen env.
    _task_sampling_function: ClassVar[Callable[..., ContinuousTask]] = make_continuous_task

    # Which environment (a.k.a. "dataset") to learn on.
    # The dataset could be either a string (env id or a key from the
    # available_datasets dict), a gym.Env, or a callable that returns a
    # single environment.
    dataset: str = choice(available_datasets, default="CartPole-v0")

    # The number of "tasks" that will be created for the training, valid and test
    # environments.
    # NOTE: In the case of settings with smooth task boundaries, this is the number of
    # "base" tasks which are created, and the task space consists of interpolations
    # between these base tasks.
    # When left unset, will use a default value that makes sense
    # (something like 5).
    nb_tasks: int = field(5, alias=["n_tasks", "num_tasks"])

    # Environment/dataset to use for validation. Defaults to the same as `dataset`.
    train_dataset: Optional[str] = None
    # Environment/dataset to use for validation. Defaults to the same as `dataset`.
    val_dataset: Optional[str] = None
    # Environment/dataset to use for testing. Defaults to the same as `dataset`.
    test_dataset: Optional[str] = None

    # Wether the task boundaries are smooth or sudden.
    smooth_task_boundaries: bool = True
    # Wether the tasks are sampled uniformly. (This is set to True in MultiTaskRLSetting
    # and below)
    stationary_context: bool = False

    # Max number of training steps in total. (Also acts as the "length" of the training
    # and validation "Datasets")
    train_max_steps: int = 100_000
    # Maximum number of episodes in total.
    # TODO: Add tests for this 'max episodes' and 'episodes_per_task'.
    train_max_episodes: Optional[int] = None
    # Total number of steps in the test loop. (Also acts as the "length" of the testing
    # environment.)
    test_max_steps: int = 10_000
    test_max_episodes: Optional[int] = None
    # Standard deviation of the multiplicative Gaussian noise that is used to
    # create the values of the env attributes for each task.
    task_noise_std: float = 0.2
    # NOTE: THIS ARG IS DEPRECATED! Only keeping it here so previous config yaml files
    # don't cause a crash.
    observe_state_directly: Optional[bool] = None

    # NOTE: Removing those, in favor of just using the registered Pixel<...>-v? variant.
    # force_pixel_observations: bool = False
    # """ Wether to use the "pixel" version of `self.dataset`.
    # When `False`, does nothing.
    # When `True`, will do one of the following, depending on the choice of environment:
    # - For classic control envs, it adds a `PixelObservationsWrapper` to the env.
    # - For atari envs:
    #     - If `self.dataset` is a regular atari env (e.g. "ALE/Breakout-v5"), does nothing.
    #     - if `self.dataset` is the 'RAM' version of an atari env, raises an error.
    # - For mujoco envs, this raises a NotImplementedError, as we don't yet know how to
    #   make a pixel-version the Mujoco Envs.
    # - For other envs:
    #     - If the environment's observation space appears to be image-based, an error
    #       will be raised.
    #     - If the environment's observation space doesn't seem to be image-based, does
    #       nothing.
    # """

    # force_state_observations: bool = False
    # """ Wether to use the "state" version of `self.dataset`.
    # When `False`, does nothing.
    # When `True`, will do one of the following, depending on the choice of environment:
    # - For classic control envs, it does nothing, as they are already state-based.
    # - TODO: For atari envs, the 'RAM' version of the chosen env will be used.
    # - For mujoco envs, it doesn nothing, as they are already state-based.
    # - For other envs, if this is set to True, then
    #     - If the environment's observation space appears to be image-based, an error
    #       will be raised.
    #     - If the environment's observation space doesn't seem to be image-based, does
    #       nothing.
    # """

    # NOTE: Removing this from the continual setting.
    # By default 1 for this setting, meaning that the context is a linear interpolation
    # between the start context (usually the default task for the environment) and a
    # randomly sampled task.
    # nb_tasks: int = field(5, alias=["n_tasks", "num_tasks"])

    # Wether to convert the observations / actions / rewards of the envs (and their
    # spaces) such that they return Tensors rather than numpy arrays.
    # TODO: Maybe switch this to True by default?
    prefer_tensors: bool = False

    # Path to a json file from which to read the train task schedule.
    train_task_schedule_path: Optional[Path] = None
    # Path to a json file from which to read the validation task schedule.
    val_task_schedule_path: Optional[Path] = None
    # Path to a json file from which to read the test task schedule.
    test_task_schedule_path: Optional[Path] = None

    # Wether observations from the environments whould include
    # the end-of-episode signal. Only really useful if your method will iterate
    # over the environments in the dataloader style
    # (as does the baseline method).
    add_done_to_observations: bool = False

    # The maximum number of steps per episode. When None, there is no limit.
    max_episode_steps: Optional[int] = None

    # Transforms to be applied by default to the observatons of the train/valid/test
    # environments.
    transforms: List[Transforms] = list_field()
    # Transforms to be applied to the training environment, in addition to those already
    # in `transforms`.
    train_transforms: List[Transforms] = list_field()
    # Transforms to be applied to the validation environment, in addition to those
    # already in `transforms`.
    val_transforms: List[Transforms] = list_field()
    # Transforms to be applied to the testing environment, in addition to those already
    # in `transforms`.
    test_transforms: List[Transforms] = list_field()

    # When True, a Monitor-like wrapper will be applied to the training environment
    # and monitor the 'online' performance during training. Note that in SL, this will
    # also cause the Rewards (y) to be withheld until actions are passed to the `send`
    # method of the Environment.
    monitor_training_performance: bool = flag(True)

    #
    # -------- Fields below don't have corresponding command-line arguments. -----------
    #
    train_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False)
    val_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False)
    test_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False)

    # TODO: Naming is a bit inconsistent, using `valid` here, whereas we use `val`
    # elsewhere.
    train_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False)
    val_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False)
    test_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False)

    # keyword arguments to be passed to the base environment through gym.make(base_env, **kwargs).
    base_env_kwargs: Dict = dict_field(cmd=False)

    batch_size: Optional[int] = field(default=None, cmd=False)
    num_workers: Optional[int] = field(default=None, cmd=False)

    # Maximum number of training steps per task.
    # NOTE: In this particular setting there aren't clear 'tasks' to speak of.
    train_steps_per_task: Optional[int] = None
    # Number of test steps per task.
    # NOTE: In this particular setting there aren't clear 'tasks' to speak of.
    test_steps_per_task: Optional[int] = None

    # # Deprecated: use `train_max_steps` instead.
    # max_steps: Optional[int] = deprecated_property(redirects_to="train_max_steps")
    # # Deprecated: use `test_max_steps` instead.
    # test_steps: Optional[int] = deprecated_property(redirects_to="test_max_steps")
    # # Deprecated, use `train_steps_per_task` instead.
    # steps_per_task: Optional[int] = deprecated_property(redirects_to="train_steps_per_task")

    def __post_init__(self):
        defaults = {f.name: f.default for f in fields(self)}

        super().__post_init__()

        # TODO: Fix nnoying little issues with this trio of fields that are interlinked:
        if self.test_steps_per_task is not None:
            # We need set the value of self.test_max_steps and self.test_steps_per_task
            if self.test_task_schedule and max(self.test_task_schedule) != len(
                self.test_task_schedule
            ):
                self.test_max_steps = max(self.test_task_schedule)
            elif self.test_max_steps == defaults["test_max_steps"]:
                self.test_max_steps = self.nb_tasks * self.test_steps_per_task
            else:
                self.nb_tasks = self.test_max_steps // self.test_steps_per_task

        # if self.max_steps is not None:
        #     warnings.warn(DeprecationWarning("'max_steps' is deprecated, use 'train_max_steps' instead."))
        #     self.train_max_steps = self.max_steps
        # if self.test_steps is not None:
        #     warnings.warn(DeprecationWarning("'test_steps' is deprecated, use 'test_max_steps' instead."))

        if self.dataset and self.dataset not in self.available_datasets.values():
            try:
                self.dataset = find_matching_dataset(self.available_datasets, self.dataset)
            except NotImplementedError as e:
                logger.info(f"Will try to use custom dataset {self.dataset}.")
            except Exception as e:
                if getattr(self, "train_envs", []):
                    logger.info(f"Using custom environments / datasets.")
                else:
                    raise gym.error.UnregisteredEnv(
                        f"({e}) The chosen dataset/environment ({self.dataset}) isn't in the dict of "
                        f"available datasets/environments, and a task schedule was not passed, "
                        f"so this Setting ({type(self).__name__}) doesn't know how to create "
                        f"tasks for that env!\n"
                        f"Supported envs:\n"
                        + ("\n".join(f"- {k}: {v}" for k, v in self.available_datasets.items()))
                    )

        # The ids of the train/valid/test environments.
        self.train_dataset: Union[str, Callable[[], gym.Env]] = self.train_dataset or self.dataset
        self.val_dataset: Union[str, Callable[[], gym.Env]] = self.val_dataset or self.dataset
        self.test_dataset: Union[str, Callable[[], gym.Env]] = self.test_dataset or self.dataset

        logger.info(f"Chosen dataset: {textwrap.shorten(str(self.train_dataset), 50)}")
        # # The environment 'ID' associated with each 'simple name'.
        # self.train_dataset_id: str = self._get_dataset_id(self.train_dataset)
        # self.val_dataset_id: str = self._get_dataset_id(self.val_dataset)
        # self.train_dataset_id: str = self._get_dataset_id(self.train_dataset)

        # Set the number of tasks depending on the increment, and vice-versa.
        # (as only one of the two should be used).
        assert self.train_max_steps, "assuming this should always be set, for now."

        # Load the task schedules from the corresponding files, if present.
        if self.train_task_schedule_path:
            self.train_task_schedule = _load_task_schedule(self.train_task_schedule_path)
            self.nb_tasks = len(self.train_task_schedule) - 1
        if self.val_task_schedule_path:
            self.val_task_schedule = _load_task_schedule(self.val_task_schedule_path)
        if self.test_task_schedule_path:
            self.test_task_schedule = _load_task_schedule(self.test_task_schedule_path)

        self.train_env: gym.Env
        self.valid_env: gym.Env
        self.test_env: gym.Env

        # Temporary environments which are created and used only for creating the task
        # schedules and the 'base' observation spaces, and then closed right after.
        self._temp_train_env: Optional[gym.Env] = self._make_env(self.train_dataset)
        self._temp_val_env: Optional[gym.Env] = None
        self._temp_test_env: Optional[gym.Env] = None
        # Create the task schedules, using the 'task sampling' function from `tasks.py`.

        # TODO: PLEASE HELP I'm going mad because of the validation logic for these
        # fields!!
        if not self.train_task_schedule:
            self.train_task_schedule = self.create_train_task_schedule()
        elif max(self.train_task_schedule) == len(self.train_task_schedule) - 1:
            # If the keys correspond to the task ids rather than the steps:
            if self.nb_tasks in [defaults["nb_tasks"], None]:
                self.nb_tasks = len(self.train_task_schedule) - 1
                if self.nb_tasks < 1:
                    raise RuntimeError(f"Need at least 2 entries in the task schedule!")
                logger.info(
                    f"Assuming that the last entry in the provided task schedule is "
                    f"the final state, and that there are {self.nb_tasks} tasks. "
                )
            self.train_steps_per_task = (
                self.train_steps_per_task or self.train_max_steps // self.nb_tasks
            )
            new_keys = np.linspace(
                0, self.train_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int
            ).tolist()
            assert len(new_keys) == len(self.train_task_schedule)
            self.train_task_schedule = type(self.train_task_schedule)(
                {
                    new_key: self.train_task_schedule[old_key]
                    for new_key, old_key in zip(new_keys, sorted(self.train_task_schedule.keys()))
                }
            )
        elif self.smooth_task_boundaries:
            # We have a task schedule for Continual RL.
            if self.train_max_steps == defaults["train_max_steps"]:
                self.train_max_steps = max(self.train_task_schedule)

        if self.smooth_task_boundaries:
            # NOTE: Need to have an entry at the final step
            last_task_step = max(self.train_task_schedule.keys())
            last_task = self.train_task_schedule[last_task_step]
            if self.train_max_steps not in self.train_task_schedule:
                # FIXME Duplicating the last task for now?
                self.train_task_schedule[self.train_max_steps] = last_task

        if 0 not in self.train_task_schedule.keys():
            raise RuntimeError(
                "`train_task_schedule` needs an entry at key 0, as the initial state"
            )
        if self.train_max_steps != max(self.train_task_schedule):
            if self.train_max_steps in [defaults["train_max_steps"], None]:
                # TODO: This might be wrong no?
                self.train_max_steps = max(self.train_task_schedule)
                logger.info(f"Setting `train_max_steps` to {self.train_max_steps}")
            elif self.smooth_task_boundaries:
                raise RuntimeError(
                    f"For now, the train task schedule needs to have a value at key "
                    f"`train_max_steps` ({self.train_max_steps})."
                )
            else:
                last_task_step = max(self.train_task_schedule)
                last_task = self.train_task_schedule[last_task_step]
                logger.debug("Using the last task as the final state.")
                self.train_task_schedule[self.train_max_steps] = last_task

        if not self.val_task_schedule:
            # Avoid creating an additional env, just reuse the train_temp_env.
            self._temp_val_env = (
                self._temp_train_env
                if self.val_dataset == self.train_dataset
                else self._make_env(self.val_dataset)
            )
            self.val_task_schedule = self.create_val_task_schedule()
        elif max(self.val_task_schedule) == len(self.val_task_schedule) - 1:
            # If the keys correspond to the task ids rather than the transition steps
            expected_nb_tasks = len(self.val_task_schedule)
            old_keys = sorted(self.val_task_schedule.keys())
            new_keys = np.linspace(
                0, self.train_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int
            ).tolist()
            assert len(new_keys) == len(self.train_task_schedule)
            self.val_task_schedule = type(self.val_task_schedule)(
                {
                    new_key: self.val_task_schedule[old_key]
                    for new_key, old_key in zip(new_keys, old_keys)
                }
            )

        if not self.test_task_schedule:
            self._temp_test_env = (
                self._temp_train_env
                if self.test_dataset == self.train_dataset
                else self._make_env(self.val_dataset)
            )
            self.test_task_schedule = self.create_test_task_schedule()
        elif max(self.test_task_schedule) == len(self.test_task_schedule) - 1:
            # If the keys correspond to the task ids rather than the transition steps
            old_keys = sorted(self.test_task_schedule.keys())
            new_keys = np.linspace(
                0, self.test_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int
            ).tolist()
            self.test_task_schedule = type(self.test_task_schedule)(
                {
                    new_key: self.test_task_schedule[old_key]
                    for new_key, old_key in zip(new_keys, old_keys)
                }
            )
        if 0 not in self.test_task_schedule.keys():
            raise RuntimeError("`test_task_schedule` needs an entry at key 0, as the initial state")
        if self.test_max_steps != max(self.test_task_schedule):
            if self.test_max_steps == defaults["test_max_steps"]:
                self.test_max_steps = max(self.test_task_schedule)
                logger.info(f"Setting `test_max_steps` to {self.test_max_steps}")
            elif self.smooth_task_boundaries:
                raise RuntimeError(
                    f"For now, the test task schedule needs to have a value at key "
                    f"`test_max_steps` ({self.test_max_steps}). "
                )

        # Close the temporary environments.
        # NOTE: Avoid closing the envs for now in case 'live' envs were passed to the Setting.

        if self._temp_train_env:
            # self._temp_train_env.close()
            pass
        if self._temp_val_env and self._temp_val_env is not self._temp_train_env:
            # self._temp_val_env.close()
            pass
        if self._temp_test_env and self._temp_test_env is not self._temp_train_env:
            # self._temp_test_env.close()
            pass

        train_task_lengths: List[int] = [
            task_b_step - task_a_step
            for task_a_step, task_b_step in pairwise(sorted(self.train_task_schedule.keys()))
        ]
        # TODO: This will crash if nb_tasks is 1, right?
        # train_max_steps = train_last_boundary + train_task_lengths[-1]
        test_task_lengths: List[int] = [
            task_b_step - task_a_step
            for task_a_step, task_b_step in pairwise(sorted(self.test_task_schedule.keys()))
        ]

        if not (
            len(self.train_task_schedule)
            == len(self.test_task_schedule)
            == len(self.val_task_schedule)
        ):
            raise RuntimeError(
                "Training, validation and testing task schedules should have the same "
                "number of items for now."
            )

        train_last_boundary = max(set(self.train_task_schedule.keys()) - {self.train_max_steps})
        test_last_boundary = max(set(self.test_task_schedule.keys()) - {self.test_max_steps})

        # TODO: Really annoying validation logic for these fields needs to be simplified
        # somehow.
        # if self.train_steps_per_task is None:
        #     # if self.nb_tasks
        #     train_steps_per_task = self.train_max_steps // self.nb_tasks
        #     if self.train_task_schedule:
        #         task_lengths = [
        #             b - a for a, b in pairwise(self.train_task_schedule.keys())
        #         ]
        #         if any(
        #             abs(task_length - train_steps_per_task) > 1
        #             for task_length in task_lengths
        #         ):
        #             raise RuntimeError(
        #                 f"Trying to set a value for `train_steps_per_task`, but "
        #                 f"the keys of the task schedule are either uneven, or not "
        #                 f"equal to {train_steps_per_task}: "
        #                 f"task schedule keys: {self.train_task_schedule.keys()}"
        #             )
        #     self.train_steps_per_task = train_steps_per_task

        # FIXME: This is quite confusing:
        expected_nb_tasks = len(self.train_task_schedule) - 1
        # if (
        #     self.train_max_steps not in [defaults["train_max_steps"], None]
        #     and self.train_max_steps == max(self.train_task_schedule)
        # ) or self.smooth_task_boundaries:
        #     expected_nb_tasks -= 1

        if self.nb_tasks != expected_nb_tasks:
            if self.nb_tasks in [None, defaults["nb_tasks"]]:
                assert len(self.train_task_schedule) == len(self.test_task_schedule)
                self.nb_tasks = len(self.train_task_schedule) - 1
                logger.info(f"`nb_tasks` set to {self.nb_tasks} based on the task schedule")
            else:
                raise RuntimeError(
                    f"The passed number of tasks ({self.nb_tasks}) is inconsistent "
                    f"with train_max_steps ({self.train_max_steps}) and the "
                    f"passed task schedule (with keys "
                    f"{self.train_task_schedule.keys()}): "
                    f"Expected nb_tasks to be None or {expected_nb_tasks}."
                )

        if not train_task_lengths:
            assert not test_task_lengths
            assert expected_nb_tasks == 1
            assert self.train_max_steps > 0
            assert self.test_max_steps > 0
            train_max_steps = self.train_max_steps
            test_max_steps = self.test_max_steps
        else:
            train_max_steps = sum(train_task_lengths)
            test_max_steps = sum(test_task_lengths)
            # train_max_steps = round(train_last_boundary + train_task_lengths[-1])
            # test_max_steps = round(test_last_boundary + test_task_lengths[-1])

        if self.train_max_steps != train_max_steps:
            if self.train_max_steps == defaults["train_max_steps"]:
                self.train_max_steps = train_max_steps
            else:
                raise RuntimeError(
                    f"Value of train_max_steps ({self.train_max_steps}) is "
                    f"inconsistent with the given train task schedule, which has "
                    f"the last task boundary at step {train_last_boundary}, with "
                    f"task lengths of {train_task_lengths}, as it suggests the maximum "
                    f"total number of steps to be {train_last_boundary} + "
                    f"{train_task_lengths[-1]} => {train_max_steps}!"
                )
        if self.test_max_steps != test_max_steps:
            if self.test_max_steps == defaults["test_max_steps"]:
                self.test_max_steps = test_max_steps
            else:
                raise RuntimeError(
                    f"Value of test_max_steps ({self.test_max_steps}) is "
                    f"inconsistent with the given test task schedule (which has keys "
                    f"{self.test_task_schedule.keys()}). Expected the last key to be "
                    f"{test_max_steps}"
                )

        if self.train_steps_per_task is None:
            self.train_steps_per_task = self.train_max_steps // self.nb_tasks
        # TODO: Fix these annoying interactions once and for all.
        assert self.train_max_steps // self.nb_tasks == self.train_steps_per_task, (
            self.train_max_steps,
            self.nb_tasks,
            self.train_steps_per_task,
            self.train_task_schedule.keys(),
        )

        if self.test_steps_per_task is None:
            self.test_steps_per_task = self.test_max_steps // self.nb_tasks
        assert self.test_max_steps // self.nb_tasks == self.test_steps_per_task, (
            self.test_max_steps,
            self.nb_tasks,
            self.test_steps_per_task,
            self.test_task_schedule.keys(),
        )

    def create_train_task_schedule(self) -> TaskSchedule:
        # change_steps = [0, self.train_max_steps]
        # Ex: nb_tasks == 5, train_max_steps = 10_000:
        # change_steps = [0, 2_000, 4_000, 6_000, 8_000, 10_000]
        if self.train_steps_per_task is not None:
            train_max_steps = self.train_steps_per_task * self.nb_tasks
            # if self.smooth_task_boundaries:
            #     train_max_steps = self.train_steps_per_task * self.nb_tasks
            # else:
            #     train_max_steps = self.train_steps_per_task * self.nb_tasks
        else:
            train_max_steps = self.train_max_steps
            assert self.nb_tasks is not None

        task_schedule_keys = np.linspace(
            0, train_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int
        ).tolist()
        return self.create_task_schedule(
            temp_env=self._temp_train_env,
            change_steps=task_schedule_keys,
            # # TODO: Add properties for the train/valid/test seeds?
            seed=self.config.seed if self.config else 123,
        )

    def create_val_task_schedule(self) -> TaskSchedule:
        # Always the same as train task schedule for now.
        return self.train_task_schedule.copy()

    def create_test_task_schedule(self) -> TaskSchedule[ContinuousTask]:
        # Re-scale the steps in the task schedule based on self.test_max_steps
        # NOTE: Using the same task schedule as in training and validation for now.
        if self.train_task_schedule:
            nb_tasks = len(self.train_task_schedule) - 1
        else:
            nb_tasks = self.nb_tasks
        # TODO: Do we want to re-allow the `test_steps_per_task` argument?
        if self.test_steps_per_task is not None:
            test_max_steps = self.test_steps_per_task * nb_tasks
        else:
            test_max_steps = self.test_max_steps
        test_task_schedule_keys = np.linspace(
            0, test_max_steps, nb_tasks + 1, endpoint=True, dtype=int
        ).tolist()
        return {
            step: task
            for step, task in zip(test_task_schedule_keys, self.train_task_schedule.values())
        }

    def create_task_schedule(
        self,
        temp_env: gym.Env,
        change_steps: List[int],
        seed: int = None,
    ) -> Dict[int, Dict]:
        """Create the task schedule, which maps from a step to the changes that
        will occur in the environment when that step is reached.

        Uses the provided `temp_env` to generate the random tasks at the steps
        given in `change_steps` (a list of integers).

        Returns a dictionary mapping from integers (the steps) to the changes
        that will occur in the env at that step.

        TODO: For now in ContinualRL we use an interpolation of a dict of attributes
        to be set on the unwrapped env, but in IncrementalRL it is possible to pass
        callables to be applied on the environment at a given timestep.
        """
        task_schedule: Dict[int, Dict] = {}
        # TODO: Make it possible to use something other than steps as keys in the task
        # schedule, something like a NamedTuple[int, DeltaType], e.g. Episodes(10) or Steps(10)
        # something like that!
        # IDEA: Even fancier, we could use a TimeDelta to say "do one hour of task 0"!!
        for step in change_steps:
            # TODO: Pass wether its for training/validation/testing?
            task = type(self)._task_sampling_function(
                temp_env,
                step=step,
                change_steps=change_steps,
                seed=seed,
            )
            task_schedule[step] = task

        return task_schedule

    @property
    def observation_space(self) -> TypedDictSpace:
        """The un-batched observation space, based on the choice of dataset and
        the transforms at `self.transforms` (which apply to the train/valid/test
        environments).

        The returned spaces is a TypedDictSpace, with the following properties/items:
        - `x`: observation space (e.g. `Image` space)
        - `task_labels`: Union[Discrete, Sparse[Discrete]]
           The task labels for each sample when task labels are available,
           otherwise the task labels space is `Sparse`, and entries will be `None`.
        """
        # TODO: Is it right that we set the observation space on the Setting to be the
        # observation space of the current train environment?
        # In what situation could there be any difference between those?
        # - Changing the 'transforms' attributes after training?
        # if self.train_env is not None:
        #     # assert self._observation_space == self.train_env.observation_space
        #     return self.train_env.observation_space
        if isinstance(self._temp_train_env.observation_space, TypedDictSpace):
            x_space = self._temp_train_env.observation_space.x
            task_label_space = self._temp_train_env.observation_space.task_labels
        else:
            x_space = self._temp_train_env.observation_space
            # apply the transforms to the observation space.
            for transform in self.transforms:
                x_space = transform(x_space)
            task_label_space = self.task_label_space

        done_space = spaces.Box(0, 1, shape=(), dtype=bool)
        if not self.add_done_to_observations:
            done_space = Sparse(done_space, sparsity=1)

        observation_space = TypedDictSpace(
            x=x_space,
            task_labels=task_label_space,
            done=done_space,
            dtype=self.Observations,
        )

        if self.prefer_tensors:
            observation_space = add_tensor_support(observation_space)
        assert isinstance(observation_space, TypedDictSpace)
        return observation_space

    @property
    def task_label_space(self) -> gym.Space:
        # TODO: Explore an alternative design for the task sampling, based more around
        # gym spaces rather than the generic function approach that's currently used?
        # FIXME: This isn't really elegant, there isn't a `nb_tasks` attribute on the
        # ContinualRLSetting anymore, so we have to do a bit of a hack.. Would be
        # cleaner to maybe put this in the assumption class, under
        # `self.task_label_space`?
        task_label_space = spaces.Box(0.0, 1.0, shape=())
        if not self.task_labels_at_train_time or not self.task_labels_at_test_time:
            sparsity = 1
            if self.task_labels_at_train_time ^ self.task_labels_at_test_time:
                # We have task labels "50%" of the time, ish:
                sparsity = 0.5
            task_label_space = Sparse(task_label_space, sparsity=sparsity)
        return task_label_space

    @property
    def action_space(self) -> gym.Space:
        # TODO: Convert the action/reward spaces so they also use TypedDictSpace (even
        # if they just have one item), so that it correctly reflects the objects that
        # the envs accept.
        y_pred_space = self._temp_train_env.action_space
        # action_space = TypedDictSpace(y_pred=y_pred_space, dtype=self.Actions)
        return y_pred_space

    @property
    def reward_space(self) -> gym.Space:
        reward_range = self._temp_train_env.reward_range
        return getattr(
            self._temp_train_env,
            "reward_space",
            spaces.Box(reward_range[0], reward_range[1], shape=()),
        )

    def apply(self, method: Method, config: Config = None) -> "ContinualRLSetting.Results":
        """Apply the given method on this setting to producing some results."""
        # Use the supplied config, or parse one from the arguments that were
        # used to create `self`.
        self.config = config or self._setup_config(method)
        logger.debug(f"Config: {self.config}")

        # TODO: Test to make sure that this doesn't cause any other bugs with respect to
        # the display of stuff:
        # Call this method, which creates a virtual display if necessary.
        self.config.get_display()

        # TODO: Should we really overwrite the method's 'config' attribute here?
        if not getattr(method, "config", None):
            method.config = self.config

        # TODO: Remove `Setting.configure(method)` entirely, from everywhere,
        # and use the `prepare_data` or `setup` methods instead (since these
        # `configure` methods aren't using the `method` anyway.)
        method.configure(setting=self)

        # BUG This won't work if the task schedule uses callables as the values (as
        # they aren't json-serializable.)
        if self.stationary_context:
            logger.info(
                "Train tasks: " + json.dumps(list(self.train_task_schedule.values()), indent="\t")
            )
        else:
            try:
                logger.info(
                    "Train task schedule:" + json.dumps(self.train_task_schedule, indent="\t")
                )
                # BUG: Sometimes the task schedule isnt json-serializable!
            except TypeError:
                logger.info("Train task schedule: ")
                for key, value in self.train_task_schedule.items():
                    logger.info(f"{key}: {value}")

        if self.config.debug:
            logger.debug("Test task schedule:" + json.dumps(self.test_task_schedule, indent="\t"))

        # Run the Training loop (which is defined in ContinualAssumption).
        results = self.main_loop(method)

        logger.info("Results summary:")
        logger.info(results.to_log_dict())
        logger.info(results.summary())
        method.receive_results(self, results=results)
        return results

        # Run the Test loop (which is defined in IncrementalAssumption).
        # results: RlResults = self.test_loop(method)

    def setup(self, stage: str = None) -> None:
        # Called before the start of each task during training, validation and
        # testing.
        super().setup(stage=stage)
        if stage in {"fit", None}:
            self.train_wrappers = self.create_train_wrappers()
        if stage in {"validate", None}:
            self.valid_wrappers = self.create_valid_wrappers()
        elif stage in {"test", None}:
            self.test_wrappers = self.create_test_wrappers()

    def prepare_data(self, *args, **kwargs) -> None:
        # We don't really download anything atm.
        if self.config is None:
            self.config = Config()
        super().prepare_data(*args, **kwargs)

    def train_dataloader(
        self, batch_size: int = None, num_workers: int = None
    ) -> ActiveEnvironment:
        """Create a training gym.Env/DataLoader for the current task.

        Parameters
        ----------
        batch_size : int, optional
            The batch size, which in this case is the number of environments to
            run in parallel. When `None`, the env won't be vectorized. Defaults
            to None.
        num_workers : int, optional
            The number of workers (processes) to use in the vectorized env. When
            None, the envs are run in sequence, which could be very slow. Only
            applies when `batch_size` is not None. Defaults to None.

        Returns
        -------
        GymDataLoader
            A (possibly vectorized) environment/dataloader for the current task.
        """
        if not self.has_prepared_data:
            self.prepare_data()
        # NOTE: We actually want to call setup every time, so we re-create the
        # wrappers for each task.
        self.setup("fit")

        batch_size = batch_size or self.batch_size
        num_workers = num_workers if num_workers is not None else self.num_workers
        train_seed = self.config.seed if self.config else None
        env_factory = partial(
            self._make_env,
            base_env=self.train_dataset,
            wrappers=self.train_wrappers,
            **self.base_env_kwargs,
        )
        env_dataloader = self._make_env_dataloader(
            env_factory,
            batch_size=batch_size,
            num_workers=num_workers,
            max_steps=self.steps_per_phase,
            max_episodes=self.train_max_episodes,
            seed=train_seed,
        )

        if self.monitor_training_performance:
            # NOTE: It doesn't always make sense to log stuff with the current task ID!
            wandb_prefix = "Train"
            if self.known_task_boundaries_at_train_time:
                wandb_prefix += f"/Task {self.current_task_id}"
            env_dataloader = MeasureRLPerformanceWrapper(env_dataloader, wandb_prefix=wandb_prefix)

        if self.config.render and batch_size is None:
            env_dataloader = RenderEnvWrapper(env_dataloader)

        self.train_env = env_dataloader
        # BUG: There is a mismatch between the train env's observation space and the
        # shape of its observations.
        # self.observation_space = self.train_env.observation_space

        return self.train_env

    def val_dataloader(self, batch_size: int = None, num_workers: int = None) -> Environment:
        """Create a validation gym.Env/DataLoader for the current task.

        Parameters
        ----------
        batch_size : int, optional
            The batch size, which in this case is the number of environments to
            run in parallel. When `None`, the env won't be vectorized. Defaults
            to None.
        num_workers : int, optional
            The number of workers (processes) to use in the vectorized env. When
            None, the envs are run in sequence, which could be very slow. Only
            applies when `batch_size` is not None. Defaults to None.

        Returns
        -------
        GymDataLoader
            A (possibly vectorized) environment/dataloader for the current task.
        """
        if not self.has_prepared_data:
            self.prepare_data()

        # Need to force this to happen every time, because the wrappers might change
        # between tasks.
        self._has_setup_validate = False
        self.setup("validate")

        env_factory = partial(
            self._make_env,
            base_env=self.val_dataset,
            wrappers=self.valid_wrappers,
            **self.base_env_kwargs,
        )
        valid_seed = self.config.seed if self.config else None
        env_dataloader = self._make_env_dataloader(
            env_factory,
            batch_size=batch_size or self.batch_size,
            num_workers=num_workers if num_workers is not None else self.num_workers,
            max_steps=self.steps_per_phase,
            # TODO: Create a new property to limit validation episodes?
            max_episodes=self.train_max_episodes,
            seed=valid_seed,
        )

        if self.monitor_training_performance:
            # NOTE: We also add it here, just so it logs metrics to wandb.
            # NOTE: It doesn't always make sense to log stuff with the current task ID!
            wandb_prefix = "Valid"
            if self.known_task_boundaries_at_train_time:
                wandb_prefix += f"/Task {self.current_task_id}"
            env_dataloader = MeasureRLPerformanceWrapper(env_dataloader, wandb_prefix=wandb_prefix)

        self.val_env = env_dataloader
        return self.val_env

    def test_dataloader(self, batch_size: int = None, num_workers: int = None) -> TestEnvironment:
        """Create the test 'dataloader/gym.Env' for all tasks.

        NOTE: This test environment isn't just for the current task, it actually
        contains the sequence of all tasks. This is different than the train or
        validation environments, since if the task labels are available at train
        time, then calling train/valid_dataloader` returns the envs for the
        current task only, and the `.fit` method is called once per task.

        This environment is also different in that it is wrapped with a Monitor,
        which we might eventually use to save the results/gifs/logs of the
        testing runs.

        Parameters
        ----------
        batch_size : int, optional
            The batch size, which in this case is the number of environments to
            run in parallel. When `None`, the env won't be vectorized. Defaults
            to None.
        num_workers : int, optional
            The number of workers (processes) to use in the vectorized env. When
            None, the envs are run in sequence, which could be very slow. Only
            applies when `batch_size` is not None. Defaults to None.

        Returns
        -------
        TestEnvironment
            A testing environment which keeps track of the performance of the
            actor and accumulates logs/statistics that are used to eventually
            create the 'Result' object.
        """
        if not self.has_prepared_data:
            self.prepare_data()
        # NOTE: New for PL: The call doesn't go through if self._has_setup_test is True
        # Need to force this to happen every time, because the wrappers might change
        # between tasks.
        self._has_setup_test = False
        self.setup("test")
        # BUG: gym.wrappers.Monitor doesn't want to play nice when applied to
        # Vectorized env, it seems..
        # FIXME: Remove this when the Monitor class works correctly with
        # batched environments.
        batch_size = batch_size or self.batch_size
        if batch_size is not None:
            logger.warning(
                UserWarning(
                    colorize(
                        f"WIP: Only support batch size of `None` (i.e., a single env) "
                        f"for the test environments of RL Settings at the moment, "
                        f"because the Monitor class from gym doesn't work with "
                        f"VectorEnvs. (batch size was {batch_size})",
                        "yellow",
                    )
                )
            )
            batch_size = None

        num_workers = num_workers if num_workers is not None else self.num_workers
        test_seed = self.config.seed if self.config else None

        env_factory = partial(
            self._make_env,
            base_env=self.test_dataset,
            wrappers=self.test_wrappers,
            **self.base_env_kwargs,
        )
        # TODO: Pass the max_steps argument to this `_make_env_dataloader` method,
        # rather than to a `step_limit` on the TestEnvironment.
        env_dataloader = self._make_env_dataloader(
            env_factory,
            batch_size=batch_size,
            num_workers=num_workers,
        )
        if self.test_max_episodes is not None:
            raise NotImplementedError(f"TODO: Use `self.test_max_episodes`")

        test_loop_max_steps = self.test_max_steps // (batch_size or 1)
        # TODO: Find where to configure this 'test directory' for the outputs of
        # the Monitor.
        if wandb.run:
            test_dir = wandb.run.dir
        else:
            test_dir = self.config.log_dir

        # TODO: Split this up into an ActionLimit wrapper, a RecordVideo wrapper,
        # and a RecordEpisodeStatistics wrapper.
        self.test_env = self.TestEnvironment(
            env_dataloader,
            task_schedule=self.test_task_schedule,
            directory=test_dir,
            step_limit=test_loop_max_steps,
            config=self.config,
            force=True,
            video_callable=None if wandb.run or self.config.render else False,
        )
        self.test_env.seed(seed=test_seed)
        self.test_env.action_space.seed(seed=test_seed)
        self.test_env.observation_space.seed(seed=test_seed)
        return self.test_env

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        In the case of ContinualRL and DiscreteTaskAgnosticRL, fit is only called once,
        with an environment that shifts between all the tasks. In IncrementalRL, fit is
        called once per task, while in TraditionalRL and MultiTaskRL, fit is called
        once.
        """
        return 1

    @property
    def steps_per_phase(self) -> Optional[int]:
        """Returns the number of steps per training "phase", i.e. the max number of
        (steps for now) that can be taken in the training environment passed to
        `Method.fit`

        In most settings, this is the same as `steps_per_task`.

        Returns
        -------
        Optional[int]
            `None` if `max_steps` is None, else `max_steps // phases`.
        """
        return None if self.train_max_steps is None else self.train_max_steps // self.phases

    @staticmethod
    def _make_env(
        base_env: Union[str, gym.Env, Callable[[], gym.Env]],
        wrappers: List[Callable[[gym.Env], gym.Env]] = None,
        **base_env_kwargs: Dict,
    ) -> gym.Env:
        """Helper function to create a single (non-vectorized) environment."""
        env: gym.Env
        if isinstance(base_env, str):
            env = gym.make(base_env, **base_env_kwargs)
        elif isinstance(base_env, gym.Env):
            env = base_env
        elif callable(base_env):
            env = base_env(**base_env_kwargs)
        else:
            raise RuntimeError(
                f"base_env should either be a string, a callable, or a gym "
                f"env. (got {base_env})."
            )
        wrappers = wrappers or []
        for wrapper in wrappers:
            env = wrapper(env)
        return env

    def _make_env_dataloader(
        self,
        env_factory: Callable[[], gym.Env],
        batch_size: Optional[int],
        num_workers: Optional[int] = None,
        seed: Optional[int] = None,
        max_steps: Optional[int] = None,
        max_episodes: Optional[int] = None,
    ) -> GymDataLoader:
        """Helper function for creating a (possibly vectorized) environment."""
        logger.debug(f"batch_size: {batch_size}, num_workers: {num_workers}, seed: {seed}")

        env: Union[gym.Env, gym.vector.VectorEnv]
        if batch_size is None:
            env = env_factory()
        else:
            env = make_batched_env(
                env_factory,
                batch_size=batch_size,
                num_workers=num_workers,
                # TODO: Still debugging shared memory + custom spaces (e.g. Sparse).
                shared_memory=False,
            )
        if max_steps:
            env = ActionLimit(env, max_steps=max_steps)
        if max_episodes:
            env = EpisodeLimit(env, max_episodes=max_episodes)

        # Apply the "post-batch" wrappers:
        # from sequoia.common.gym_wrappers import ConvertToFromTensors
        # TODO: Only the BaseMethod requires this, we should enable it only
        # from the BaseMethod, and leave it 'off' by default.
        if self.add_done_to_observations:
            env = AddDoneToObservation(env)

        if self.prefer_tensors and self.config.device:
            # TODO: Put this before or after the image transforms?
            env = TransformObservation(env, f=partial(move, device=self.config.device))
            env = TransformReward(env, f=partial(move, device=self.config.device))
        # # Convert the samples to tensors and move them to the right device.
        # env = ConvertToFromTensors(env)
        # env = ConvertToFromTensors(env, device=self.config.device)
        # Add a wrapper that converts numpy arrays / etc to Observations/Rewards
        # and from Actions objects to numpy arrays.
        env = TypedObjectsWrapper(
            env,
            observations_type=self.Observations,
            rewards_type=self.Rewards,
            actions_type=self.Actions,
        )
        # Create an IterableDataset from the env using the EnvDataset wrapper.
        dataset = EnvDataset(env)

        # Create a GymDataLoader for the EnvDataset.
        env_dataloader = GymDataLoader(dataset)

        if batch_size and seed:
            # Seed each environment with its own seed (based on the base seed).
            env.seed([seed + i for i in range(env_dataloader.num_envs)])
        else:
            env.seed(seed)
            env.action_space.seed(seed)
            env.observation_space.seed(seed)

        return env_dataloader

    def create_train_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
        """Get the list of wrappers to add to each training environment.

        The result of this method must be pickleable when using
        multiprocessing.

        Returns
        -------
        List[Callable[[gym.Env], gym.Env]]
            [description]
        """
        # We add a restriction to prevent users from getting data from
        # previous or future tasks.
        # NOTE: This assumes that tasks all have the same length.
        return self._make_wrappers(
            base_env=self.train_dataset,
            task_schedule=self.train_task_schedule,
            # TODO: Removing this, but we have to check that it doesn't change when/how
            # the task boundaries are given to the Method.
            # sharp_task_boundaries=self.known_task_boundaries_at_train_time,
            task_labels_available=self.task_labels_at_train_time,
            transforms=self.transforms + self.train_transforms,
            starting_step=0,
            max_steps=self.train_max_steps,
            new_random_task_on_reset=self.stationary_context,
        )

    def create_valid_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
        """Get the list of wrappers to add to each validation environment.

        The result of this method must be pickleable when using
        multiprocessing.

        Returns
        -------
        List[Callable[[gym.Env], gym.Env]]
            [description]

        TODO: Decide how this 'validation' environment should behave in
        comparison with the train and test environments.
        """
        return self._make_wrappers(
            base_env=self.val_dataset,
            task_schedule=self.val_task_schedule,
            # sharp_task_boundaries=self.known_task_boundaries_at_train_time,
            task_labels_available=self.task_labels_at_train_time,
            transforms=self.transforms + self.val_transforms,
            starting_step=0,
            # TODO: Should there be a limit on the validation steps/episodes?
            max_steps=self.train_max_steps,
            new_random_task_on_reset=self.stationary_context,
        )

    def create_test_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
        """Get the list of wrappers to add to a single test environment.

        The result of this method must be pickleable when using
        multiprocessing.

        Returns
        -------
        List[Callable[[gym.Env], gym.Env]]
            [description]
        """
        return self._make_wrappers(
            base_env=self.test_dataset,
            task_schedule=self.test_task_schedule,
            # sharp_task_boundaries=self.known_task_boundaries_at_test_time,
            task_labels_available=self.task_labels_at_test_time,
            transforms=self.transforms + self.test_transforms,
            starting_step=0,
            max_steps=self.test_max_steps,
            new_random_task_on_reset=self.stationary_context,
        )

    def _make_wrappers(
        self,
        base_env: Union[str, gym.Env, Callable[[], gym.Env]],
        task_schedule: Dict[int, Dict],
        # sharp_task_boundaries: bool,
        task_labels_available: bool,
        transforms: List[Transforms] = None,
        starting_step: int = None,
        max_steps: int = None,
        new_random_task_on_reset: bool = False,
    ) -> List[Callable[[gym.Env], gym.Env]]:
        """helper function for creating the train/valid/test wrappers.

        These wrappers get applied *before* the batching, if applicable.
        """
        wrappers: List[Callable[[gym.Env], gym.Env]] = []

        # TODO: Add some kind of Wrapper around the dataset to make it
        # semi-supervised?

        if self.max_episode_steps:
            wrappers.append(partial(TimeLimit, max_episode_steps=self.max_episode_steps))

        # NOTE: Removing this 'ActionLimit' from the 'pre-batch' wrappers.
        # wrappers.append(partial(ActionLimit, max_steps=max_steps))

        # if is_classic_control_env(base_env):
        # If we are in a classic control env, and we dont want the state to
        # be fully-observable (i.e. we want pixel observations rather than
        # getting the pole angle, velocity, etc.), then add the
        # PixelObservation wrapper to the list of wrappers.
        # if self.force_pixel_observations:
        #     wrappers.append(PixelObservationWrapper)

        # TODO: Temporary fix for the `is_atari_env` function, which is used to check if the env
        # needs a `AtariPreprocessing` wrapper added.
        if isinstance(base_env, (str, gym.Env)) and is_atari_env(base_env):
            # TODO: Figure out the differences (if there are any) between the
            # AtariWrapper from SB3 and the AtariPreprocessing wrapper from gym.
            wrappers.append(GymAtariWrapper)

        if transforms:
            # Apply image transforms if the env will have image-like obs space
            # Wrapper to 'wrap' the observation space into an Image space (subclass of
            # Box with useful fields like `c`, `h`, `w`, etc.)
            wrappers.append(ImageObservations)
            # Wrapper to apply the image transforms to the env.
            wrappers.append(partial(TransformObservation, f=transforms))

        if task_schedule is not None:
            # Add a wrapper which will add non-stationarity to the environment.
            # The "task" transitions will either be sharp or smooth.
            # In either case, the task ids for each sample are added to the
            # observations, and the dicts containing the task information (e.g. the
            # current values of the env attributes from the task schedule) get added
            # to the 'info' dicts.
            nb_tasks = None
            if self.smooth_task_boundaries:
                # Add a wrapper that creates smooth tasks.
                cl_wrapper = SmoothTransitions
            else:
                assert self.nb_tasks >= 1
                # Add a wrapper that creates sharp tasks.
                # NOTE: The naming here is less than ideal! This isn't "multi-task" as-in stationary
                # by default. It just means an env which can do multiple tasks. However, when the
                # `new_random_task_on_reset` argument is set, then it does sample tasks IID.
                cl_wrapper = MultiTaskEnvironment
                nb_tasks = self.nb_tasks

            assert starting_step is not None
            assert max_steps is not None
            wrappers.append(
                partial(
                    cl_wrapper,
                    noise_std=self.task_noise_std,
                    task_schedule=task_schedule,
                    add_task_id_to_obs=True,
                    add_task_dict_to_info=False,
                    starting_step=starting_step,
                    nb_tasks=nb_tasks,
                    new_random_task_on_reset=new_random_task_on_reset,
                    max_steps=max_steps,
                )
            )
            # If the task labels aren't available, we then add another wrapper that
            # hides that information (setting both of them to None) and also marks
            # those spaces as `Sparse`.
            if not task_labels_available:
                # NOTE: This sets the task labels to None, rather than removing
                # them entirely.
                # wrappers.append(RemoveTaskLabelsWrapper)
                wrappers.append(HideTaskLabelsWrapper)

        return wrappers

    def _get_objective_scaling_factor(self) -> float:
        """Return the factor to be multiplied with the mean reward per episode
        in order to produce a 'performance score' between 0 and 1.

        Returns
        -------
        float
            The scaling factor to use.
        """
        # TODO: remove this, currently used just so we can get a 'scaling factor' to use
        # to scale the 'mean reward per episode' to a score between 0 and 1.
        # TODO: Add other environments, for instance 1/200 for cartpole.
        # TODO: Rework this so its based on the reward threshold!
        max_reward_per_episode = 1
        if isinstance(self.dataset, str) and self.dataset.startswith("MetaMonsterKong"):
            max_reward_per_episode = 100
        elif isinstance(self.dataset, str) and self.dataset == "CartPole-v0":
            max_reward_per_episode = 200
        else:
            warnings.warn(
                RuntimeWarning(
                    f"Unable to determine the right scaling factor to use for dataset "
                    f"{self.dataset} when calculating the performance score! "
                    f"The CL Score of this run will most probably not be accurate."
                )
            )
        return 1 / max_reward_per_episode

    def _get_simple_name(self, env_name_or_id: str) -> Optional[str]:
        """Returns the 'simple name' for the given environment ID.
        For example, when passed "CartPole-v0", returns "cartpole".

        When not found, returns None.
        """
        if env_name_or_id in self.available_datasets.keys():
            return env_name_or_id

        if env_name_or_id in self.available_datasets.values():
            simple_name: str = [
                k for k, v in self.available_datasets.items() if v == env_name_or_id
            ][0]
            return simple_name
        return None


def _load_task_schedule(file_path: Path) -> Dict[int, Dict]:
    """Load a task schedule from the given path."""
    with open(file_path) as f:
        task_schedule = json.load(f)
        return {int(k): task_schedule[k] for k in sorted(task_schedule.keys())}


if __name__ == "__main__":
    ContinualRLSetting.main()


def find_matching_dataset(
    available_datasets: Dict[str, Union[str, Any]], dataset: str
) -> Optional[Union[str, Any]]:
    """Compares `dataset` with the keys in the `available_datasets` dict and return the
    value of the matching key if found, else returns None.
    """
    if dataset in available_datasets:
        return available_datasets[dataset]

    if not isinstance(dataset, str):
        raise NotImplementedError(dataset)

    chosen_env_name, _, chosen_version = dataset.partition("-v")
    for key, env_id in available_datasets.items():
        if dataset == key:
            assert False, "this should be reached, since we do that check above"

        env_name, _, env_version = key.partition("-v")
        if chosen_version:
            # chosen: half_cheetah
            # key: HalfCheetah-v2
            # HalfCheetah-v2
            # halfcheetah-v2
            # half_cheetah_v2
            if chosen_version != env_version:
                continue
            if names_match(chosen_env_name, env_name):
                return env_id
        elif names_match(chosen_env_name, env_name):
            # Look for matching entries with that name, and select the highest
            # available version.
            datasets_with_that_name = {
                other_key: other_env_id
                for other_key, other_env_id in available_datasets.items()
                if names_match(chosen_env_name, other_key.partition("-v")[0])
            }
            if len(datasets_with_that_name) == 1:
                return env_id
            versions = {
                other_key: int(other_key.partition("-v")[-1])
                for other_key in datasets_with_that_name
            }
            return max(datasets_with_that_name, key=versions.get)

    closest_matches = difflib.get_close_matches(dataset, available_datasets)
    if closest_matches:
        closest_match_key: str = closest_matches[0]
        closest_match: Union[str, Any] = available_datasets[closest_match_key]
        if chosen_version:
            # Find the 'version' number of the closest match, and check that it fits.
            closest_match_version = closest_match_key.partition("-v")[-1]
            if not closest_match_version:
                assert isinstance(closest_match, str)
                closest_match_version = closest_match.partition("-v")[-1]

            if chosen_version == closest_match_version:
                return closest_match

            raise gym.error.UnregisteredEnv(
                f"Can't find any matching entries for chosen dataset {dataset} "
                f"with that same version (closest entries: {closest_matches}) "
            )

        warnings.warn(
            RuntimeWarning(
                f"Can't find matching entry for chosen dataset {dataset}, using "
                f"closest match: {closest_match}"
            )
        )
        return closest_match
        # raise RuntimeError(f"Can't find any matching entries for chosen dataset {dataset}. "
        #                 f"Closest entries: {closest_matches}")

    raise gym.error.UnregisteredEnv(
        f"Can't find any matching entries for chosen dataset {dataset}."
    )
    # assert False, (dataset, closest_matches)


================================================
FILE: sequoia/settings/rl/continual/setting_test.py
================================================
import dataclasses
from dataclasses import asdict, is_dataclass, replace
from functools import partial, singledispatch
from pathlib import Path
from typing import Any, Callable, Union, ClassVar, Dict, List, Optional, Sequence, Type
import typing

import gym
import matplotlib.pyplot as plt
import numpy as np
import pytest
from gym import spaces
from gym.vector.utils import batch_space

from sequoia.common.config import Config
from sequoia.common.spaces import TypedDictSpace
from sequoia.common.spaces.sparse import Sparse
from sequoia.conftest import (
    MUJOCO_INSTALLED,
    mujoco_required,
    param_requires_monsterkong,
    param_requires_mujoco,
)
from sequoia.settings.assumptions.incremental_test import DummyMethod as _DummyMethod
from sequoia.settings.base.setting_test import SettingTests
from sequoia.settings.rl.incremental.setting import IncrementalRLSetting
from sequoia.settings.rl.setting_test import DummyMethod
from sequoia.utils.utils import pairwise, take
from sequoia.settings.base import Setting
from .setting import ContinualRLSetting


@pytest.mark.parametrize(
    "dataset",
    [
        "CartPole-v8",
        "Breakout-v9",
        param_requires_mujoco("Ant-v0"),
        param_requires_monsterkong("MetaMonsterKong-v0"),
    ],
)
def test_passing_unsupported_dataset_raises_error(dataset: Any):
    with pytest.raises((gym.error.Error, NotImplementedError)):
        _ = ContinualRLSetting(dataset=dataset)


def test_acrobot_attributes_change_over_time():
    from sequoia.settings.rl.setting_test import CheckAttributesWrapper
    from sequoia.settings.rl.wrappers import MeasureRLPerformanceWrapper
    from sequoia.settings.rl.continual.environment import GymDataLoader
    from sequoia.common.gym_wrappers.env_dataset import EnvDataset
    from sequoia.settings.rl.wrappers import TypedObjectsWrapper
    from sequoia.common.gym_wrappers.action_limit import ActionLimit
    from sequoia.settings.rl.wrappers import HideTaskLabelsWrapper
    from sequoia.common.gym_wrappers.smooth_environment import SmoothTransitions

    task_schedule = {
        0: {
            "LINK_LENGTH_1": 1.0,
            "LINK_LENGTH_2": 1.0,
            "LINK_MASS_1": 1.0,
            "LINK_MASS_2": 1.0,
            "LINK_COM_POS_1": 0.5,
            "LINK_COM_POS_2": 0.5,
            "LINK_MOI": 1.0,
        },
        100: {
            "LINK_LENGTH_1": 1.077662352662672,
            "LINK_LENGTH_2": 1.0029158956681965,
            "LINK_MASS_1": 1.284506509206828,
            "LINK_MASS_2": 1.3452415995540132,
            "LINK_COM_POS_1": 0.3838164987591757,
            "LINK_COM_POS_2": 0.6022014573018389,
            "LINK_MOI": 0.866228909018773,
        },
        200: {
            "LINK_LENGTH_1": 0.9787461324812216,
            "LINK_LENGTH_2": 1.1761685623559348,
            "LINK_MASS_1": 1.0598898754474704,
            "LINK_MASS_2": 1.1760598598046939,
            "LINK_COM_POS_1": 0.4523967193123413,
            "LINK_COM_POS_2": 0.4100516516032442,
            "LINK_MOI": 1.010250702300972,
        },
    }
    from .objects import Observations

    attributes = list(task_schedule[0].keys())
    assert Observations is ContinualRLSetting.Observations
    max_steps = 200
    max_episode_steps = 10
    # List of w
    wrapper_fns = []
    from gym.envs.classic_control.acrobot import AcrobotEnv
    from gym.wrappers import TimeLimit

    base_env: AcrobotEnv = gym.make("Acrobot-v1")  # type: ignore
    base_env = AcrobotEnv()
    base_env = TimeLimit(base_env, max_episode_steps=max_episode_steps)
    env = wrap(
        base_env,
        lambda env: SmoothTransitions(
            env,
            task_schedule=task_schedule,
            add_task_id_to_obs=True,
            only_update_on_episode_end=False,
        ),
        HideTaskLabelsWrapper,
        lambda env: ActionLimit(env, max_steps=10_000),
        lambda env: TypedObjectsWrapper(
            env,
            observations_type=ContinualRLSetting.Observations,
            # observation_space=TypedDictSpace(x:Box([ -1.        -1.        -1.        -1.       -12.566371 -28.274334], [ 1.        1.        1.    ...one:Sparse(Box(False, True, (), bool), sparsity=1), dtype=<class 'sequoia.settings.rl.continual.objects.Observations'>)
            observation_space=TypedDictSpace(
                x=spaces.Box(
                    np.asfarray([-1.0, -1.0, -1.0, -1.0, -12.566371, -28.274334]),
                    np.asfarray([1.0, 1.0, 1.0, 1.0, 12.566371, 28.274334]),
                    (6,),
                    np.float32,
                ),
                task_labels=Sparse(spaces.Box(0.0, 1.0, (), np.float32), sparsity=1),
                done=Sparse(spaces.Box(False, True, (), bool), sparsity=1),
                dtype=Observations,
            ),
            action_space=spaces.Discrete(3),
            actions_type=ContinualRLSetting.Actions,
            rewards_type=ContinualRLSetting.Rewards,
            reward_space=spaces.Box(-np.inf, np.inf, (), np.float32),
        ),
        EnvDataset,
        GymDataLoader,
        MeasureRLPerformanceWrapper,
        lambda env: CheckAttributesWrapper(env, attributes=attributes),
    )

    import itertools

    env.seed(123)
    episodes = max_steps // max_episode_steps
    done = False
    total_steps = 0
    for episode in range(episodes):
        obs = env.reset()
        done = False

        step: int = 0
        for step in itertools.count():
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)
            total_steps += 1
            link_length_1 = env.LINK_LENGTH_1
            if done:
                break
        current_values = env.values[max(env.values)]
        # assert current_values == env.current_task  # NOTE: A bit too fine-grained. This is slightly different.
        print(
            f"End of episode {episode} at step {total_steps} (lasted {step} steps): \n\t{current_values}"
        )

    values_at_each_step = env.values
    for attribute in attributes:
        train_values: List[float] = [
            values_dict[attribute] for step, values_dict in values_at_each_step.items()
        ]
        # We store the values before and after each step, so it's fine if they are the same at that last
        # step.
        assert train_values[0] == train_values[1]
        assert len(train_values) == len(set(train_values)) + 1


from typing import TypeVar

E = TypeVar("E", bound=gym.Env)
W = TypeVar("W", bound=gym.Wrapper)


def wrap(
    env: E, *wrapper_fns: Union[Type[W], Callable[[Union[E, W]], W]]
) -> Union[E, W, Union[W, E]]:
    """Wraps the environment `env` with the provided wrapper types or wrapper functions.

    The wrapper functions are applied in order to `env`, meaning the first item is the innermost
    wrapper, and the last item in `wrapper_fns` is the outermost wrapper.

    Parameters
    ----------
    env : E
        [description]

    Returns
    -------
    Union[W, E]
        [description]
    """
    wrapped_env: Union[W, E] = env
    for wrapper_fn in wrapper_fns:
        wrapped_env = wrapper_fn(wrapped_env)
    if typing.TYPE_CHECKING:
        assert isinstance(wrapped_env, (E, W))
    return wrapped_env


def wrap_reversed(
    env: E, *wrapper_fns: Union[Type[W], Callable[[Union[E, W]], W]]
) -> Union[E, W, Union[W, E]]:
    return wrap(env, *reversed(wrapper_fns))


@singledispatch
def _equal(a: Any, b: Any) -> bool:
    """Utility function used to check if two thing are equal.

    NOTE: This is only really useful/necessary because `functools.partial` objects can be present
    as attributes on the setting, usually either in the task schedule (or in the
    [train/val/test]_envs for the IncrementalRLSetting subclasses).
    The `functools.partial` class doesn't support equality: two partial objects with the same funcs,
    args and kwargs are still not considered equal for some reason.

    This function has a special handler for `partial` objects, so that they are considered equal if
    and only if their funcs, args and keywords are the same.
    This makes it possible to easily check for equality between settings, which is used for example
    in the tests below.
    """
    if is_dataclass(a):
        return is_dataclass(b) and _equal(asdict(a), asdict(b))
    return a == b


@_equal.register
def _partials_equal(a: partial, b: partial) -> bool:
    # NOTE: Using the recursive call so we can compare nested partials.
    return (
        isinstance(b, partial)
        and _equal(a.func, b.func)
        and _equal(a.args, b.args)
        and _equal(a.keywords, b.keywords)
    )


# NOTE: Need to also register handlers for list and dict, since they might have partials as
# items.
@_equal.register(list)
def _lists_equal(a: List, b: List) -> bool:
    return len(a) == len(b) and all(_equal(v_a, v_b) for v_a, v_b in zip(a, b))


@_equal.register(dict)
def _dicts_equal(a: Dict, b: Dict) -> bool:
    if a.keys() != b.keys():
        return False

    for k in a:
        v_a, v_b = a[k], b[k]
        if not _equal(v_a, v_b):
            print(f"Values differ at key {k}: {v_a}, {v_b}")
            return False
    return True


def all_different_from_next(sequence: Sequence) -> bool:
    """Returns True if each value in the sequence is different from the next."""
    return not any(_equal(v, next_v) for v, next_v in pairwise(sequence))


class TestContinualRLSetting(SettingTests):
    Setting: ClassVar[Type[Setting]] = ContinualRLSetting
    dataset: pytest.fixture

    @pytest.fixture()
    def setting_kwargs(self, dataset: str, config: Config):
        """Fixture used to pass keyword arguments when creating a Setting."""
        return {"dataset": dataset, "config": config}

    def test_passing_supported_dataset(self, setting_kwargs: Dict):
        setting = self.Setting(**setting_kwargs)
        assert setting.train_task_schedule
        assert setting.val_task_schedule
        assert setting.test_task_schedule
        # Passing the dataset created a task schedule.
        assert all(setting.train_task_schedule.values()), "Should have non-empty tasks."
        assert all(setting.val_task_schedule.values()), "Should have non-empty tasks."
        assert all(setting.test_task_schedule.values()), "Should have non-empty tasks."

    @pytest.mark.parametrize("seed", [123, 456])
    def test_task_schedule_is_reproducible(self, dataset: str, seed: Optional[int]):
        setting_a = self.Setting(dataset=dataset, config=Config(seed=seed))
        setting_b = self.Setting(dataset=dataset, config=Config(seed=seed))
        assert setting_a.train_task_schedule == setting_b.train_task_schedule
        assert setting_a.val_task_schedule == setting_b.val_task_schedule
        assert setting_a.test_task_schedule == setting_b.test_task_schedule

    @pytest.mark.xfail(
        reason="Reworking/removing this mechanism, makes things a bit too complicated."
    )
    def test_using_deprecated_fields(self):
        # BUG: It's tough to get this to raise a warning, because it's happening
        # inside the constructor in the dataclasses.py file, so we have to mess with
        # descriptors etc, which isn't great.
        # with pytest.raises(DeprecationWarning):
        #     setting = self.Setting(nb_tasks=5, max_steps=123)
        setting = self.Setting(nb_tasks=5, max_steps=123)
        assert setting.train_max_steps == 123

        with pytest.warns(DeprecationWarning):
            setting.max_steps = 456
        assert setting.train_max_steps == 456

        with pytest.warns(DeprecationWarning):
            setting = self.Setting(nb_tasks=5, test_max_steps=123)
        assert setting.test_max_steps == 123

        with pytest.warns(DeprecationWarning):
            setting.test_steps = 456
        assert setting.test_max_steps == 456

    def test_tasks_are_different(self, setting_kwargs: Dict[str, Any], config: Config):
        """Check that the tasks different from the next."""
        config = setting_kwargs.pop("config", config)
        assert config.seed is not None
        setting = self.Setting(**setting_kwargs, config=config)

        # Check that each task is different from the next.
        assert all_different_from_next(setting.train_task_schedule.values())
        assert all_different_from_next(setting.val_task_schedule.values())
        assert all_different_from_next(setting.test_task_schedule.values())

    def test_settings_attributes_are_the_same_for_given_seed(
        self, setting_kwargs: Dict[str, Any], config: Config
    ):
        """Make sure that the settings' attributes are the same if passed the same seed."""
        # Make sure that there is a random seed set, otherwise use the one present in `config`.
        config: Config = setting_kwargs.pop("config", config)
        assert config.seed is not None
        setting_1 = self.Setting(**setting_kwargs, config=config)

        # Uses the same config and seed, and check that the attributes of the two settings are
        # identical.
        setting_2 = self.Setting(**setting_kwargs, config=config)

        # Check that the settings have the same attributes.
        assert _equal(dataclasses.asdict(setting_1), dataclasses.asdict(setting_2))

        # These next lines are redundant, but just to be clear:
        assert setting_1.train_task_schedule == setting_2.train_task_schedule
        assert setting_1.val_task_schedule == setting_2.val_task_schedule
        assert setting_1.test_task_schedule == setting_2.test_task_schedule

    def test_tasks_are_different_when_seed_is_different(
        self, setting_kwargs: Dict[str, Any], config: Config
    ):
        # Create another setting with a different seed, and check that at least the generated tasks
        # are different.
        config = setting_kwargs.pop("config", config)
        assert config.seed is not None
        setting_1 = self.Setting(**setting_kwargs, config=config)
        assert setting_1.train_task_schedule

        different_seed = config.seed + 123
        setting_3 = self.Setting(**setting_kwargs, config=replace(config, seed=different_seed))

        setting_1_dict = dataclasses.asdict(setting_1)
        setting_3_dict = dataclasses.asdict(setting_3)

        # Remove the seeds, which are obviously different, and then check that the dicts from the
        # two settings are still different.
        assert setting_1_dict["config"].pop("seed") == config.seed
        assert setting_3_dict["config"].pop("seed") == different_seed
        if "LPG-FTW" in setting_1.dataset:
            # NOTE: The rest of the setting's attributes might be identical (they currently are, but
            # this could change), so skipping these datasets seems like the right thing to do.
            pytest.skip("LPG-FTW datasets always create the same tasks, no matter the seed.")

        assert not _equal(setting_1_dict, setting_3_dict)

        # Additionally, explicitly check that either the train schedule or the train envs are
        # different, since the check above could have passed due to some other attribute being
        # different between the two settings.
        if isinstance(setting_1, IncrementalRLSetting) and setting_1.train_envs:
            assert isinstance(setting_3, IncrementalRLSetting)
            # Using custom envs for each task.
            assert not _equal(setting_1.train_envs, setting_3.train_envs)
            assert not _equal(setting_1.val_envs, setting_3.val_envs)
            assert not _equal(setting_1.test_envs, setting_3.test_envs)
        else:
            # Using a single env with a task schedule.
            assert not _equal(setting_1.train_task_schedule, setting_3.train_task_schedule)
            assert not _equal(setting_1.val_task_schedule, setting_3.val_task_schedule)
            assert not _equal(setting_1.test_task_schedule, setting_3.test_task_schedule)

    def test_env_attributes_change(self, setting_kwargs: Dict[str, Any], config: Config):
        """Check that the values of the given attributes do change at each step during
        training.
        """
        setting_kwargs.setdefault("nb_tasks", 2)
        setting_kwargs.setdefault("train_max_steps", 1000)
        setting_kwargs.setdefault("max_episode_steps", 50)
        setting_kwargs.setdefault("test_max_steps", 1000)
        setting = self.Setting(**setting_kwargs)

        assert setting.train_task_schedule

        # NOTE: Have to check for `setting.train_envs` because in that case the task schedule won't
        # be used.
        from sequoia.settings.rl.incremental.setting import IncrementalRLSetting

        if isinstance(setting, IncrementalRLSetting) and setting._using_custom_envs_foreach_task:
            # It would be pretty hard to check for the "task values" in this case, because the
            # custom envs for each task might not be just the same env type but with different
            # attributes!
            pytest.skip("Using custom envs for each task instead of a task schedule.")

        assert all(setting.train_task_schedule.values())
        assert setting.nb_tasks == setting_kwargs["nb_tasks"]
        assert setting.train_steps_per_task == setting_kwargs["train_max_steps"] // setting.nb_tasks
        assert setting.train_max_steps == setting_kwargs["train_max_steps"]

        attributes = set().union(*[task.keys() for task in setting.train_task_schedule.values()])

        method = DummyMethod()

        results = setting.apply(method, config=config)

        assert results
        self.validate_results(setting, method, results)
        # TODO: Need to limit the episodes per step in MonsterKong.
        # In MonsterKong, we might have 0 reward, since this might not even
        # constitute a full episode.
        # assert results.objective

        for attribute in attributes:
            train_values: List[float] = [
                values[attribute]
                for values_dict in method.all_train_values
                for step, values in values_dict.items()
            ]
            assert train_values
            task_schedule_values: List[float] = {
                step: task[attribute] for step, task in setting.train_task_schedule.items()
            }
            self.validate_env_value_changes(
                setting=setting,
                attribute=attribute,
                task_schedule_for_attr=task_schedule_values,
                train_values=train_values,
            )

    @staticmethod
    def validate_env_value_changes(
        setting: ContinualRLSetting,
        attribute: str,
        task_schedule_for_attr: Dict[str, float],
        train_values: List[float],
    ):
        """Given an attribute name, and the values of that attribute in the
        task schedule, check that the actual values for that attribute
        encountered during training make sense, based on the type of
        non-stationarity present in this Setting.
        """
        assert len(set(task_schedule_for_attr.values())) == setting.nb_tasks + 1, (
            f"Task schedule should have had {setting.nb_tasks + 1} distinct values for "
            f"attribute {attribute}: {task_schedule_for_attr}"
        )

        if setting.smooth_task_boundaries:
            # Should have one (unique) value for the attribute at each step during training
            # This is the truth condition for the ContinualRLSetting.
            # NOTE: There's an offset by 1 here because of when the env is closed.
            # NOTE: This test won't really work with integer values, but that doesn't matter
            # right now because we don't/won't support changing the values of integer
            # parameters in this "continuous" task setting.
            assert len(set(train_values)) == setting.train_max_steps, (
                f"Should have encountered {setting.train_max_steps} distinct values "
                f"for attribute {attribute}: during training!"
            )
        else:
            from ..discrete.setting import DiscreteTaskAgnosticRLSetting

            setting: DiscreteTaskAgnosticRLSetting
            train_tasks = setting.nb_tasks
            unique_attribute_values = set(train_values)

            assert setting.train_task_schedule.keys() == task_schedule_for_attr.keys()
            for k, v in task_schedule_for_attr.items():
                task_dict = setting.train_task_schedule[k]
                assert attribute in task_dict
                assert task_dict[attribute] == v

            assert len(unique_attribute_values) == train_tasks, (
                type(setting),
                attribute,
                unique_attribute_values,
                task_schedule_for_attr,
                setting.nb_tasks,
            )

    def validate_results(
        self,
        setting: ContinualRLSetting,
        method: DummyMethod,
        results: ContinualRLSetting.Results,
    ) -> None:
        assert results
        assert results.objective
        assert method.n_task_switches == 0
        assert method.n_fit_calls == 1
        assert not method.received_task_ids
        assert not method.received_while_training

    @pytest.mark.parametrize(
        "batch_size",
        [None, 1, 3],
    )
    @pytest.mark.timeout(60)
    def test_check_iterate_and_step(
        self,
        setting_kwargs: Dict[str, Any],
        batch_size: Optional[int],
    ):
        """Test that the observations are of the right type and shape, regardless
        of wether we iterate on the env by calling 'step' or by using it as a
        DataLoader.
        """
        setting_kwargs.setdefault("num_workers", 0)

        dataset: str = setting_kwargs["dataset"]
        from gym.envs.registration import registry

        if dataset in registry.env_specs:
            with gym.make(dataset) as temp_env:
                expected_x_space = temp_env.observation_space
                expected_action_space = temp_env.action_space
        else:
            # NOTE: Not ideal: Have to create a setting just to get the observation space
            temp_setting = self.Setting(**setting_kwargs)
            # NOTE: Using the test dataloader so the task labels space is a Sparse(Discrete(n)) in
            # the worst case, and so all observations (None or integers) are valid samples.
            with temp_setting.test_dataloader() as temp_env:
                # e = temp_env
                # while e.unwrapped is not e:
                #     print(f"Wrapper of type {type(e)} has obs space of {e.observation_space}")
                #     e = e.env
                # print(f"Unwrapped obs space is {e.observation_space}")
                # assert False, temp_env
                expected_x_space = temp_env.observation_space.x
                expected_action_space = temp_env.action_space
            del temp_setting

        setting = self.Setting(**setting_kwargs)

        if batch_size is not None:
            expected_batched_x_space = batch_space(expected_x_space, batch_size)
            expected_batched_action_space = batch_space(setting.action_space, batch_size)
        else:
            expected_batched_x_space = expected_x_space
            expected_batched_action_space = expected_action_space

        assert setting.observation_space.x == expected_x_space
        assert setting.action_space == expected_action_space

        # TODO: This is changing:
        assert setting.train_transforms == []
        # assert setting.train_transforms == [Transforms.to_tensor, Transforms.three_channels]

        def check_env_spaces(env: gym.Env) -> None:
            if env.batch_size is not None:
                # TODO: This might not be totally accurate, for example because the
                # TransformObservation wrapper applied to a VectorEnv doesn't change the
                # single_observation_space, AFAIR.
                assert env.single_observation_space.x == expected_x_space
                assert env.single_action_space == expected_action_space
                assert isinstance(env.observation_space, TypedDictSpace), (
                    env,
                    env.observation_space,
                )
                assert env.observation_space.x == expected_batched_x_space
                assert env.action_space == expected_batched_action_space
            else:
                assert env.observation_space.x == expected_x_space
                assert env.action_space == expected_action_space

        # FIXME: Move this to an instance method on the test class so that subclasses
        # can change stuff in it.
        def check_obs(obs: ContinualRLSetting.Observations) -> None:
            if isinstance(self.Setting, partial):
                # NOTE: This Happens when we sneakily switch out the self.Setting
                # attribute in other tests (for the SettingProxy for example).
                assert isinstance(obs, self.Setting.args[0].Observations)
            else:
                assert isinstance(obs, self.Setting.Observations)
            assert obs.x in expected_batched_x_space
            # In this particular case here, the task labels should be None.
            # FIXME: For InrementalRL, this isn't correct! TestIncrementalRL should
            # therefore have its own version of this function.
            if self.Setting is ContinualRLSetting:
                assert obs.task_labels is None or all(
                    task_label == None for task_label in obs.task_labels
                )

        with setting.train_dataloader(batch_size=batch_size, num_workers=0) as env:
            assert env.batch_size == batch_size
            check_env_spaces(env)

            # BUG: The dataset's observation space has task_labels as a Discrete, but the task
            # labels are None.
            setting: ContinualRLSetting
            if setting.task_labels_at_train_time:
                if batch_size is not None:
                    assert isinstance(env.observation_space.task_labels, spaces.MultiDiscrete)
                else:
                    assert isinstance(env.observation_space.task_labels, spaces.Discrete)
            elif setting.known_task_boundaries_at_train_time:
                assert isinstance(env.observation_space.task_labels, Sparse)

            obs = env.reset()
            # BUG: TODO: The observation space that we use should actually check with
            # isinstance and over the fields that fit in the space. Here there is a bug
            # because the env observations also have a `done` field, while the space
            # doesnt.
            # assert obs in env.observation_space
            assert obs.x in env.observation_space.x  # this works though.

            # BUG: This doesn't currently work: (would need a tuple value rather than an
            # array.
            # assert obs.task_labels in env.observation_space.task_labels
            assert obs.task_labels in env.observation_space.task_labels
            if batch_size:
                assert obs.x[0] in setting.observation_space.x
                assert (
                    obs.task_labels is None
                    or obs.task_labels[0] in setting.observation_space.task_labels
                )
            else:
                assert obs in setting.observation_space

            reset_obs = env.reset()
            check_obs(reset_obs)

            # BUG: Environment is closed? (batch_size = 3, dataset = 'CartPole-v0')
            step_obs, *_ = env.step(env.action_space.sample())
            check_obs(step_obs)

            for iter_obs in take(env, 3):
                check_obs(iter_obs)
                _ = env.send(env.action_space.sample())

        with setting.val_dataloader(batch_size=batch_size, num_workers=0) as env:
            assert env.batch_size == batch_size
            check_env_spaces(env)

            reset_obs = env.reset()
            check_obs(reset_obs)

            step_obs, *_ = env.step(env.action_space.sample())
            check_obs(step_obs)

            for iter_obs in take(env, 3):
                check_obs(iter_obs)
                _ = env.send(env.action_space.sample())

        # NOTE: Limitting the batch size at test time to None (i.e. a single env)
        # because of how the Monitor class works atm.
        batch_size = None
        expected_batched_x_space = expected_x_space
        expected_batched_action_space = expected_action_space

        # NOTE: Need to make sure that the 'directory' passed to the Monitor
        # wrapper is a temp dir. Should be the case, but just checking.
        assert setting.config.log_dir != Path("results")

        with setting.test_dataloader(batch_size=batch_size, num_workers=0) as env:
            assert env.batch_size is None
            check_env_spaces(env)

            reset_obs = env.reset()
            check_obs(reset_obs)

            step_obs, *_ = env.step(env.action_space.sample())
            check_obs(step_obs)

            # NOTE: Can't do this here, unless the episode is over, because the Monitor
            # doesn't want us to end an episode early!
            # for iter_obs in take(env, 3):
            #     check_obs(iter_obs)
            #     _ = env.send(env.action_space.sample())

        with setting.test_dataloader(batch_size=batch_size) as env:
            assert not env.is_closed()
            # NOTE: Can't do this here, unless the episode is over, because the Monitor
            # doesn't want us to end an episode early!
            for iter_obs in take(env, 3):
                check_obs(iter_obs)
                _ = env.send(env.action_space.sample())

    @pytest.mark.no_xvfb
    @pytest.mark.timeout(20)
    @pytest.mark.skipif(
        (not Path("temp").exists()),
        reason="Need temp dir for saving the figure this test creates.",
    )
    @mujoco_required
    def test_show_distributions(self, config: Config):
        setting = self.Setting(
            dataset="half_cheetah",
            max_steps=1_000,
            max_episode_steps=100,
            config=config,
        )

        fig, axes = plt.subplots(2, 3)
        name_to_env_fn = {
            "train": setting.train_dataloader,
            "valid": setting.val_dataloader,
            "test": setting.test_dataloader,
        }
        for i, (name, env_fn) in enumerate(name_to_env_fn.items()):
            env = env_fn(batch_size=None, num_workers=None)

            gravities: List[float] = []
            task_labels: List[Optional[int]] = []
            total_steps = 0
            while not env.is_closed():
                obs = env.reset()
                done = False
                steps_in_episode = 0

                while not done:
                    t = obs.task_labels
                    obs, reward, done, info = env.step(env.action_space.sample())
                    total_steps += 1
                    steps_in_episode += 1
                    y = reward.y

                    gravities.append(env.gravity)
                    print(total_steps, env.gravity)
                    if total_steps > 100:
                        assert env.gravity != -9.81

                    task_labels.append(t)

            x = np.arange(len(gravities))
            axes[0, i].plot(x, gravities, label="gravities")
            axes[0, i].legend()
            axes[0, i].set_title(f"{name} gravities")
            axes[0, i].set_xlabel("Step index")
            axes[0, i].set_ylabel("Value")

            # for task_id in task_ids:
            #     y = [t_counter.get(task_id) for t_counter in t_counters]
            #     axes[1, i].plot(x, y, label=f"task_id={task_id}")
            # axes[1, i].legend()
            # axes[1, i].set_title(f"{name} task_id")
            # axes[1, i].set_xlabel("Batch index")
            # axes[1, i].set_ylabel("Count in batch")

        plt.legend()

        Path("temp").mkdir(exist_ok=True)
        fig.set_size_inches((6, 4), forward=False)
        plt.savefig(f"temp/{self.Setting.__name__}.png")
        # plt.waitforbuttonpress(10)
        # plt.show()


# @pytest.mark.xfail(reason="TODO: pl_bolts DQN only accepts string environment names..")
# def test_dqn_on_env(tmp_path: Path):
#     """ TODO: Would be nice if we could have the models work directly on the
#     gym envs..
#     """
#     from pl_bolts.models.rl import DQN
#     from pytorch_lightning import Trainer

#     setting = ContinualRLSetting()
#     env = setting.train_dataloader(batch_size=None)
#     model = DQN(env)
#     trainer = Trainer(fast_dev_run=True, default_root_dir=tmp_path)
#     success = trainer.fit(model)
#     assert success == 1


def test_passing_task_schedule_sets_other_attributes_correctly():
    # TODO: Figure out a way to test that the tasks are switching over time.
    setting = ContinualRLSetting(
        dataset="CartPole-v0",
        train_task_schedule={
            0: {"gravity": 5.0},
            100: {"gravity": 10.0},
            200: {"gravity": 20.0},
        },
        test_max_steps=10_000,
    )
    assert setting.phases == 1
    assert setting.nb_tasks == 2
    # assert setting.steps_per_task == 100
    assert setting.test_task_schedule == {
        0: {"gravity": 5.0},
        5_000: {"gravity": 10.0},
        10_000: {"gravity": 20.0},
    }
    assert setting.test_max_steps == 10_000
    # assert setting.test_steps_per_task == 5_000

    setting = ContinualRLSetting(
        dataset="CartPole-v0",
        train_task_schedule={
            0: {"gravity": 5.0},
            100: {"gravity": 10.0},
            200: {"gravity": 20.0},
        },
        test_max_steps=2000,
        # test_steps_per_task=100,
    )
    assert setting.phases == 1
    # assert setting.nb_tasks == 2
    # assert setting.steps_per_task == 100
    assert setting.test_task_schedule == {
        0: {"gravity": 5.0},
        1000: {"gravity": 10.0},
        2000: {"gravity": 20.0},
    }
    assert setting.test_max_steps == 2000
    # assert setting.test_steps_per_task == 100


def test_fit_and_on_task_switch_calls():
    setting = ContinualRLSetting(
        dataset="CartPole-v0",
        # nb_tasks=5,
        # train_steps_per_task=100,
        train_max_steps=500,
        test_max_steps=500,
        # test_steps_per_task=100,
        train_transforms=[],
        test_transforms=[],
        val_transforms=[],
    )
    method = _DummyMethod()
    _ = setting.apply(method)
    # == 30 task switches in total.


if MUJOCO_INSTALLED:
    from sequoia.settings.rl.envs.mujoco import (
        ContinualHalfCheetahEnv,
        ContinualHalfCheetahV2Env,
        ContinualHalfCheetahV3Env,
        ContinualHopperEnv,
        ContinualHopperV2Env,
        ContinualHopperV3Env,
        ContinualWalker2dV2Env,
        ContinualWalker2dV3Env,
    )

    @mujoco_required
    @pytest.mark.parametrize(
        "dataset, expected_env_type",
        [
            ("half_cheetah", ContinualHalfCheetahEnv),
            ("halfcheetah", ContinualHalfCheetahEnv),
            ("HalfCheetah-v2", ContinualHalfCheetahV2Env),
            ("HalfCheetah-v3", ContinualHalfCheetahV3Env),
            ("ContinualHalfCheetah-v2", ContinualHalfCheetahV2Env),
            ("ContinualHalfCheetah-v3", ContinualHalfCheetahV3Env),
            ("ContinualHopper-v2", ContinualHopperEnv),
            ("hopper", ContinualHopperEnv),
            ("Hopper-v2", ContinualHopperV2Env),
            ("Hopper-v3", ContinualHopperV3Env),
            ("walker2d", ContinualWalker2dV3Env),
            ("Walker2d-v2", ContinualWalker2dV2Env),
            ("Walker2d-v3", ContinualWalker2dV3Env),
            ("ContinualWalker2d-v2", ContinualWalker2dV2Env),
            ("ContinualWalker2d-v3", ContinualWalker2dV3Env),
        ],
    )
    def test_mujoco_env_name_maps_to_continual_variant(
        dataset: str, expected_env_type: Type[gym.Env]
    ):
        setting = ContinualRLSetting(dataset=dataset, train_max_steps=10_000, test_max_steps=10_000)
        train_env = setting.train_dataloader()
        assert isinstance(train_env.unwrapped, expected_env_type)


================================================
FILE: sequoia/settings/rl/continual/tasks.py
================================================
""" Handlers for creating tasks in different environments.

TODO: Add more envs:
- [ ] PyBullet!
- [ ] Box2d!
- [ ] ProcGen!
- [ ] dm_control!

from gym.envs.box2d import BipedalWalker, BipedalWalkerHardcore
"""
import difflib
import inspect
import warnings
from functools import partial, singledispatch
from typing import Any, Callable, Dict, List, Type, TypeVar, Union

import gym
import numpy as np
from gym.envs.classic_control import (
    AcrobotEnv,
    CartPoleEnv,
    Continuous_MountainCarEnv,
    MountainCarEnv,
    PendulumEnv,
)
from gym.envs.registration import EnvRegistry, EnvSpec, load, registry

from sequoia.common.gym_wrappers.multi_task_environment import make_env_attributes_task
from sequoia.settings.rl.envs import MUJOCO_INSTALLED, sequoia_registry
from sequoia.utils.utils import camel_case

# Idea: Create a true 'Task' class?
Task = Any
ContinuousTask = Dict[str, float]
TaskType = TypeVar("TaskType", bound=ContinuousTask)
# TODO: Create a fancier class for the TaskSchedule, as described in the test file.
# IDEA: Have the Task Schedule be a 'list' of Task objects, each of which has a
# 'duration' parameter, which are accumulated to create the 'keys' of the task schedule!
# TaskSchedule = Dict[int, TaskType]


class TaskSchedule(Dict[int, TaskType]):
    pass


class EnvironmentNotSupportedError(gym.error.UnregisteredEnv):
    """Error raised when we don't know how to create a task for the given environment."""


def names_match(name_a: str, name_b: str) -> bool:
    a_variants = (name_a, name_a.lower(), camel_case(name_a))
    b_variants = (name_b, name_b.lower(), camel_case(name_b))
    # TODO: Not sure about this 'endswith' stuff, e.g. with MountainCarContinuous vs MountainCar?
    return (
        name_a in b_variants or name_b in a_variants
    )  # or name_a.endswith(b_variants) or name_b.endswith(a_variants)


def _is_supported(
    env_id: str,
    _make_task_function: Callable[..., ContinuousTask],
    env_registry: EnvRegistry = registry,
) -> bool:
    """Returns wether Sequoia is able to create (continuous) tasks for the given
    environment.

    WIP: It is better not to use this directly, and instead use the equivalent
    `is_supported` function which is created dynamically below.
    """

    def _has_handler(some_env_type: Type[gym.Env]) -> bool:
        """Returns wether the "make task" function has a registered handler for the
        given envs.
        """
        return some_env_type in _make_task_function.registry or (
            not inspect.isfunction(some_env_type)
            and _make_task_function.dispatch(some_env_type)
            is not _make_task_function.dispatch(object)
        )

    if isinstance(env_id, str):
        env_spec = env_registry.spec(env_id)

    elif isinstance(env_id, EnvSpec):
        env_spec = env_id
        env_id = env_spec.id

    elif inspect.isclass(env_id) and issubclass(env_id, gym.Env):
        env_type = env_id
        env_spec = None
        if _has_handler(env_type):
            return True
        env_id = env_type.__name__
        class_name = env_type.__name__
    else:
        raise NotImplementedError(env_id, type(env_id))

    assert isinstance(env_id, str)
    if env_spec:
        assert isinstance(env_spec, EnvSpec)

        if callable(env_spec.entry_point):
            if _has_handler(env_spec.entry_point):
                return True
            class_name = env_spec.entry_point.__name__
        else:
            assert isinstance(env_spec.entry_point, str)
            _module, _, class_name = env_spec.entry_point.partition(":")

    registered_class_names = tuple(c.__name__ for c in _make_task_function.registry)

    if class_name in registered_class_names:
        return True
    elif class_name.startswith(registered_class_names):
        return True

    close_matches = difflib.get_close_matches(class_name, registered_class_names)
    if not close_matches:
        return False
    return False


def task_sampling_function(
    env_registry: EnvRegistry = registry, based_on: Callable[[gym.Env], TaskType] = None
) -> Callable[[gym.Env], TaskType]:
    """Decorator for a "make_task" function (e.g. `make_continuous_task`,
    `make_discrete_task`, etc.) that does the following:

    1. Creates a singledispatch callable from the given function, if necessary;
    2. Registers three useful handlers, for strings, environment types, and wrappers to
    the new function.
    3. Adds a 'is_supported' function on that function (see NOTE below);
    4. Adds all the registered handlers from the `based_on` function, if passed;

    NOTE (@lebrice): not sure about this is_supported being created and set on the
    function itself. It would probably be cleaner to create a class like TaskCreator or
    something that has the same methods as the underlying singledispatch callable.

    NOTE: A task sampling function should give back the same task when given the same
    seed, step and change_steps.
    """

    def _wrapper(make_task_fn: Callable[[gym.Env], TaskType]) -> Callable[[gym.Env], TaskType]:

        if not hasattr(make_task_fn, "registry"):
            make_task_fn = singledispatch(make_task_fn)

        @make_task_fn.register(type)
        def make_discrete_task_from_type(env_type: Type[gym.Env], **kwargs) -> ContinuousTask:
            try:
                # Try to create a task without actually instantiating the env, by passing the
                # type of env as the 'env' argument, rather than an env instance.
                env_handler_function = make_task_fn.dispatch(env_type)
                return env_handler_function(env_type, **kwargs)
            except Exception as exc:
                raise RuntimeError(
                    f"Unable to create a task based only on the env type {env_type}: {exc}\n"
                ) from exc

        @make_task_fn.register(str)
        def make_discrete_task_by_id(
            env: str,
            **kwargs,
        ) -> Union[Dict[str, Any], Any]:
            # Load the entry-point class, and use it to determine what handler to use.
            # TODO: Actually instantiate the env here? or just dispatch based on the env class?
            if env not in env_registry.env_specs:
                raise RuntimeError(
                    f"Can't create a task for env id {env}, since it isn't a registered env id."
                )
            env_spec: EnvSpec = env_registry.env_specs[env]
            env_entry_point: Callable[..., gym.Env] = load(env_spec.entry_point)
            # import inspect

            try:
                task: ContinuousTask = make_discrete_task_from_type(env_entry_point, **kwargs)
                return task

            except RuntimeError as exc:
                warnings.warn(
                    RuntimeWarning(
                        f"A temporary environment will have to be created in order to make a task: {exc}"
                    )
                )

            with gym.make(env) as temp_env:
                # IDEA: Could avoid re-creating the env between calls to this function, for
                # instance by saving a single temp env in a global variable and overwriting
                # it if `env` is of a different type.
                return make_task_fn(temp_env, **kwargs)

        @make_task_fn.register
        def make_discrete_for_wrapped_env(
            env: gym.Wrapper,
            step: int,
            change_steps: List[int] = None,
            **kwargs,
        ) -> Union[Dict[str, Any], Any]:
            # NOTE: Not sure if this is totally a good idea...
            # If someone registers a handler for some kind of Wrapper, than all envs wrapped
            # with that wrapper will use that handler, instead of their base environment type.
            return make_task_fn(env.env, step=step, change_steps=change_steps, **kwargs)

        if based_on is not None:
            for registered_type, registered_handler in based_on.registry.items():
                # NOTE: Skipping these types since we register new handlers above. Not
                # sure if it's necessary, since it might just overwrite an old handler
                # to register a new one for the same type?
                if registered_type not in [object, str, type, gym.Wrapper]:
                    make_task_fn.register(registered_type, registered_handler)

        make_task_fn.is_supported = partial(_is_supported, _make_task_fn=make_task_fn)

        return make_task_fn

    return _wrapper


@singledispatch
def make_continuous_task(
    env: gym.Env,
    step: int,
    change_steps: List[int],
    seed: int = None,
    **kwargs,
) -> ContinuousTask:
    """Generic function used by Sequoia's RL settings to create a "task" that will be
    applied to an environment like `env`.

    To add support for a new type of environment, simply register a handler function:

    ```
    @make_continuous_task.register(SomeGymEnvClass)
    def make_task_for_my_env(env: SomeGymEnvClass, step: int, change_steps: List[int], **kwargs,):
        return {"my_attribute": random.random()}
    ```

    NOTE: In order to create tasks for an environment through its string 'id', and to
    avoid having to actually instantiate an environment, `env` could perhaps be a type
    of environment rather than an actual environment instance. If your function can't
    handle this (raises an exception somehow), then a temporary environment will be
    created, and a warning will be raised.

    TODO: remove / rename this 'change_steps' to 'max_steps' instead.
    """
    raise NotImplementedError(f"Don't currently know how to create tasks for env {env}")


make_continuous_task = task_sampling_function(env_registry=sequoia_registry)(make_continuous_task)
is_supported = partial(_is_supported, _make_task_function=make_continuous_task)

# from functools import _SingleDispatchCallable

# Dictionary mapping from environment type to a dict of environment values which can be
# modified with multiplicative gaussian noise.
_ENV_TASK_ATTRIBUTES: Dict[Union[Type[gym.Env]], Dict[str, float]] = {
    CartPoleEnv: {
        "gravity": 9.8,
        "masscart": 1.0,
        "masspole": 0.1,
        "length": 0.5,
        "force_mag": 10.0,
        "tau": 0.02,
    },
    PendulumEnv: {
        "max_speed": 8.0,
        "max_torque": 2.0,
        # "dt" = .05
        "g": 10.0,
        "m": 1.0,
        "l": 1.0,
    },
    MountainCarEnv: {
        "gravity": 0.0025,
        "goal_position": 0.45,  # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
        # BUG: Since we use multiplicative noise, this won't change over time.
        # "goal_velocity": 0,
    },
    Continuous_MountainCarEnv: {
        "goal_position": 0.45,  # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
        # BUG: Since we use multiplicative noise, this won't change over time.
        # "goal_velocity": 0,
    },
    # TODO: Test AcrobotEnv
    AcrobotEnv: {
        "LINK_LENGTH_1": 1.0,  # [m]
        "LINK_LENGTH_2": 1.0,  # [m]
        "LINK_MASS_1": 1.0,  #: [kg] mass of link 1
        "LINK_MASS_2": 1.0,  #: [kg] mass of link 2
        "LINK_COM_POS_1": 0.5,  #: [m] position of the center of mass of link 1
        "LINK_COM_POS_2": 0.5,  #: [m] position of the center of mass of link 2
        "LINK_MOI": 1.0,  #: moments of inertia for both links
    },
    # TODO: Add more of the classic control envs here.
    # TODO: Need to get the attributes to modify in each environment type and
    # add them here.
    # AtariEnv: [
    #     # TODO: Maybe have something like the difficulty as the CL 'task' ?
    #     # difficulties = temp_env.ale.getAvailableDifficulties()
    #     # "game_difficulty",
    # ],
}


@make_continuous_task.register(CartPoleEnv)
@make_continuous_task.register(PendulumEnv)
@make_continuous_task.register(MountainCarEnv)
@make_continuous_task.register(Continuous_MountainCarEnv)
@make_continuous_task.register(AcrobotEnv)
def make_task_for_classic_control_env(
    env: gym.Env,
    step: int,
    change_steps: List[int] = None,
    task_params: Union[List[str], Dict[str, Any]] = None,
    seed: int = None,
    noise_std: float = 0.2,
):
    # NOTE: `step` doesn't matter here, all tasks are independant.
    task_params = task_params or _ENV_TASK_ATTRIBUTES[type(env.unwrapped)]
    if step == 0:
        # Use the 'default' task as the first task.
        return task_params.copy()

    # Make this more reproducible: When given the same seed and same step, return the
    # same task.
    if seed is not None:
        rng = np.random.default_rng(seed + step)
    else:
        rng = None
    # Default back to the 'env attributes' task, which multiplies the default values
    # with normally distributed scaling coefficients.
    # TODO: Need to refactor the whole MultiTaskEnv/SmoothTransition wrappers / tasks
    # etc.
    return make_env_attributes_task(
        env,
        task_params=task_params,
        rng=rng,
        noise_std=noise_std,
    )


# IDEA: Could probably not have these big ugly IF statements since we have the stubs for
# the different mujoco env classes anyway.

if MUJOCO_INSTALLED:
    from sequoia.settings.rl.envs.mujoco import (
        ContinualHalfCheetahV2Env,
        ContinualHalfCheetahV3Env,
        ContinualHopperV2Env,
        ContinualHopperV3Env,
        ContinualWalker2dV2Env,
        ContinualWalker2dV3Env,
        ModifiedGravityEnv,
    )

    default_mujoco_gravity = -9.81

    @make_continuous_task.register(ContinualHopperV2Env)
    @make_continuous_task.register(ContinualHopperV3Env)
    @make_continuous_task.register(ContinualWalker2dV2Env)
    @make_continuous_task.register(ContinualWalker2dV3Env)
    @make_continuous_task.register(ContinualHalfCheetahV2Env)
    @make_continuous_task.register(ContinualHalfCheetahV3Env)
    def make_task_for_modified_gravity_env(
        env: ModifiedGravityEnv,
        step: int,
        change_steps: List[int],
        seed: int = None,
        **kwargs,
    ) -> Union[Dict[str, Any], Any]:
        step_seed = seed * step if seed is not None else None
        # NOTE: np.random.default_rng(None) will NOT give the same result every first
        # time it is called, so this won't cause any issues with the same gravity being
        # sampled for all tasks if `seed` is None.
        rng = np.random.default_rng(step_seed)
        if step == 0:
            coefficient = 1
        else:
            coefficient = rng.uniform() + 0.5
        # TODO: Do we want to start with normal gravity?
        gravity = coefficient * default_mujoco_gravity
        return {"gravity": gravity}


================================================
FILE: sequoia/settings/rl/continual/tasks_test.py
================================================
from typing import Type

import pytest

from sequoia.conftest import mujoco_required
from sequoia.settings.rl.envs import (
    ContinualHalfCheetahEnv,
    ContinualHalfCheetahV2Env,
    ContinualHalfCheetahV3Env,
    ContinualHopperEnv,
    ContinualWalker2dEnv,
    MujocoEnv,
)

from .tasks import is_supported, make_continuous_task


@mujoco_required
@pytest.mark.parametrize(
    "env_type",
    [
        ContinualHalfCheetahV2Env,
        ContinualHalfCheetahV3Env,
        ContinualHopperEnv,
        ContinualWalker2dEnv,
        ContinualHalfCheetahEnv,
    ],
)
def test_mujoco_tasks(env_type: Type[MujocoEnv]):
    assert is_supported("HalfCheetah-v2")

    from gym.envs.mujoco import HalfCheetahEnv

    # We shouldn't mark the *original* envs as supported, rather, we should only mark
    # our variants as supported.
    assert not is_supported(HalfCheetahEnv)

    assert is_supported(env_type)

    task = make_continuous_task(env_type, step=0, change_steps=[0, 100, 200])
    assert task == {"gravity": -9.81}

    task_a = make_continuous_task(env_type, step=100, change_steps=[0, 100, 200], seed=123)
    task_b = make_continuous_task(env_type, step=100, change_steps=[0, 100, 200], seed=123)
    task_c = make_continuous_task(env_type, step=100, change_steps=[0, 100, 200], seed=456)
    # NOTE: Not sure that this will always give exactly the same result, since idk how
    # seeding is dependant on the machine running the code.
    # assert task == {'gravity': -10.134188877055529}
    assert task_a == task_b
    assert task_a != task_c


================================================
FILE: sequoia/settings/rl/continual/test_environment.py
================================================
import itertools
import math
from typing import Dict

from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings.assumptions.continual import ContinualResults, TestEnvironment

# TODO: Refactor those so they are based on the MeasureRLPerformanceWrapper, which works
# with vectorized envs.


class ContinualRLTestEnvironment(TestEnvironment):
    def __init__(self, *args, task_schedule: Dict, **kwargs):
        super().__init__(*args, **kwargs)
        self.task_schedule = task_schedule
        self.boundary_steps = [step // (self.batch_size or 1) for step in self.task_schedule.keys()]

    def __len__(self):
        return math.ceil(self.step_limit / (getattr(self.env, "batch_size", 1) or 1))

    def get_results(self) -> ContinualResults[EpisodeMetrics]:
        # TODO: Place the metrics in the right 'bin' at the end of each episode during
        # testing depending on the task at that time, rather than what's happening here,
        # where we're getting all the rewards and episode lengths at the end and then
        # sort it out into the bins based on the task schedule. ALSO: this would make it
        # easier to support monitoring batched RL environments, since these `Monitor`
        # methods (get_episode_rewards, get_episode_lengths, etc) assume the environment
        # isn't batched.
        rewards = self.get_episode_rewards()
        lengths = self.get_episode_lengths()

        task_schedule: Dict[int, Dict] = self.task_schedule
        task_steps = sorted(task_schedule.keys())
        assert 0 in task_steps

        test_results = ContinualResults()
        for step, episode_reward, episode_length in zip(
            itertools.accumulate(lengths), rewards, lengths
        ):
            # Given the step, find the task id.
            episode_metric = EpisodeMetrics(
                n_samples=1,
                mean_episode_reward=episode_reward,
                mean_episode_length=episode_length,
            )
            test_results.metrics.append(episode_metric)
        return test_results

    def render(self, mode="human", **kwargs):
        # TODO: This might not be setup right. Need to check.
        image_batch = super().render(mode=mode, **kwargs)
        if mode == "rgb_array" and self.batch_size:
            return tile_images(image_batch)
        return image_batch

    def _after_reset(self, observation):
        # Is this going to work fine when the observations are batched though?
        return super()._after_reset(observation)


================================================
FILE: sequoia/settings/rl/discrete/__init__.py
================================================
from .setting import DiscreteTaskAgnosticRLSetting
from .tasks import make_discrete_task


================================================
FILE: sequoia/settings/rl/discrete/multienv_wrappers.py
================================================
""" Wrappers that around multiple environments.

These wrappers can be used to get different kinds of multi-task environments, or even to
concatenate environments.
"""
from abc import ABC, abstractmethod
from typing import Any, Callable, List, Optional, Sequence, Union

import gym
import numpy as np
from gym import spaces

from sequoia.common.gym_wrappers import IterableWrapper
from sequoia.common.gym_wrappers.multi_task_environment import add_task_labels
from sequoia.common.gym_wrappers.utils import MayCloseEarly
from sequoia.utils.generic_functions import concatenate
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


def instantiate_env(env: Union[str, gym.Env, Callable[[], gym.Env]]) -> gym.Env:
    if isinstance(env, gym.Env):
        return env
    if isinstance(env, str):
        return gym.make(env)
    assert callable(env)
    return env()


class MultiEnvWrapper(IterableWrapper, ABC):
    """TODO: Wrapper like that iterates over the envs.

    Could look a little bit like this:
    https://github.com/rlworkgroup/garage/blob/master/src/garage/envs/multi_env_wrapper.py
    """

    def __init__(self, envs: List[gym.Env], add_task_ids: bool = False):
        self._envs = envs.copy()
        self._current_task_id = 0
        self.nb_tasks = len(envs)
        self._envs_is_closed: Sequence[bool] = np.zeros([self.nb_tasks], dtype=bool)
        self._add_task_labels = add_task_ids
        self.rng: np.random.Generator = np.random.default_rng()

        self._instantiate_env(self._current_task_id)
        super().__init__(env=self._envs[self._current_task_id])
        self.task_label_space = spaces.Discrete(self.nb_tasks)
        if self._add_task_labels:
            self.observation_space = add_task_labels(
                self.env.observation_space, self.task_label_space
            )

    def _instantiate_env(self, index: int) -> None:
        self._envs[index] = instantiate_env(self._envs[index])

    def set_task(self, task_id: int) -> None:
        if self.is_closed(env_index=None):
            raise gym.error.ClosedEnvironmentError(
                f"Can't call set_task on the env, since it's already closed."
            )
        self._current_task_id = task_id
        # Use super().__init__() to reset the `self.env` attribute in gym.Wrapper.
        # TODO: This also resets the '_is_closed' on self.
        # TODO: This resets the 'observation_' and 'action_' etc objects that are saved
        # in the constructor of the 'IterableWrapper'
        self._instantiate_env(self._current_task_id)
        gym.Wrapper.__init__(self, env=self._envs[self._current_task_id])
        if self._add_task_labels:
            self.observation_space = add_task_labels(
                self.env.observation_space, self.task_label_space
            )

    @abstractmethod
    def next_task(self) -> int:
        pass

    def reset(self):
        if all(self._envs_is_closed):
            self.close()
        elif isinstance(self.env, MayCloseEarly) and self.env.is_closed():
            self._envs_is_closed[self._current_task_id] = True
        self.set_task(self.next_task())
        obs = super().reset()
        return self.observation(obs)

    def step(self, action):
        obs, rewards, done, info = super().step(action)
        obs = self.observation(obs)
        return obs, rewards, done, info

    def is_closed(self, env_index: int = None):
        """returns `True` if the environment at index `env_index` is closed, otherwise
        if `env_index` is None, returns `True` if `close()` was called on the wrapper.
        (todo: or if all envs are closed.)
        """
        if env_index is None:
            # Return wether this wrapper itself was closed manually (from outside).
            # TODO: Should we also check if all envs are closed? If so, should we close
            # this env manually?
            if self._is_closed:
                return True
            elif all(self.is_closed(env_id) for env_id in range(self.nb_tasks)):
                self.close(env_index=None)
                return True
            return False

        assert isinstance(env_index, int)
        # Return wether the env at that index is closed.
        if isinstance(self._envs[env_index], MayCloseEarly):
            env_is_closed = self._envs[env_index].is_closed()
            # NOTE: These shouls always be the same, but just in case:
            self._envs_is_closed[env_index] = env_is_closed
        return self._envs_is_closed[env_index]

    def close(self, env_index: int = None) -> None:
        """Close the environment for the given index, or of all envs if `env_index` is
        `None`.
        """
        if env_index is None:
            logger.info(f"Closing all envs")
            for env_index, (env_is_closed, env) in enumerate(zip(self._envs_is_closed, self._envs)):
                if not env_is_closed:
                    self._envs_is_closed[env_index] = True
                    env.close()
            # BUG: Not sure why this is actually causing a recursion error.. The idea
            # was to call `MayCloseEarly.close()`.
            # super().close()
            self._is_closed = True
        else:
            if self._envs_is_closed[env_index]:
                raise RuntimeError(f"Env at index {env_index} is already closed...")
            self._envs_is_closed[env_index] = True
            self._envs[env_index].close()

    def seed(self, seed: Optional[int] = None) -> List[int]:
        """Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
            number generators. The first value in the list should be the
            "main" seed, or the value which a reproducer should pass to
            'seed'. Often, the main seed equals the provided 'seed', but
            this won't be true if seed=None, for example.
        """
        self.rng = np.random.default_rng(seed)
        env_seeds = self.rng.integers(0, 1e8, size=len(self._envs)).tolist()
        seeds = env_seeds.copy()
        for index, env_seed in enumerate(env_seeds):
            # NOTE: Would be nice to be able to NOT instantiate all the envs and just
            # seed them when they get created, but then we wouldn't be able to return
            # the seeds from all envs here (which I'm not 100% sure its thaaat useful..)
            self._instantiate_env(index)
            env = self._envs[index]
            env_seeds: Optional[List[int]] = env.seed(env_seed)
            seeds.extend(env_seeds or [])
        return seeds

    def observation(self, observation):
        if self._add_task_labels:
            return add_task_labels(observation, task_labels=self._current_task_id)
        return observation


class ConcatEnvsWrapper(MultiEnvWrapper):
    """Wrapper that exhausts the current environment before moving onto the next."""

    def __init__(
        self,
        envs: List[gym.Env],
        add_task_ids: bool = False,
        on_task_switch_callback: Callable[[Optional[int]], Any] = None,
    ):
        super().__init__(envs, add_task_ids=add_task_ids)
        self.on_task_switch_callback = on_task_switch_callback

    def set_task(self, task_id: int) -> None:
        # NOTE: If any wrappers try to store things onto the unwrapped env, then those
        # would need to be transfered over to the new env here.
        super().set_task(task_id)

    def reset(self):
        old_task = self._current_task_id
        observation = super().reset()
        new_task = self._current_task_id
        if self.on_task_switch_callback and old_task != new_task:
            self.on_task_switch_callback(new_task if self._add_task_labels else None)
        return observation

    def next_task(self) -> int:
        assert not all(self._envs_is_closed)
        if not self._envs_is_closed[self._current_task_id]:
            return self._current_task_id
        # TODO: Close the env when we reach the end? or leave that up to the wrapper?
        return (self._current_task_id + 1) % self.nb_tasks

    def __iter__(self):
        return super().__iter__()

    def send(self, action):
        return super().send(action)


# Register this as a 'concat' handler for gym environments!


@concatenate.register(gym.Env)
def _concatenate_gym_envs(first_env: gym.Env, *other_envs: gym.Env) -> ConcatEnvsWrapper:
    return ConcatEnvsWrapper([first_env, *other_envs])


class RoundRobinWrapper(MultiEnvWrapper):
    """MultiEnvWrapper that alternates between the non-closed environments in a
    round-robin fashion.
    """

    def __init__(self, envs, add_task_ids=False):
        super().__init__(envs, add_task_ids=add_task_ids)
        self._current_task_id = -1

    def next_task(self) -> int:
        assert not all(self._envs_is_closed)
        next_task = (self._current_task_id + 1) % self.nb_tasks
        while self._envs_is_closed[next_task]:
            next_task += 1
            next_task %= self.nb_tasks
        return next_task


class RandomMultiEnvWrapper(MultiEnvWrapper):
    def next_task(self) -> int:
        assert not all(self._envs_is_closed)
        available_ids = np.arange(self.nb_tasks)[~self._envs_is_closed].tolist()
        return self.rng.choice(available_ids)


class CustomMultiEnvWrapper(MultiEnvWrapper):
    """MultiEnvWrapper that uses a custom callable to determine which env to use next."""

    def __init__(
        self,
        envs: List[gym.Env],
        add_task_ids: bool = False,
        custom_new_task_fn: Callable[[MultiEnvWrapper], int] = None,
    ):
        super().__init__(envs, add_task_ids=add_task_ids)
        assert custom_new_task_fn, "Must pass a custom function to this wrapper."
        self._custom_new_task_fn = custom_new_task_fn

    def next_task(self):
        return self._custom_new_task_fn
        return super().next_task()


================================================
FILE: sequoia/settings/rl/discrete/multienv_wrappers_test.py
================================================
from collections import Counter
from functools import partial
from typing import List, Optional

import gym
import pytest
from gym import spaces
from gym.wrappers import TimeLimit

from sequoia.common.gym_wrappers.env_dataset import EnvDataset
from sequoia.common.gym_wrappers.episode_limit import EpisodeLimit
from sequoia.common.spaces import TypedDictSpace
from sequoia.settings.rl.continual.make_env import wrap
from sequoia.utils.utils import unique_consecutive_with_index

from .multienv_wrappers import ConcatEnvsWrapper, RandomMultiEnvWrapper, RoundRobinWrapper


class TestMultiEnvWrappers:
    @pytest.fixture()
    def iterable_env(self) -> gym.Env:
        return EnvDataset(gym.make("CartPole-v0"))

    @pytest.mark.parametrize("add_task_ids", [False, True])
    @pytest.mark.parametrize("nb_tasks", [5, 1])
    @pytest.mark.parametrize("pass_fn_instead_of_env", [False, True])
    def test_concat(self, add_task_ids: bool, nb_tasks: int, pass_fn_instead_of_env: bool):
        def set_attributes(env: gym.Env, **attributes) -> gym.Env:
            for k, v in attributes.items():
                setattr(env.unwrapped, k, v)
            return env

        max_episodes_per_task = 5
        envs = [
            partial(
                EpisodeLimit,
                TimeLimit(
                    set_attributes(gym.make("CartPole-v0"), length=0.1 + 0.2 * i),
                    max_episode_steps=10,
                ),
                max_episodes=max_episodes_per_task,
            )
            for i in range(nb_tasks)
        ]
        if not pass_fn_instead_of_env:
            envs = [env_fn() for env_fn in envs]

        env = ConcatEnvsWrapper(envs, add_task_ids=add_task_ids)
        assert env.nb_tasks == nb_tasks

        if add_task_ids:
            assert env.observation_space["task_labels"] == spaces.Discrete(env.nb_tasks)
        lengths = []
        for episode in range(nb_tasks * max_episodes_per_task):
            print(f"Episode: {episode}, length: {round(env.unwrapped.length, 5)}")
            obs = env.reset()
            lengths.append(env.unwrapped.length)

            env_id = episode // max_episodes_per_task
            assert env._current_task_id == env_id, episode
            if add_task_ids:
                assert obs["task_labels"] == env_id
            step = 0
            done = False
            while not done:
                obs, rewards, done, info = env.step(env.action_space.sample())
                step += 1
                if step == 10:
                    assert done
                assert step <= 10

        # NOTE: It's pretty cool that we actually recover something like the task
        # schedule here! :D
        episode_task_schedule = dict(unique_consecutive_with_index(lengths))
        assert episode_task_schedule == {
            i * max_episodes_per_task: 0.1 + 0.2 * i for i in range(nb_tasks)
        }
        assert env.is_closed()

        # TODO: This does the same with an additional StepLimit (ActionLimit) wrapper,
        # and isn't stable because it depends on each episode being 10 long, and
        # CartPole ends earlier sometimes.
        # envs = [
        #     ActionLimit(TimeLimit(gym.make("CartPole-v0"), max_episode_steps=10), max_steps=50)
        #     for i in range(5)
        # ]
        # env = ConcatEnvsWrapper(envs)
        # assert env.nb_tasks == 5

        # for episode in range(25):
        #     print(f"Episode: {episode}")
        #     print(env.max_steps, env.step_count())
        #     obs = env.reset()
        #     env_id = episode // 5
        #     assert env._current_task_id == env_id, episode
        #     step = 0
        #     done = False
        #     while not done:
        #         print(step)
        #         obs, rewards, done, info = env.step(env.action_space.sample())
        #         step += 1
        #         if step == 10:
        #             assert done
        #         assert step <= 10

        # assert env.is_closed()

    @pytest.mark.parametrize("add_task_ids", [False, True])
    @pytest.mark.parametrize("nb_tasks", [5, 1])
    def test_roundrobin(self, add_task_ids: bool, nb_tasks: int):
        max_episodes_per_task = 5
        max_episode_steps = 10
        envs = [
            EpisodeLimit(
                TimeLimit(gym.make("CartPole-v0"), max_episode_steps=max_episode_steps),
                max_episodes=max_episodes_per_task,
            )
            for i in range(nb_tasks)
        ]
        env = RoundRobinWrapper(envs, add_task_ids=add_task_ids)
        assert env.nb_tasks == nb_tasks
        if add_task_ids:
            assert env.observation_space["task_labels"] == spaces.Discrete(env.nb_tasks)
        else:
            assert env.observation_space == env._envs[0].observation_space

        for episode in range(nb_tasks * max_episodes_per_task):
            print(f"Episode: {episode}")
            obs = env.reset()
            env_id = episode % nb_tasks
            assert env._current_task_id == env_id, episode
            step = 0
            done = False
            while not done:
                print(step)
                obs, rewards, done, info = env.step(env.action_space.sample())
                step += 1
                if step == max_episode_steps:
                    assert done
                assert step <= max_episode_steps

        assert env.is_closed()

    def test_random(self):
        episodes_per_task = 5
        max_episode_steps = 10
        nb_tasks = 5
        envs = [
            EpisodeLimit(
                TimeLimit(gym.make("CartPole-v0"), max_episode_steps=max_episode_steps),
                max_episodes=episodes_per_task,
            )
            for i in range(nb_tasks)
        ]
        env = RandomMultiEnvWrapper(envs)
        env.seed(123)
        assert env.nb_tasks == nb_tasks
        task_ids: List[int] = []
        for episode in range(nb_tasks * episodes_per_task):
            print(f"Episode: {episode}")
            obs = env.reset()
            env_id = episode // nb_tasks
            task_ids.append(env._current_task_id)
            step = 0
            done = False
            print(env._envs_is_closed)
            while not done:
                print(step)
                obs, rewards, done, info = env.step(env.action_space.sample())
                step += 1
                if step == max_episode_steps:
                    assert done
                assert step <= max_episode_steps
        assert env.is_closed()
        from collections import Counter

        # Assert that the task ids are 'random':
        import torch

        assert len(torch.unique_consecutive(torch.as_tensor(task_ids))) > nb_tasks
        assert Counter(task_ids) == {i: episodes_per_task for i in range(nb_tasks)}

    def test_iteration(self, iterable_env: gym.Env):
        """TODO: Interesting bug! Might be because when switching between envs, we're
        setting the 'cached' attributes onto the unwrapped env, and so when we move to
        another env, we all of a sudden don't have those attributes!
        """
        max_episode_steps = 10
        episodes_per_task = 5
        add_task_ids = True
        nb_tasks = 5

        def set_attributes(env: gym.Env, **attributes) -> gym.Env:
            for k, v in attributes.items():
                setattr(env.unwrapped, k, v)
            return env

        from functools import partial

        envs = [
            wrap(
                gym.make("CartPole-v0"),
                [
                    partial(TimeLimit, max_episode_steps=max_episode_steps),
                    partial(set_attributes, length=0.1 + 0.2 * i),
                    partial(EpisodeLimit, max_episodes=episodes_per_task),
                ],
            )
            for i in range(nb_tasks)
        ]

        on_task_switch_received_task_ids: List[Optional[int]] = []

        def on_task_switch(task_id: Optional[int]) -> None:
            print(f"On task switch: {task_id}.")
            on_task_switch_received_task_ids.append(task_id)

        env = ConcatEnvsWrapper(
            envs, add_task_ids=add_task_ids, on_task_switch_callback=on_task_switch
        )
        env = EnvDataset(env)

        env.seed(123)
        assert env.nb_tasks == nb_tasks
        if add_task_ids:
            assert env.observation_space == TypedDictSpace(
                x=env.env._envs[0].observation_space,
                task_labels=spaces.Discrete(nb_tasks),
            )
        else:
            assert env.observation_space == env.env._envs[0].observation_space
        assert env.observation_space.sample() in env.observation_space
        task_ids: List[int] = []
        lengths_at_each_step = []
        lengths_at_each_episode = []

        for episode in range(nb_tasks * episodes_per_task):
            env_id = episode // episodes_per_task

            episode_task_ids: List[int] = []

            for step, obs in enumerate(env):
                assert obs in env.observation_space
                print(f"Episode {episode}, Step {step}: obs: {obs}, length: {env.unwrapped.length}")
                if step == 0:
                    lengths_at_each_episode.append(env.unwrapped.length)
                lengths_at_each_step.append(env.unwrapped.length)

                if add_task_ids:
                    assert list(obs.keys()) == ["x", "task_labels"]
                    obs_task_id = obs["task_labels"]
                    episode_task_ids.append(obs_task_id)
                    print(f"obs Task id: {obs_task_id}")

                rewards = env.send(env.action_space.sample())
                if step > max_episode_steps:
                    assert False, "huh?"

            if add_task_ids:
                assert (
                    len(set(episode_task_ids)) == 1
                ), f"all observations within an episode should have the same task id.: {episode_task_ids}"
                # Add the unique task id from this episode to the list of all task ids.
                task_ids.extend(set(episode_task_ids))

        actual_task_schedule = dict(unique_consecutive_with_index(lengths_at_each_step))
        assert len(actual_task_schedule) == nb_tasks
        assert env.is_closed()

        if add_task_ids:
            assert task_ids == sum([[i] * episodes_per_task for i in range(nb_tasks)], [])
            # should have received one per boundary
            assert on_task_switch_received_task_ids == list(range(1, nb_tasks))
            assert Counter(task_ids) == {i: episodes_per_task for i in range(nb_tasks)}
        else:
            assert on_task_switch_received_task_ids == [None] * (nb_tasks - 1)

    def test_adding_envs(self):
        from sequoia.common.gym_wrappers.env_dataset import EnvDataset

        env_1 = EnvDataset(
            EpisodeLimit(TimeLimit(gym.make("CartPole-v1"), max_episode_steps=10), max_episodes=5)
        )
        env_2 = EnvDataset(
            EpisodeLimit(TimeLimit(gym.make("CartPole-v1"), max_episode_steps=10), max_episodes=5)
        )
        chained_env = env_1 + env_2
        assert chained_env._envs[0] is env_1
        assert chained_env._envs[1] is env_2
        # TODO: Do we add a 'len' attribute?
        # assert False, len(chained_env)
        # assert


def test_batched_envs():
    """TODO: Not sure how this will work with batched envs, but if it did, we could
    allow batch_size > 1 in Discrete, or batched custom envs in Incremental.
    """


================================================
FILE: sequoia/settings/rl/discrete/results.py
================================================
from typing import ClassVar, TypeVar

from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings.assumptions.discrete_results import TaskSequenceResults

MetricType = TypeVar("MetricsType", bound=EpisodeMetrics)


class DiscreteTaskAgnosticRLResults(TaskSequenceResults[MetricType]):
    """Results for a sequence of tasks in an RL Setting

    This can be seen as one row of a transfer matrix.
    NOTE: This is not the entire transfer matrix because in the Discrete settings we don't
    evaluate after learning each task.
    """

    # Higher mean reward / episode => better
    lower_is_better: ClassVar[bool] = False

    objective_name: ClassVar[str] = "Mean reward per episode"

    # Minimum runtime considered (in hours).
    # (No extra points are obtained for going faster than this.)
    min_runtime_hours: ClassVar[float] = 1.5
    # Maximum runtime allowed (in hours).
    max_runtime_hours: ClassVar[float] = 12.0


================================================
FILE: sequoia/settings/rl/discrete/setting.py
================================================
from dataclasses import dataclass
from typing import Any, Callable, ClassVar, Dict, Optional, Type, Union

from gym.envs.registration import EnvSpec, registry
from simple_parsing import field
from simple_parsing.helpers import choice

from sequoia.common.gym_wrappers.utils import is_monsterkong_env
from sequoia.settings.assumptions.context_discreteness import DiscreteContextAssumption
from sequoia.settings.rl.continual.tasks import TaskSchedule, registry
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import dict_union

from ..continual.setting import ContinualRLSetting
from ..continual.setting import supported_envs as _parent_supported_envs
from .tasks import DiscreteTask, is_supported, make_discrete_task
from .test_environment import DiscreteTaskAgnosticRLTestEnvironment

logger = get_logger(__name__)

supported_envs: Dict[str, EnvSpec] = dict_union(
    _parent_supported_envs,
    {
        spec.id: spec
        for env_id, spec in registry.env_specs.items()
        if spec.id not in _parent_supported_envs and is_supported(env_id)
    },
)
available_datasets: Dict[str, str] = {env_id: env_id for env_id in supported_envs}

from .results import DiscreteTaskAgnosticRLResults


@dataclass
class DiscreteTaskAgnosticRLSetting(DiscreteContextAssumption, ContinualRLSetting):
    """Continual Reinforcement Learning Setting where there are clear task boundaries,
    but where the task information isn't available.
    """

    # TODO: Update the type or results that we get for this Setting.
    Results: ClassVar[Type[Results]] = DiscreteTaskAgnosticRLResults

    # The type wrapper used to wrap the test environment, and which produces the
    # results.
    TestEnvironment: ClassVar[Type[TestEnvironment]] = DiscreteTaskAgnosticRLTestEnvironment

    # The function used to create the tasks for the chosen env.
    _task_sampling_function: ClassVar[Callable[..., DiscreteTask]] = make_discrete_task

    # Class variable that holds the dict of available environments.
    available_datasets: ClassVar[Dict[str, Union[str, Any]]] = available_datasets

    # Which environment (a.k.a. "dataset") to learn on.
    # The dataset could be either a string (env id or a key from the
    # available_datasets dict), a gym.Env, or a callable that returns a
    # single environment.
    dataset: str = choice(available_datasets, default="CartPole-v0")

    # The number of "tasks" that will be created for the training, valid and test
    # environments. When left unset, will use a default value that makes sense
    # (something like 5).
    nb_tasks: int = field(5, alias=["n_tasks", "num_tasks"])

    # Maximum number of training steps per task.
    train_steps_per_task: Optional[int] = None
    # Number of test steps per task.
    test_steps_per_task: Optional[int] = None

    # # Maximum number of episodes in total.
    # train_max_episodes: Optional[int] = None
    # # TODO: Add tests for this 'max episodes' and 'episodes_per_task'.
    # train_max_episodes_per_task: Optional[int] = None
    # # Total number of steps in the test loop. (Also acts as the "length" of the testing
    # # environment.)
    # test_max_steps_per_task: int = 10_000
    # test_max_episodes_per_task: Optional[int] = None

    # # Max number of steps per training task. When left unset and when `train_max_steps`
    # # is set, takes the value of `train_max_steps` divided by `nb_tasks`.
    # train_max_steps_per_task: Optional[int] = None
    # # (WIP): Maximum number of episodes per training task. When left unset and when
    # # `train_max_episodes` is set, takes the value of `train_max_episodes` divided by
    # # `nb_tasks`.
    # train_max_episodes_per_task: Optional[int] = None
    # # Maximum number of steps per task in the test loop. When left unset and when
    # # `test_max_steps` is set, takes the value of `test_max_steps` divided by `nb_tasks`.
    # test_max_steps_per_task: Optional[int] = None
    # # (WIP): Maximum number of episodes per test task. When left unset and when
    # # `test_max_episodes` is set, takes the value of `test_max_episodes` divided by
    # # `nb_tasks`.
    # test_max_episodes_per_task: Optional[int] = None

    # def warn(self, warning: Warning):
    #     logger.warning(warning)
    #     warnings.warn(warning)

    def __post_init__(self):
        # TODO: Rework all the messy fields from before by just considering these as eg.
        # the maximum number of steps per task, rather than the fixed number of steps
        # per task.
        assert not self.smooth_task_boundaries

        super().__post_init__()

        if self.max_episode_steps is None:
            if is_monsterkong_env(self.dataset):
                self.max_episode_steps = 500

    def create_train_task_schedule(self) -> TaskSchedule[DiscreteTask]:
        # IDEA: Could convert max_episodes into max_steps if max_steps_per_episode is
        # set.
        return super().create_train_task_schedule()

    def create_val_task_schedule(self) -> TaskSchedule[DiscreteTask]:
        # Always the same as train task schedule for now.
        return super().create_val_task_schedule()

    def create_test_task_schedule(self) -> TaskSchedule[DiscreteTask]:
        return super().create_test_task_schedule()


================================================
FILE: sequoia/settings/rl/discrete/setting_test.py
================================================
from dataclasses import fields
from typing import Any, ClassVar, Dict, Optional, Type

import gym
import pytest

from sequoia.common.config import Config
from sequoia.conftest import monsterkong_required, param_requires_monsterkong
from sequoia.methods import Method
from sequoia.settings.assumptions.incremental_test import DummyMethod as _DummyMethod
from sequoia.settings.rl.envs import MetaMonsterKongEnv

from ..continual.setting_test import TestContinualRLSetting as ContinualRLSettingTests
from .setting import DiscreteTaskAgnosticRLSetting


class TestDiscreteTaskAgnosticRLSetting(ContinualRLSettingTests):
    Setting: ClassVar[Type[Setting]] = DiscreteTaskAgnosticRLSetting
    dataset: pytest.fixture

    @pytest.fixture(params=[1, 3])
    def nb_tasks(self, request):
        n = request.param
        return n

    @pytest.fixture()
    def setting_kwargs(self, dataset: str, nb_tasks: int, config: Config):
        """Fixture used to pass keyword arguments when creating a Setting."""
        return {"dataset": dataset, "nb_tasks": nb_tasks, "config": config}

    @pytest.mark.parametrize(
        "dataset, expected_resulting_name",
        [
            param_requires_monsterkong("monsterkong", "MetaMonsterKong-v0"),
            param_requires_monsterkong("monsterkong-v0", "MetaMonsterKong-v0"),
            param_requires_monsterkong("meta_monsterkong", "MetaMonsterKong-v0"),
            ("cartpole", "CartPole-v1"),
        ],
    )
    def test_passing_name_variant_works(self, dataset: str, expected_resulting_name: str):
        assert self.Setting(dataset=dataset).dataset == expected_resulting_name

    def validate_results(
        self,
        setting: DiscreteTaskAgnosticRLSetting,
        method: Method,
        results: DiscreteTaskAgnosticRLSetting.Results,
    ) -> None:
        assert results
        assert results.objective
        assert len(results.task_results) == setting.nb_tasks
        assert [
            sum(task_result.metrics) == task_result.average_metrics
            for task_result in results.task_results
        ]
        assert (
            sum(task_result.average_metrics for task_result in results.task_results)
            == results.average_metrics
        )

    @pytest.mark.parametrize("give_nb_tasks", [True, False])
    @pytest.mark.parametrize("give_train_max_steps", [True, False])
    @pytest.mark.parametrize(
        "give_train_task_schedule, ids_instead_of_steps",
        [(True, False), (True, True), (False, False)],
    )
    @pytest.mark.parametrize(
        "nb_tasks, train_max_steps, train_task_schedule",
        [
            (1, 10_000, {0: {"gravity": 5.0}, 10_000: {"gravity": 10}}),
            (
                4,
                100_000,
                {
                    0: {"gravity": 5.0},
                    25_000: {"gravity": 10},
                    50_000: {"gravity": 10},
                    75_000: {"gravity": 10},
                    100_000: {"gravity": 20},
                },
            ),
        ],
    )
    def test_fields_are_consistent(
        self,
        nb_tasks: Optional[int],
        train_max_steps: Optional[int],
        train_task_schedule: Optional[Dict[str, Any]],
        give_nb_tasks: bool,
        give_train_max_steps: bool,
        give_train_task_schedule: bool,
        ids_instead_of_steps: bool,
    ):

        # give_nb_tasks = True
        # give_max_steps = True
        # give_task_schedule = True
        defaults = {f.name: f.default for f in fields(self.Setting)}
        default_max_train_steps = defaults["train_max_steps"]
        default_nb_tasks = defaults["nb_tasks"]
        # TODO: Same test for test_max_steps?
        full_kwargs = dict(
            nb_tasks=nb_tasks,
            train_max_steps=train_max_steps,
            train_task_schedule=train_task_schedule,
        )
        # TODO: Should also pass nothing, and expect an error to be raised?
        kwargs = full_kwargs.copy()
        if not give_nb_tasks:
            kwargs.pop("nb_tasks")
        if not give_train_max_steps:
            kwargs.pop("train_max_steps")
        if not give_train_task_schedule:
            kwargs.pop("train_task_schedule")
        elif ids_instead_of_steps:
            kwargs["train_task_schedule"] = {
                i: task for i, (step, task) in enumerate(train_task_schedule.items())
            }

        setting = self.Setting(**kwargs)
        assert (
            setting.nb_tasks == nb_tasks
            if give_nb_tasks
            else len(train_task_schedule)
            if give_train_task_schedule
            else default_nb_tasks
        )
        assert (
            setting.train_max_steps == train_max_steps
            if give_train_max_steps
            else max(train_task_schedule)
            if give_train_task_schedule
            else default_max_train_steps
        )
        assert list(setting.train_task_schedule.keys()) == [
            i * (setting.train_max_steps / setting.nb_tasks) for i in range(0, setting.nb_tasks + 1)
        ]
        assert list(setting.val_task_schedule.keys()) == [
            i * (setting.train_max_steps / setting.nb_tasks) for i in range(0, setting.nb_tasks + 1)
        ]
        assert list(setting.test_task_schedule.keys()) == [
            i * (setting.test_max_steps / setting.nb_tasks) for i in range(0, setting.nb_tasks + 1)
        ]

        # When giving only the number of tasks:


from typing import Any, Dict, Optional


def test_fit_and_on_task_switch_calls(config: Config):
    setting = DiscreteTaskAgnosticRLSetting(
        dataset="CartPole-v0",
        # nb_tasks=5,
        # train_steps_per_task=100,
        train_max_steps=500,
        test_max_steps=500,
        # test_steps_per_task=100,
        train_transforms=[],
        test_transforms=[],
        val_transforms=[],
        config=config,
    )
    method = _DummyMethod()
    _ = setting.apply(method)
    # == 30 task switches in total.
    assert method.n_task_switches == 0
    assert method.n_fit_calls == 1
    assert not method.received_task_ids
    assert not method.received_while_training


@monsterkong_required
@pytest.mark.parametrize(
    "dataset, expected_env_type",
    [
        ("MetaMonsterKong-v0", MetaMonsterKongEnv),
        ("monsterkong", MetaMonsterKongEnv),
        ("PixelMetaMonsterKong-v0", MetaMonsterKongEnv),
        ("monster_kong", MetaMonsterKongEnv),
        ("monster_kong", MetaMonsterKongEnv),
        # ("halfcheetah", ContinualHalfCheetahEnv),
        # ("HalfCheetah-v2", ContinualHalfCheetahV2Env),
        # ("HalfCheetah-v3", ContinualHalfCheetahV3Env),
        # ("ContinualHalfCheetah-v2", ContinualHalfCheetahV2Env),
        # ("ContinualHalfCheetah-v3", ContinualHalfCheetahV3Env),
        # ("ContinualHopper-v2", ContinualHopperEnv),
        # ("hopper", ContinualHopperEnv),
        # ("Hopper-v2", ContinualHopperEnv),
        # ("walker2d", ContinualWalker2dV3Env),
        # ("Walker2d-v2", ContinualWalker2dV2Env),
        # ("Walker2d-v3", ContinualWalker2dV3Env),
        # ("ContinualWalker2d-v2", ContinualWalker2dV2Env),
        # ("ContinualWalker2d-v3", ContinualWalker2dV3Env),
    ],
)
def test_monsterkong_env_name_maps_to_continual_variant(
    dataset: str, expected_env_type: Type[gym.Env]
):
    setting = DiscreteTaskAgnosticRLSetting(
        dataset=dataset, train_max_steps=10_000, test_max_steps=10_000
    )
    train_env = setting.train_dataloader()
    assert isinstance(train_env.unwrapped, expected_env_type)


================================================
FILE: sequoia/settings/rl/discrete/tasks.py
================================================
""" Functions that create 'discrete' tasks for an environment. 

TODO: Once we have a wrapper that can seamlessly switch from one env to the next, then
move the "incremental" tasks from `incremental/tasks.py` to this level.
"""

import warnings
from functools import partial, singledispatch
from typing import Any, Callable, Dict, List, Optional, Union

import gym
import numpy as np

from sequoia.settings.rl.envs import MONSTERKONG_INSTALLED, MetaMonsterKongEnv, sequoia_registry

from ..continual.tasks import (
    ContinuousTask,
    _is_supported,
    make_continuous_task,
    task_sampling_function,
)

DiscreteTask = Union[ContinuousTask, Callable[[gym.Env], Any]]


@task_sampling_function(env_registry=sequoia_registry, based_on=make_continuous_task)
@singledispatch
def make_discrete_task(
    env: gym.Env,
    *,
    step: int,
    change_steps: List[int],
    seed: int = None,
    **kwargs,
) -> DiscreteTask:
    """Generic function used by Sequoia's `DiscreteTaskAgnosticRLSetting` (and its
    descendants) to create a "task" that will be applied to an environment like `env`.

    To add support for a new type of environment, simply register a handler function:

    ```
    @make_discrete_task.register(SomeGymEnvClass)
    def make_discrete_task_for_my_env(env: SomeGymEnvClass, step: int, change_steps: List[int], **kwargs,):
        return {"my_attribute": random.random()}
    ```
    """
    raise NotImplementedError(f"Don't currently know how to create a discrete task for env {env}")
    # return make_continuous_task(
    #     env, step=step, change_steps=change_steps, seed=seed, **kwargs
    # )


is_supported = partial(_is_supported, _make_task_function=make_discrete_task)


if MONSTERKONG_INSTALLED:
    # In MonsterKong the tasks can be changed on-the-fly, whereas they can't in the
    # size-based MUJOCO envs.

    @make_discrete_task.register
    def make_task_for_monsterkong_env(
        env: MetaMonsterKongEnv,
        step: int,
        change_steps: List[int] = None,
        seed: int = None,
        **kwargs,
    ) -> Union[Dict[str, Any], Any]:
        """Samples a task for the MonsterKong environment.

        TODO: When given a seed, sample the task randomly (but deterministicly) using
        the seed.
        """
        assert change_steps is not None, "Need task boundaries to construct the task schedule."

        if step not in change_steps:
            raise RuntimeError(
                f"Monsterkong's has discrete tasks, {step} should be in {change_steps}!"
            )
        task_index = change_steps.index(step)

        # TODO: double-check with @mattriemer on this:
        n_supported_levels = 30
        # IDEA: Could also have a list of supported levels
        levels = list(range(n_supported_levels))
        nb_tasks = len(change_steps)

        rng: Optional[np.random.Generator] = None
        if seed is not None:
            # perform a deterministic shuffling of the 'task ids'
            rng = np.random.default_rng(seed)
            rng.shuffle(levels)

        level: int
        if task_index >= n_supported_levels:
            warnings.warn(
                RuntimeWarning(
                    f"The given task id ({task_index}) is greater than the number of "
                    f"levels currently available in MonsterKong "
                    f"({n_supported_levels})!\n"
                    f"Multiple tasks may therefore use the same level!"
                )
            )
            # Option 1: Loop back around, using the same task as the first task?
            # (Probably not a good idea, since then we might get to train on the first
            # tasks right before testing begins! (which isnt great as a CL evaluation)
            # task_index %= n_supported_levels

            # Option 2 (better): Sample levels at random after all other levels have been
            # exhausted.
            # NOTE: Other calls to this should not get the same value!
            rng = rng or np.random.default_rng(seed)
            random_extra_levels = rng.integers(
                0, n_supported_levels, size=nb_tasks - n_supported_levels
            )
            level = int(random_extra_levels[task_index - n_supported_levels])
        else:
            level = levels[task_index]

        return {"level": level}


================================================
FILE: sequoia/settings/rl/discrete/tasks_test.py
================================================
import pytest

from sequoia.conftest import monsterkong_required
from sequoia.settings.rl.envs import MetaMonsterKongEnv

from .tasks import make_discrete_task


@monsterkong_required
def test_monsterkong_tasks():
    # assert make_discrete_task.is_supported(MetaMonsterKongEnv)
    task = make_discrete_task(MetaMonsterKongEnv, step=0, change_steps=[0, 100, 200])
    assert task == {"level": 0}

    task = make_discrete_task(MetaMonsterKongEnv, step=100, change_steps=[0, 100, 200])
    assert task == {"level": 1}

    with pytest.raises(RuntimeError):
        _ = make_discrete_task(MetaMonsterKongEnv, step=123, change_steps=[0, 100, 200])


================================================
FILE: sequoia/settings/rl/discrete/test_environment.py
================================================
import itertools
import math
from typing import Dict

from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings.assumptions.discrete_results import TaskSequenceResults
from sequoia.settings.assumptions.iid_results import TaskResults

from ..continual.test_environment import ContinualRLTestEnvironment


class DiscreteTaskAgnosticRLTestEnvironment(ContinualRLTestEnvironment):
    def __init__(self, *args, task_schedule: Dict, **kwargs):
        super().__init__(*args, task_schedule=task_schedule, **kwargs)
        self.task_schedule = task_schedule
        self.boundary_steps = [step // (self.batch_size or 1) for step in self.task_schedule.keys()]
        # TODO: Removing the last entry since it's the terminal state.
        self.boundary_steps.pop(-1)

    def __len__(self):
        return math.ceil(self.step_limit / (getattr(self.env, "batch_size", 1) or 1))

    def get_results(self) -> TaskSequenceResults[EpisodeMetrics]:
        # TODO: Place the metrics in the right 'bin' at the end of each episode during
        # testing depending on the task at that time, rather than what's happening here,
        # where we're getting all the rewards and episode lengths at the end and then
        # sort it out into the bins based on the task schedule. ALSO: this would make it
        # easier to support monitoring batched RL environments, since these `Monitor`
        # methods (get_episode_rewards, get_episode_lengths, etc) assume the environment
        # isn't batched.
        rewards = self.get_episode_rewards()
        lengths = self.get_episode_lengths()

        task_schedule: Dict[int, Dict] = self.task_schedule
        task_steps = sorted(task_schedule.keys())
        # TODO: Removing the last entry since it's the terminal state.
        task_steps.pop(-1)

        assert 0 in task_steps
        import bisect

        nb_tasks = len(task_steps)
        assert nb_tasks >= 1

        test_results = TaskSequenceResults([TaskResults() for _ in range(nb_tasks)])
        # TODO: Fix this, since the task id might not be related to the steps!
        for step, episode_reward, episode_length in zip(
            itertools.accumulate(lengths), rewards, lengths
        ):
            # Given the step, find the task id.
            task_id = bisect.bisect_right(task_steps, step) - 1

            episode_metric = EpisodeMetrics(
                n_samples=1,
                mean_episode_reward=episode_reward,
                mean_episode_length=episode_length,
            )

            test_results.task_results[task_id].metrics.append(episode_metric)

        return test_results

    def render(self, mode="human", **kwargs):
        # TODO: This might not be setup right. Need to check.
        image_batch = super().render(mode=mode, **kwargs)
        if mode == "rgb_array" and self.batch_size:
            return tile_images(image_batch)
        return image_batch

    def _after_reset(self, observation):
        # Is this going to work fine when the observations are batched though?
        return super()._after_reset(observation)


================================================
FILE: sequoia/settings/rl/environment.py
================================================
from typing import *

from torch.utils.data import DataLoader, Dataset, IterableDataset

from sequoia.settings.base.environment import ActionType, Environment, ObservationType, RewardType
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)

from typing_extensions import Final

from .objects import ActionType, ObservationType, RewardType

# TODO: Instead of using a 'y' field for both the supervised learning labels/target and
# for the reward in RL, instead use a 'reward' field in RL, and a 'y' field in SL, where
# in SL the reward could actually be wether the chosen action was correct or not, and
# 'y' could contain the correct prediction for each action.


class RLEnvironment(DataLoader, Environment[ObservationType, ActionType, RewardType]):
    """Environment in an RL Setting.

    Extends DataLoader to support sending back actions to the 'dataset'.

    This could be useful for modeling RL or Active Learning, for instance, where
    the predictions (actions) have an impact on the data generation process.

    TODO: Not really used at the moment besides as the base class for the GymDataLoader.
    TODO: Maybe add a custom `map` class for generators?

    Iterating through an RL Environment is different than when iterating on an SL
    environment:
        - Batches only contain the observations, rather than (observations, rewards)
        - The rewards are given back after an action is sent to the environment using
          `send`.

    TODO: maybe change this class into something like a `FakeActiveEnvironment`.

    """

    actions_influence_future_observations: Final[bool] = True

    def __init__(self, dataset: Union[Dataset, IterableDataset], **dataloader_kwargs):
        super().__init__(dataset, **dataloader_kwargs)
        self.observation: ObservationType = None
        self.action: ActionType = None
        self.reward: RewardType = None

    # def __next__(self) -> ObservationType:
    #     return self.observation

    def send(self, action: ActionType) -> RewardType:
        """Sends an action to the 'dataset'/'Environment'.

        Does nothing when the environment is a simple Dataset (when it isn't an
        instance of EnvironmentBase).

        TODO: Figure out the interactions with num_workers and send, if any.
        """
        self.action = action
        if hasattr(self.dataset, "send"):
            self.reward = self.dataset.send(self.action)
        # TODO: Clean this up, this is taken care of in the GymDataLoader class.
        # if hasattr(self.dataset, "step"):
        #     self.observation, self.reward, self.done, self.info = self.dataset.step(self.action)
        else:
            assert (
                False
            ), "TODO: ActiveDataloader dataset should always have a `send` attribute for now."
        return self.reward


# Deprecated names for the same thing:
ActiveDataLoader = RLEnvironment
ActiveEnvironment = RLEnvironment


================================================
FILE: sequoia/settings/rl/environment_test.py
================================================
from typing import Generator

from torch import Tensor
from torchvision.datasets import MNIST

from sequoia.utils.logging_utils import log_calls

from .environment import ActiveEnvironment


class ActiveMnistEnvironment(ActiveEnvironment[Tensor, Tensor, Tensor]):
    """An Mnist environment which will keep showing the same class until a
    correct prediction is made, and then switch to another class.

    Which will keep giving the same class until the right prediction is made.
    """

    def __init__(self, start_class: int = 0, **kwargs):
        self.current_class: int = 0
        dataset = MNIST("data")
        super().__init__(dataset, batch_size=None, **kwargs)
        self.observation: Tensor = None
        self.reward: Tensor = None
        self.action: Tensor = None

    @log_calls
    def __next__(self) -> Tensor:
        for x, y in self.dataset:
            # keep iterating while the example isn't of the right type.
            if y == self.current_class:
                self.observation = x
                self.reward = y
                break

        print(f"next obs: {self.observation}, next reward = {self.reward}")
        return self.observation

    @log_calls
    def __iter__(self) -> Generator[Tensor, Tensor, None]:
        while True:
            action = yield next(self)
            if action is not None:
                logger.debug(f"Received an action of {action} while iterating..")
                self.reward = self.send(action)

    @log_calls
    def send(self, action: Tensor) -> Tensor:
        print(f"received action {action}, returning current label {self.reward}")
        self.action = action
        if action == self.current_class:
            print("Switching classes since the prediction was right!")
            self.current_class += 1
            self.current_class %= 10
        else:
            print("Prediction was wrong, staying on the same class.")
        return self.reward


def test_active_mnist_environment():
    """Test the active mnist env, which will keep giving the same class until the right prediction is made."""
    env = ActiveMnistEnvironment()
    # So in this test, the env will only give samples of class 0, until a correct
    # prediction is made, then it will switch to giving samples of class 1, etc.

    # what the current class is (just for testing)
    _current_class = 0
    # first loop, where we always predict the right label.
    for i, x in enumerate(env):
        print(f"x: {x}")
        y_pred = i % 10
        print(f"Sending prediction of {y_pred}")
        y_true = env.send(y_pred)
        print(f"Received back {y_true}")
        assert y_pred == y_true
        if i == 9:
            break

    # current class should be 0 as last prediction was 9 and correct.
    _current_class = 0

    # Second loop, where we always predict the wrong label.
    for i, x in enumerate(env):
        print(f"x: {x}")
        y_pred = 1
        y_true = env.send(y_pred)
        assert y_true == 0

        if i > 2:
            break

    x = next(env)
    y_pred = 0
    y_true = env.send(y_pred)
    assert y_true == 0

    x = next(env)
    y_true = env.send(1)
    assert y_true == 1


================================================
FILE: sequoia/settings/rl/envs/__init__.py
================================================
import copy
import json
from abc import ABC
from contextlib import redirect_stdout
from io import StringIO
from pathlib import Path
from typing import Dict, List, Type, Union

import gym
from gym.envs.registration import EnvSpec, registry

from sequoia.utils import get_logger

logger = get_logger(__name__)

# IDEA: Modify a copy of the gym registry?
# sequoia_registry = copy.deepcopy(registry)
sequoia_registry = registry

from .classic_control import PixelObservationWrapper, register_classic_control_variants
from .variant_spec import EnvVariantSpec

register_classic_control_variants(sequoia_registry)


ATARI_PY_INSTALLED = False
try:
    from ale_py.gym.environment import ALGymEnv

    AtariEnv = ALGymEnv

    ATARI_PY_INSTALLED = True
except (gym.error.DependencyNotInstalled, ImportError):

    class AtariEnv(gym.Env):
        pass


MONSTERKONG_INSTALLED = False
try:
    # Redirecting stdout because this import prints stuff.
    from .monsterkong import MetaMonsterKongEnv, register_monsterkong_variants

    register_monsterkong_variants(sequoia_registry)
    MONSTERKONG_INSTALLED = True

except ImportError:

    class MetaMonsterKongEnv(gym.Env):
        pass


MTENV_INSTALLED = False
mtenv_envs = []
try:
    from mtenv import MTEnv
    from mtenv.envs.registration import mtenv_registry

    mtenv_envs = [env_spec.id for env_spec in mtenv_registry.all()]
    MTENV_INSTALLED = True
except ImportError:
    # Create a 'dummy' class so we can safely use MTEnv in the type hints below.
    # Additionally, isinstance(some_env, MTEnv) will always fail when mtenv isn't
    # installed, which is good.
    class MTEnv(gym.Env):
        pass


MUJOCO_INSTALLED = False
try:
    import mujoco_py

    mj_path, _ = mujoco_py.utils.discover_mujoco()
    from gym.envs.mujoco import MujocoEnv

    from .mujoco import (
        ContinualHalfCheetahEnv,
        ContinualHalfCheetahV2Env,
        ContinualHalfCheetahV3Env,
        ContinualHopperEnv,
        ContinualHopperV2Env,
        ContinualHopperV3Env,
        ContinualWalker2dEnv,
        ContinualWalker2dV2Env,
        ContinualWalker2dV3Env,
        register_mujoco_variants,
    )

    register_mujoco_variants(env_registry=sequoia_registry)
    MUJOCO_INSTALLED = True
except (
    ImportError,
    AttributeError,
    ValueError,
    gym.error.DependencyNotInstalled,
) as exc:
    logger.debug(f"Couldn't import mujoco: ({exc})")
    # Create a 'dummy' class so we can safely use type hints everywhere.
    # Additionally, `isinstance(some_env, <this class>)`` will always fail when the
    # dependency isn't installed, which is good.
    class MujocoEnv(gym.Env):
        pass

    class ContinualHalfCheetahEnv(MujocoEnv):
        pass

    class ContinualHalfCheetahV2Env(MujocoEnv):
        pass

    class ContinualHalfCheetahV3Env(MujocoEnv):
        pass

    class ContinualHopperEnv(MujocoEnv):
        pass

    class ContinualHopperV2Env(MujocoEnv):
        pass

    class ContinualHopperV3Env(MujocoEnv):
        pass

    class ContinualWalker2dEnv(MujocoEnv):
        pass

    class ContinualWalker2dV2Env(MujocoEnv):
        pass

    class ContinualWalker2dV3Env(MujocoEnv):
        pass


METAWORLD_INSTALLED = False
metaworld_envs: List[Type[gym.Env]] = []

try:
    if not MUJOCO_INSTALLED:
        # Skip the stuff below, since metaworld requires mujoco anyway.
        raise ImportError

    import metaworld
    from metaworld import MetaWorldEnv

    # TODO: Use mujoco from metaworld? or from mujoco_py?
    from metaworld.envs.mujoco.mujoco_env import MujocoEnv as MetaWorldMujocoEnv
    from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import SawyerXYZEnv

    # from metaworld.envs.mujoco.mujoco_env import MujocoEnv

    METAWORLD_INSTALLED = True
    # metaworld_dir = getsourcefile(metaworld)
    # mujoco_dir = Path("~/.mujoco").expanduser()
    # TODO: Cache the names of the metaworld envs to a file, just so we don't take about
    # 10 seconds to import metaworld every time?
    # TODO: Make sure this also works on a cluster.
    # TODO: When updating metaworld, need to remove this file.
    envs_cache_file = Path("temp/metaworld_envs.json")
    envs_cache_file.parent.mkdir(exist_ok=True)
    all_metaworld_envs: Dict[str, List[str]] = {}

    if envs_cache_file.exists():
        with open(envs_cache_file, "r") as f:
            all_metaworld_envs = json.load(f)
    else:
        print(
            "Loading up the list of available envs from metaworld for the first time, "
            "this might take a while (usually ~10 seconds)."
        )

    if "ML10" not in all_metaworld_envs:
        ML10_envs = list(metaworld.ML10().train_classes.keys())
        all_metaworld_envs["ML10"] = ML10_envs

    with open(envs_cache_file, "w") as f:
        json.dump(all_metaworld_envs, f)

    metaworld_envs = sum([list(envs) for envs in all_metaworld_envs.values()], [])
except (ImportError, AttributeError, gym.error.DependencyNotInstalled) as e:
    logger.debug(f"Unable to import metaworld: {e}")
    # raise e


if not METAWORLD_INSTALLED:
    # Create a 'dummy' class so we can safely use MetaWorldEnv in the type hints below.
    # Additionally, isinstance(some_env, MetaWorldEnv) will always fail when metaworld
    # isn't installed, which is good.
    class MetaWorldEnv(gym.Env, ABC):
        pass

    class MetaWorldMujocoEnv(gym.Env, ABC):
        pass

    class SawyerXYZEnv(gym.Env, ABC):
        pass


================================================
FILE: sequoia/settings/rl/envs/classic_control.py
================================================
""" Registers variants of the classic-control envs that are used by sequoia. """
# TODO: Add Pixel???-v? variants for the classic-control envs.
from typing import Dict

from gym.envs.registration import EnvRegistry, EnvSpec, registry

from sequoia.common.gym_wrappers.pixel_observation import PixelObservationWrapper

from .variant_spec import EnvVariantSpec


def register_classic_control_variants(env_registry: EnvRegistry = registry) -> None:
    """Adds pixel variants for the classic-control envs to the given registry in-place."""
    classic_control_env_specs: Dict[str, EnvSpec] = {
        spec.id: spec
        for env_id, spec in env_registry.env_specs.items()
        if isinstance(spec.entry_point, str)
        and spec.entry_point.startswith("gym.envs.classic_control")
    }

    for env_id, env_spec in classic_control_env_specs.items():
        new_id = "Pixel" + env_id
        if new_id not in env_registry.env_specs:
            new_spec = EnvVariantSpec.of(
                env_spec, new_id=new_id, wrappers=[PixelObservationWrapper]
            )
            env_registry.env_specs[new_id] = new_spec


================================================
FILE: sequoia/settings/rl/envs/monsterkong.py
================================================
from contextlib import redirect_stdout
from io import StringIO

import numpy as np
from gym import spaces
from gym.envs.registration import EnvRegistry, EnvSpec, registry

# Avoid print statements from pygame package.
with redirect_stdout(StringIO()):
    from meta_monsterkong.make_env import MetaMonsterKongEnv

from .variant_spec import EnvVariantSpec


def observe_state(env: MetaMonsterKongEnv) -> MetaMonsterKongEnv:
    if not env.observe_state:
        env.unwrapped.observe_state = True
        env.unwrapped.observation_space = spaces.Box(
            0,
            292,
            [
                402,
            ],
            np.int16,
        )
    return env


def register_monsterkong_variants(env_registry: EnvRegistry = registry) -> None:
    for env_id in ["MetaMonsterKong-v0", "MetaMonsterKong-v1"]:
        spec: EnvSpec = env_registry.spec(env_id)

        # Add an explicit 'State' variant of the envs.
        new_env_id = "State" + env_id
        new_spec = EnvVariantSpec.of(
            spec,
            new_id=new_env_id,
            new_max_episode_steps=500,
            new_kwargs={"observe_state": True},
        )
        if new_env_id not in env_registry.env_specs:
            env_registry.env_specs[new_env_id] = new_spec

        # Add an explicit 'Pixel' variant of the envs (even though by default we currently
        # always observe the state).
        new_env_id = "Pixel" + env_id
        new_spec = EnvVariantSpec.of(
            spec,
            new_id=new_env_id,
            new_max_episode_steps=500,
            new_kwargs={"observe_state": False},
        )
        if new_env_id not in env_registry.env_specs:
            env_registry.env_specs[new_env_id] = new_spec


================================================
FILE: sequoia/settings/rl/envs/mujoco/__init__.py
================================================
""" CL environments based on the mujoco envs.

NOTE: This is based on https://github.com/Breakend/gym-extensions
"""
# from sequoia.conftest import mujoco_required
# pytestmark = mujoco_required

import os
from pathlib import Path
from typing import Callable, Dict, List, Type, Union

import gym
from gym.envs import register
from gym.envs.mujoco import MujocoEnv
from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
from gym.envs.registration import EnvRegistry, EnvSpec, load, registry

from sequoia.utils.logging_utils import get_logger

from ..variant_spec import EnvVariantSpec
from .half_cheetah import (
    ContinualHalfCheetahV2Env,
    ContinualHalfCheetahV3Env,
    HalfCheetahV2Env,
    HalfCheetahV3Env,
)
from .hopper import ContinualHopperV2Env, ContinualHopperV3Env, HopperV2Env, HopperV3Env
from .modified_gravity import ModifiedGravityEnv
from .modified_size import ModifiedSizeEnv
from .walker2d import ContinualWalker2dV2Env, ContinualWalker2dV3Env, Walker2dV2Env, Walker2dV3Env

logger = get_logger(__name__)

# NOTE: Prefer the 'V3' variants
# HalfCheetahEnv = HalfCheetahV3Env
# Walker2dEnv = Walker2dV3Env
ContinualHalfCheetahEnv = ContinualHalfCheetahV3Env
ContinualHopperEnv = ContinualHopperV3Env
ContinualWalker2dEnv = ContinualWalker2dV3Env

SOURCE_DIR = Path(os.path.dirname(os.path.abspath(__file__)))

__all__ = [
    "ContinualHalfCheetahEnv",
    "ContinualHalfCheetahV2Env",
    "ContinualHalfCheetahV3Env",
    "ContinualHopperV2Env",
    "ContinualHopperV3Env",
    "ContinualWalker2dEnv",
    "ContinualWalker2dV2Env",
    "ContinualWalker2dV3Env",
    "ModifiedGravityEnv",
    "ModifiedSizeEnv",
    "MujocoEnv",
]


def get_entry_point(Env: Type[gym.Env]) -> str:
    # TODO: Make sure this also works when Sequoia is installed in non-editable mode.
    return f"{Env.__module__}:{Env.__name__}"


# The list of mujoco envs which we explicitly have support for.
# TODO: Should probably use a Wrapper rather than a new base class (at least for the
# GravityEnv and the modifications that can be made to an already-instantiated env.
# NOTE: Using the same version tag as the

CURRENTLY_SUPPORTED_MUJOCO_ENVS: Dict[str, Type[MujocoEnv]] = {
    "HalfCheetah-v2": ContinualHalfCheetahV2Env,
    "HalfCheetah-v3": ContinualHalfCheetahV3Env,
    "Hopper-v2": ContinualHopperV2Env,
    "Hopper-v3": ContinualHopperV3Env,
    "Walker2d-v2": ContinualWalker2dV2Env,
    "Walker2d-v3": ContinualWalker2dV3Env,
}


# TODO: Register the 'continual' variants automatically by finding the entries in the
# registry that can be wrapped, and wrapping them.


# IDEA: Actually swap out the entries for these envs, rather than overwrite them?


def register_mujoco_variants(env_registry: EnvRegistry = registry) -> None:
    """Adds pixel variants for the classic-control envs to the given registry in-place."""
    # Dict from the env id to the original spec
    original_mujoco_env_specs: Dict[str, EnvSpec] = {
        original_env_id: env_registry.spec(original_env_id)
        for original_env_id in CURRENTLY_SUPPORTED_MUJOCO_ENVS
    }
    # Dict from the
    # TODO: Add broader support for mujoco envs
    new_entry_points = CURRENTLY_SUPPORTED_MUJOCO_ENVS

    # NOTE: Currently we do two things: Register a new spec with a different name, like
    # `ContinualWalker2d-v2`, as well as 'overwrite' the entry-point of the original
    # spec ("Walker2d-v2") to point to our custom subclass (ContinualWalker2dV2Env)
    prefixes = ["Continual", ""]
    # NOTE: It could actually make more sense to only register our variants, and
    # then have the Setting map one to the other intelligently, but it causes a bit more
    # trouble
    # prefixes = ["Continual"]
    for prefix in prefixes:
        for env_id, original_env_spec in original_mujoco_env_specs.items():
            # TODO: Use the same ID, or a different one?
            new_id = prefix + env_id

            if (new_id not in env_registry.env_specs or new_id == env_id) and not isinstance(
                original_env_spec, EnvVariantSpec
            ):
                new_spec = EnvVariantSpec.of(
                    original=original_env_spec,
                    new_id=new_id,
                    new_entry_point=new_entry_points[env_id],
                )
                env_registry.env_specs[new_id] = new_spec
                if new_id != env_id:
                    logger.debug(
                        f"Registering MuJoCO Environment variant of {env_id} at id {new_id}."
                    )
                else:
                    logger.debug(f"Overwriting the existing EnvSpec at id {env_id}")


# Replace the entry-point for these mujoco envs.
# IMPORTANT: This doesn't change anything about the envs, apart from making it possible
# to explicitly change the gravity or mass etc if you want.
# TODO: Should probably still only modify a custom/copied registry, so that importing
# Sequoia doesn't modify the gym registry when Sequoia isn't being used explicitly.
# registry.env_specs["HalfCheetah-v2"].entry_point = ContinualHalfCheetahV2Env
# registry.env_specs["HalfCheetah-v3"].entry_point = ContinualHalfCheetahV3Env
# registry.env_specs["Hopper-v2"].entry_point = ContinualHopperEnv
# registry.env_specs["Walker2d-v2"].entry_point = ContinualWalker2dEnv

# EnvSpec(
#     "HalfCheetah-v2",
#     entry_point=get_entry_point(Continu),
#     reward_threshold=None,
#     nondeterministic=False,
#     max_episode_steps=None,
#     kwargs=None,
# )


# gym.envs.register(
#     id="ContinualHalfCheetah-v2",
#     entry_point=get_entry_point(ContinualHalfCheetahV2Env),
#     max_episode_steps=1000,
#     reward_threshold=4800.0,
# )

# gym.envs.register(
#     id="ContinualHalfCheetah-v3",
#     entry_point=get_entry_point(ContinualHalfCheetahV3Env),
#     max_episode_steps=1000,
#     reward_threshold=4800.0,
# )

# gym.envs.register(
#     id="ContinualHopper-v2",
#     entry_point=get_entry_point(ContinualHopperEnv),
#     max_episode_steps=1000,
#     reward_threshold=4800.0,
# )

# gym.envs.register(
#     id="ContinualWalker2d-v3",
#     entry_point=get_entry_point(ContinualWalker2dEnv),
#     max_episode_steps=1000,
#     reward_threshold=4800.0,
# )


================================================
FILE: sequoia/settings/rl/envs/mujoco/half_cheetah.py
================================================
from typing import ClassVar, Dict, List

import numpy as np
from gym.envs.mujoco import MujocoEnv
from gym.envs.mujoco.half_cheetah import HalfCheetahEnv as _HalfCheetahV2Env

# TODO: Use HalfCheetah-v3 instead, which allows explicitly to change the model file!
from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv as _HalfCheetahV3Env

from .modified_gravity import ModifiedGravityEnv
from .modified_mass import ModifiedMassEnv
from .modified_size import ModifiedSizeEnv


class HalfCheetahV2Env(_HalfCheetahV2Env):
    """
    Simply allows changing of XML file, probably not necessary if we pull request the
    xml name as a kwarg in openai gym
    """

    BODY_NAMES: ClassVar[List[str]] = [
        "torso",
        "bthigh",
        "bshin",
        "bfoot",
        "fthigh",
        "fshin",
        "ffoot",
    ]

    def __init__(self, model_path: str = "half_cheetah.xml", frame_skip: int = 5):
        MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)


# Q: Why isn't HalfCheetahV3 based on HalfCheetahV2 in gym ?!


class HalfCheetahV3Env(_HalfCheetahV3Env):
    BODY_NAMES: ClassVar[List[str]] = [
        "torso",
        "bthigh",
        "bshin",
        "bfoot",
        "fthigh",
        "fshin",
        "ffoot",
    ]

    def __init__(
        self,
        model_path="half_cheetah.xml",
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 0.1,
        reset_noise_scale: float = 0.1,
        exclude_current_positions_from_observation: bool = True,
        xml_file: str = None,
        frame_skip: int = 5,
    ):
        if frame_skip != 5:
            raise NotImplementedError("todo: Add a frame_skip arg to the gym class.")
        super().__init__(
            xml_file=xml_file or model_path,
            forward_reward_weight=forward_reward_weight,
            ctrl_cost_weight=ctrl_cost_weight,
            reset_noise_scale=reset_noise_scale,
            exclude_current_positions_from_observation=exclude_current_positions_from_observation,
        )


# class HalfCheetahGravityEnv(ModifiedGravityEnv, HalfCheetahEnv):
#     # NOTE: This environment could be used in ContinualRL!
#     def __init__(
#         self,
#         model_path: str = "half_cheetah.xml",
#         frame_skip: int = 5,
#         gravity: float = -9.81,
#     ):
#         super().__init__(model_path=model_path, frame_skip=frame_skip, gravity=gravity)


class HalfCheetahWithSensorEnv(HalfCheetahV2Env):
    """NOTE: unused for now.
    Adds empty sensor readouts, this is to be used when transfering to WallEnvs where we
    get sensor readouts with distances to the wall
    """

    def __init__(self, model_path: str, frame_skip: int = 5, n_bins: int = 10):
        super().__init__(model_path=model_path, frame_skip=frame_skip)
        self.n_bins = n_bins

    def _get_obs(self):
        obs = np.concatenate(
            [
                super()._get_obs(),
                np.zeros(self.n_bins),  # NOTE: @lebrice HUH? what's the point of doing this?
                # goal_readings
            ]
        )
        return obs


# TODO: Rename these base classes to 'ModifyGravityMixin', 'ModifySizeMixin', etc.


class ContinualHalfCheetahV2Env(
    ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, HalfCheetahV2Env
):
    def __init__(
        self,
        model_path: str = "half_cheetah.xml",
        frame_skip: int = 5,
        gravity=-9.81,
        body_name_to_size_scale: Dict[str, float] = None,
        body_name_to_mass_scale: Dict[str, float] = None,
    ):
        super().__init__(
            model_path=model_path,
            frame_skip=frame_skip,
            gravity=gravity,
            body_name_to_size_scale=body_name_to_size_scale,
            body_name_to_mass_scale=body_name_to_mass_scale,
        )


class ContinualHalfCheetahV3Env(
    ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, HalfCheetahV3Env
):
    def __init__(
        self,
        model_path: str = "half_cheetah.xml",
        frame_skip: int = 5,
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 0.1,
        reset_noise_scale: float = 0.1,
        exclude_current_positions_from_observation: bool = True,
        gravity=-9.81,
        body_name_to_size_scale: Dict[str, float] = None,
        body_name_to_mass_scale: Dict[str, float] = None,
        xml_file: str = None,
    ):
        super().__init__(
            model_path=xml_file or model_path,
            frame_skip=frame_skip,
            forward_reward_weight=forward_reward_weight,
            ctrl_cost_weight=ctrl_cost_weight,
            reset_noise_scale=reset_noise_scale,
            exclude_current_positions_from_observation=exclude_current_positions_from_observation,
            gravity=gravity,
            body_name_to_size_scale=body_name_to_size_scale,
            body_name_to_mass_scale=body_name_to_mass_scale,
        )


================================================
FILE: sequoia/settings/rl/envs/mujoco/half_cheetah_test.py
================================================
from typing import ClassVar, Type

from sequoia.conftest import mujoco_required

pytestmark = mujoco_required

from .half_cheetah import ContinualHalfCheetahV2Env, ContinualHalfCheetahV3Env
from .modified_gravity_test import ModifiedGravityEnvTests
from .modified_mass_test import ModifiedMassEnvTests
from .modified_size_test import ModifiedSizeEnvTests


@mujoco_required
class TestHalfCheetahV2(ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests):
    Environment: ClassVar[Type[ContinualHalfCheetahV2Env]] = ContinualHalfCheetahV2Env


@mujoco_required
class TestHalfCheetahV3(ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests):
    Environment: ClassVar[Type[ContinualHalfCheetahV3Env]] = ContinualHalfCheetahV3Env


================================================
FILE: sequoia/settings/rl/envs/mujoco/hopper.py
================================================
# TODO: Should we use HopperV3 instead?
from typing import ClassVar, Dict, List, Tuple

from gym.envs.mujoco import MujocoEnv
from gym.envs.mujoco.hopper import HopperEnv as _HopperV2Env

# TODO: Use HalfCheetah-v3 instead, which allows explicitly to change the model file!
from gym.envs.mujoco.hopper_v3 import HopperEnv as _HopperV3Env

from .modified_gravity import ModifiedGravityEnv
from .modified_mass import ModifiedMassEnv
from .modified_size import ModifiedSizeEnv

# NOTE: Removed the `utils.EzPickle` base class (since it wasn't being passed any kwargs
# (and therefore wasn't saving any of the 'state') anyway.


class HopperV2Env(_HopperV2Env):
    """
    Simply allows changing of XML file, probably not necessary if we pull request the
    xml name as a kwarg in openai gym
    """

    BODY_NAMES: ClassVar[List[str]] = ["torso", "thigh", "leg", "foot"]

    def __init__(self, model_path: str = "hopper.xml", frame_skip: int = 4):
        MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)
        # utils.EzPickle.__init__(self)


class HopperV3Env(_HopperV3Env):
    BODY_NAMES: ClassVar[List[str]] = ["torso", "thigh", "leg", "foot"]

    def __init__(
        self,
        model_path="hopper.xml",
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 1e-3,
        healthy_reward: float = 1.0,
        terminate_when_unhealthy: bool = True,
        healthy_state_range: Tuple[float, float] = (-100.0, 100.0),
        healthy_z_range: Tuple[float, float] = (0.7, float("inf")),
        healthy_angle_range: Tuple[float, float] = (-0.2, 0.2),
        reset_noise_scale: float = 5e-3,
        exclude_current_positions_from_observation: bool = True,
        xml_file: str = None,
        frame_skip: int = 4,
    ):
        if frame_skip != 4:
            raise NotImplementedError("todo: Add a frame_skip arg to the gym class.")
        super().__init__(
            xml_file=xml_file or model_path,
            forward_reward_weight=forward_reward_weight,
            ctrl_cost_weight=ctrl_cost_weight,
            healthy_reward=healthy_reward,
            terminate_when_unhealthy=terminate_when_unhealthy,
            healthy_state_range=healthy_state_range,
            healthy_z_range=healthy_z_range,
            healthy_angle_range=healthy_angle_range,
            reset_noise_scale=reset_noise_scale,
            exclude_current_positions_from_observation=exclude_current_positions_from_observation,
        )


class HopperV2GravityEnv(ModifiedGravityEnv, HopperV2Env):
    # NOTE: This environment could be used in ContinualRL!
    def __init__(
        self,
        model_path: str = "hopper.xml",
        frame_skip: int = 4,
        gravity: float = -9.81,
    ):
        super().__init__(model_path=model_path, frame_skip=frame_skip, gravity=gravity)


class ContinualHopperV2Env(ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, HopperV2Env):
    def __init__(
        self,
        model_path: str = "hopper.xml",
        frame_skip: int = 4,
        gravity=-9.81,
        body_name_to_size_scale: Dict[str, float] = None,
        body_name_to_mass_scale: Dict[str, float] = None,
    ):
        super().__init__(
            model_path=model_path,
            frame_skip=frame_skip,
            gravity=gravity,
            body_name_to_size_scale=body_name_to_size_scale,
            body_name_to_mass_scale=body_name_to_mass_scale,
        )


class ContinualHopperV3Env(ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, HopperV3Env):
    def __init__(
        self,
        model_path="hopper.xml",
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 1e-3,
        healthy_reward: float = 1.0,
        terminate_when_unhealthy: bool = True,
        healthy_state_range: Tuple[float, float] = (-100.0, 100.0),
        healthy_z_range: Tuple[float, float] = (0.7, float("inf")),
        healthy_angle_range: Tuple[float, float] = (-0.2, 0.2),
        reset_noise_scale: float = 5e-3,
        exclude_current_positions_from_observation: bool = True,
        # xml_file: str = None,
        frame_skip: int = 4,
        gravity=-9.81,
        body_name_to_size_scale: Dict[str, float] = None,
        body_name_to_mass_scale: Dict[str, float] = None,
    ):
        super().__init__(
            model_path=model_path,
            frame_skip=frame_skip,
            # xml_file=xml_file or model_path,
            forward_reward_weight=forward_reward_weight,
            ctrl_cost_weight=ctrl_cost_weight,
            healthy_reward=healthy_reward,
            terminate_when_unhealthy=terminate_when_unhealthy,
            healthy_state_range=healthy_state_range,
            healthy_z_range=healthy_z_range,
            healthy_angle_range=healthy_angle_range,
            reset_noise_scale=reset_noise_scale,
            exclude_current_positions_from_observation=exclude_current_positions_from_observation,
            gravity=gravity,
            body_name_to_size_scale=body_name_to_size_scale,
            body_name_to_mass_scale=body_name_to_mass_scale,
        )


# ------------- NOTE (@lebrice) -------------------------------
# Everything below this is unused.
# The idea was to do some kind of inverse-kinematics-ish math to fix the placement of the joints
# when the size of one of the parts of the model is changed.
#


# from typing import Dict


# def get_parent(tree: ElementTree, node: Element) -> Element:
#     parent_map: Dict[Element, Element] = {c: p for p in tree.iter() for c in p}
#     return parent_map[node]


# def update_world(
#     tree: ElementTree,
#     world_body: Element,
#     new_torso_max: Pos,
#     size_scaling_factor: float = 1.0,
#     **kwargs,
# ) -> None:
#     """propagate the changes from the body to the world, if need be."""
#     # TODO: Maybe move the camera etc?


# def update_torso(
#     tree: ElementTree = None,
#     torso_body: Element = None,
#     new_torso_min: Pos = None,
#     size_scaling_factor: float = 1.0,
#     geom_suffix="torso_geom",
#     **kwargs,
# ) -> None:
#     """'move' the torso body and its endpoints, after another bodypart has been
#     scaled.
#     This moves all relevant geoms and
#     joints and bodies,
#     Normally, this can update the
#     (through possibly recursive calls to one of `update_torso`,
#     `update_thigh`, `update_leg`, `update_foot`.)
#     """
#     assert size_scaling_factor != 0.0
#     body_name = "torso"
#     # Get the elements to be modified.
#     if torso_body is None:
#         assert tree is not None, "need the tree if torso_body is not given!"
#         if isinstance(tree, Element) and tree.tag == "body" and tree.get("name") == body_name:
#             torso_body = tree
#             tree = None
#         else:
#             torso_body = tree.find(f".//body[@name='{body_name}']")
#     assert torso_body is not None, "can't find the torso body!"

#     torso_geom = torso_body.find(f"./geom[@name='{body_name}']")
#     if torso_geom is None:
#         torso_geom = torso_body.find(f"./geom[@name='{body_name}_geom']")
#     if torso_geom is None:
#         raise RuntimeError(f"Can't find the geom for body part '{body_name}'!")

#     rooty_joint = torso_body.find("./joint[@name='rooty']")
#     rootz_joint = torso_body.find("./joint[@name='rootz']")

#     torso_body_pos = Pos.of_element(torso_body)

#     torso_geom_size = float(torso_geom.get("size"))
#     torso_geom_fromto = FromTo.of_element(torso_geom)
#     rootz_joint_ref = float(rootz_joint.get("ref"))
#     rooty_joint_pos = Pos.of_element(rooty_joint)

#     torso_max = torso_geom_fromto.start
#     torso_min = torso_geom_fromto.end
#     torso_length = torso_max - torso_min
#     assert torso_body_pos == torso_geom_fromto.center
#     # This happens to coincide with torso's pos.
#     assert rootz_joint_ref == torso_body_pos.z
#     assert rooty_joint_pos == torso_body_pos

#     if new_torso_min is None:
#         # Assume that the location of the base of the torso doesn't change, i.e. that
#         # this was called in order to JUST scale the torso and nothing else.
#         new_torso_min = torso_min
#     # new_torso_min is already given, calculate the other two:
#     new_torso_length = torso_length * (1 if size_scaling_factor is None else size_scaling_factor)
#     new_torso_max = new_torso_min + new_torso_length

#     # NOTE: fromto is from top to bottom here (maybe also everywhere else, not sure).
#     new_torso_geom_size = torso_geom_size * size_scaling_factor
#     new_torso_geom_fromto = FromTo(start=new_torso_max, end=new_torso_min)
#     new_torso_pos = (new_torso_max + new_torso_min) / 2
#     new_rootz_joint_ref = new_torso_pos.z
#     new_rooty_joint_pos = new_torso_pos

#     # Update the fields of the different elements.
#     torso_body.set("pos", new_torso_pos.to_str())
#     torso_geom.set("fromto", new_torso_geom_fromto.to_str())
#     torso_geom.set("size", new_torso_geom_size)

#     # TODO: Not sure if this makes sense: The rooty joint has a Pos that coincides
#     # with the torso pos.
#     new_torso_pos.set_in_element(rooty_joint)
#     # TODO: rootz has a 'ref' which also coincides with the torso pos.
#     rootz_joint.set("ref", str(new_rootz_joint_ref))
#     rooty_joint.set("pos", new_rooty_joint_pos)

#     new_torso_pos = new_torso_geom_fromto.center
#     # TODO: Also move the camera?

#     world_body: Optional[Element] = None
#     if tree is not None:
#         assert tree is not None, "need the tree if torso_body is not given!"
#         world_body = get_parent(tree, torso_body)

#     # Don't change the scaling of the parent, if this body part was scaled!
#     parent_scale_factor = 1 if size_scaling_factor != 1 else size_scaling_factor

#     update_world(
#         tree=tree,
#         world_body=world_body,
#         new_torso_min=new_torso_min,
#         new_torso_max=new_torso_max,
#         size_scaling_factor=parent_scale_factor,
#         **kwargs,
#     )


# def update_thigh(
#     tree: ElementTree = None,
#     thigh_body: Element = None,
#     new_thigh_min: Pos = None,
#     new_thigh_max: Pos = None,
#     size_scaling_factor: float = None,
#     **kwargs,
# ) -> None:
#     """'move' the thigh and its endpoints. This moves all relevant geoms and
#     joints and then moves the torso by calling `update_torso`.
#     """
#     # TODO:
#     new_torso_min = new_thigh_max
#     new_torso_max = todo

#     torso_body = get_parent(tree, thigh_body)
#     update_torso(
#         torso_body,
#         new_torso_min=new_torso_min,
#         new_torso_max=new_torso_max,
#         size_scaling_factor=size_scaling_factor,
#         new_thigh_min=new_thigh_min,
#         new_thigh_max=new_thigh_max,
#         **kwargs,
#     )


# def update_thigh(
#     tree: ElementTree = None,
#     thigh_body: Element = None,
#     new_thigh_min: Pos = None,
#     new_thigh_max: Pos = None,
#     size_scaling_factor: float = None,
#     **kwargs,
# ) -> None:
#     """'move' the thigh and its endpoints. This moves all relevant geoms and
#     joints and then moves the torso by calling `update_torso`.

#     """
#     new_torso_min = NotImplemented
#     new_thigh_max = NotImplemented
#     torso_body = get_parent(tree, thigh_body)
#     update_torso(
#         torso_body,
#         new_torso_min=new_torso_min,
#         size_scaling_factor=size_scaling_factor,
#         new_thigh_min=new_thigh_min,
#         new_thigh_max=new_thigh_max,  # Pass it in case the above components need it.
#         **kwargs,
#     )


# def scale_size(tree: ElementTree, body_name: str, scale: float) -> str:
#     tree = copy.deepcopy(tree)
#     target_body: Element = tree.find(f".//body[@name='{body_name}']")
#     parent_map: Dict[Element, Element] = {c: p for p in tree.iter() for c in p}

#     if body_name == "torso":
#         update_torso(tree, torso_body=target_body, size_scaling_factor=scale)
#     raise NotImplementedError(f"WIP")


================================================
FILE: sequoia/settings/rl/envs/mujoco/hopper_test.py
================================================
from sequoia.conftest import mujoco_required

pytestmark = mujoco_required
import inspect
import itertools
import os
from pathlib import Path
from typing import ClassVar, Type
from xml.etree.ElementTree import ElementTree, fromstring

import pytest
from gym.envs.mujoco import MujocoEnv

from sequoia.conftest import mujoco_required

from .hopper import ContinualHopperV2Env, ContinualHopperV3Env
from .modified_gravity_test import ModifiedGravityEnvTests
from .modified_mass_test import ModifiedMassEnvTests
from .modified_size_test import ModifiedSizeEnvTests

# # TODO: There is a bug in the way the hopper XML is generated, where the sticks / joints don't seem to follow.
# bob = ContinualHopperEnv(body_name_to_size_scale={"thigh": 2})
# assert False, bob


@mujoco_required
class TestContinualHopperV2Env(ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests):
    Environment: ClassVar[Type[ContinualHopperV2Env]] = ContinualHopperV2Env


@mujoco_required
class TestContinualHopperV3Env(ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests):
    Environment: ClassVar[Type[ContinualHopperV3Env]] = ContinualHopperV3Env


def load_tree(model_path: Path) -> ElementTree:
    # model_path = "hopper.xml"
    if model_path.startswith("/"):
        full_path = model_path
    else:
        full_path = os.path.join(
            os.path.dirname(inspect.getsourcefile(MujocoEnv)), "assets", model_path
        )
    if not os.path.exists(full_path):
        raise IOError(f"File {full_path} does not exist")

    with open(model_path, "r") as f:
        return f.read()


default_hopper_body_xml = f"""\
<worldbody>
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1" />
    <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane" />
    <body name="torso" pos="0 0 1.25">
        <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1" />
        <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide" />
        <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide" />
        <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge" />
        <geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule" />
        <body name="thigh" pos="0 0 1.05">
            <joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge" />
            <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule" />
            <body name="leg" pos="0 0 0.35">
                <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge" />
                <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule" />
                <body name="foot" pos="0.13/2 0 0.1">
                    <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge" />
                    <geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule" />
                </body>
            </body>
        </body>
    </body>
</worldbody>
"""


def elements_equal(e1, e2) -> bool:
    """Taken from https://stackoverflow.com/a/24349916/6388696"""
    assert e1.tag == e2.tag
    assert e1.text == e2.text
    assert e1.tail == e2.tail
    assert e1.attrib == e2.attrib
    assert len(e1) == len(e2)
    assert all(elements_equal(c1, c2) for c1, c2 in zip(e1, e2))


@pytest.mark.xfail(reason="Dropping this for now, XML is really annoying.")
@pytest.mark.parametrize(
    "input_xml_str, scale_factor, output_xml_str",
    [
        (
            default_hopper_body_xml,
            1.0,
            default_hopper_body_xml,
        ),
        (
            default_hopper_body_xml,
            2.0,
            f"""\
        <worldbody>
            <body name="torso" pos="0 0 {1.45}">
                <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
                <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
                <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="{1.25}" stiffness="0" type="slide"/>
                <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 {1.45}" stiffness="0" type="hinge"/>
                <geom friction="0.9" fromto="0 0 {1.85} 0 0 1.05" name="torso_geom" size="{0.10}" type="capsule"/>
                <body name="thigh" pos="0 0 1.05">
                    <joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
                    <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
                    <body name="leg" pos="0 0 0.35">
                        <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
                        <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
                        <body name="foot" pos="0.13/2 0 0.1">
                            <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
                            <geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
                        </body>
                    </body>
                </body>
            </body>
        </worldbody>
        """,
        ),
    ],
    ids=(f"param{i}" for i in itertools.count()),
)
def test_change_torso(input_xml_str: str, scale_factor: float, output_xml_str: str):

    # # TODO: Get rid of annoying whitespace issues!
    pass

    input_tree = fromstring(input_xml_str)
    expected = fromstring(output_xml_str)

    # from io import StringIO
    # in_file = StringIO(input_xml_str)
    # out_file = StringIO(output_xml_str)
    # input_tree = parse(in_file)
    # expected = parse(out_file)

    update_torso(tree=input_tree, size_scale_factor=scale_factor)
    # import textwrap
    # from xml.dom import minidom
    # result = minidom.parseString(tostring(input_tree, method="text")).toprettyxml()
    result = input_tree
    assert elements_equal(result, expected)
    # expected = minidom.parseString().toprettyxml()
    assert result == expected


================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_friction.py
================================================
""" TODO: Wrapper that modifies the friction, if possible on-the-fly. """
from typing import ClassVar

from gym.envs.mujoco import MujocoEnv


class ModifiedFrictionEnv(MujocoEnv):
    """
    Allows the gravity to be changed.

    Adapted from https://github.com/Breakend/gym-extensions/blob/master/gym_extensions/continuous/mujoco/gravity_envs.py
    """

    # IDEA: Use somethign like this to tell appart modifications which can be applied
    # on-the-fly on a given env to get multiple tasks, vs those that require creating a
    # new environment for each task.
    CAN_BE_UPDATED_IN_PLACE: ClassVar[bool] = True


================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_friction_test.py
================================================
""" TODO: Tests for the 'modified friction' mujoco envs. """


================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_gravity.py
================================================
import warnings
from typing import ClassVar

from gym.envs.mujoco import MujocoEnv

from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


class ModifiedGravityEnv(MujocoEnv):
    """
    Allows the gravity to be changed.

    Adapted from https://github.com/Breakend/gym-extensions/blob/master/gym_extensions/continuous/mujoco/gravity_envs.py
    """

    # IDEA: Use somethign like this to tell appart modifications which can be applied
    # on-the-fly on a given env to get multiple tasks, vs those that require creating a
    # new environment for each task.
    CAN_BE_UPDATED_IN_PLACE: ClassVar[bool] = True

    def __init__(self, model_path: str, frame_skip: int, gravity: float = -9.81, **kwargs):
        super().__init__(model_path=model_path, frame_skip=frame_skip, **kwargs)
        # self.model.opt.gravity = (mujoco_py.mjtypes.c_double * 3)(*[0., 0., gravity])
        if gravity != -9.81:
            self.model.opt.gravity[2] = gravity
            # self.model._compute_subtree()
            # self.model.forward()
            self.sim.forward()
            # self.sim: MjSim
            logger.debug(f"Setting initial gravity to {self.gravity}")

    @property
    def gravity(self) -> float:
        return self.model.opt.gravity[2]

    @gravity.setter
    def gravity(self, value: float) -> None:
        # TODO: Seems to be bad practice to modify memory in-place for some reason?
        self.model.opt.gravity[2] = value
        # self.model.opt.gravity[2] = - abs(value)

    def set_gravity(self, value: float) -> None:
        if value >= 0:
            warnings.warn(
                RuntimeWarning(
                    "Not a good idea to use a positive value! (things will start to float)"
                )
            )
            # IDEA: always convert to negative value in the setter?
        self.gravity = value


================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_gravity_test.py
================================================
""" TODO: Tests for the 'modified gravity' mujoco envs. """
from typing import ClassVar, Type, TypeVar

from gym.wrappers import TimeLimit

from sequoia.conftest import mujoco_required

pytestmark = mujoco_required

from .modified_gravity import ModifiedGravityEnv

EnvType = TypeVar("EnvType", bound=ModifiedGravityEnv)


class ModifiedGravityEnvTests:
    Environment: ClassVar[Type[EnvType]]

    # @pytest.mark.xfail(reason="The condition doesn't always work.")
    def test_change_gravity_each_step(self):
        env: ModifiedGravityEnv = self.Environment()
        max_episode_steps = 50
        n_episodes = 3

        # NOTE: Interestingly, the renderer will show
        # `env.frame_skip * max_episode_steps` frames per episode, even when
        # "Ren[d]er every frame" is set to False.
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
        total_steps = 0

        for episode in range(n_episodes):
            initial_state = env.reset()
            done = False
            episode_steps = 0

            start_y = initial_state[1]
            moved_up = 0
            previous_state = initial_state
            state = initial_state
            while not done:
                previous_state = state
                state, reward, done, info = env.step(env.action_space.sample())
                env.render("human")
                episode_steps += 1
                total_steps += 1

                # decrease the gravity continually over time.
                # By the end, things should be floating.
                env.set_gravity(-10 + 5 * total_steps / max_episode_steps)
                moved_up += state[1] > previous_state[1]
                # print(f"Moving upward? {obs[1] > state[1]}")

            if episode_steps != max_episode_steps:
                print(f"Episode ended early?")

            print(f"Gravity at end of episode: {env.gravity}")
            # TODO: Check that the position (in the observation) is obeying gravity?
            # if env.gravity <= 0:
            #     # Downward force, so should not have any significant preference for
            #     # moving up vs moving down.
            #     assert 0.4 <= (moved_up / max_episode_steps) <= 0.6, env.gravity
            # # if env.gravity == 0:
            # #     assert 0.5 <= (moved_up / max_episode_steps) <= 1.0
            # if env.gravity > 0:
            #     assert 0.5 <= (moved_up / max_episode_steps) <= 1.0, env.gravity

        assert total_steps <= n_episodes * max_episode_steps

        initial_z = env.init_qpos[1]
        final_z = env.sim.data.qpos[1]
        if env.gravity > 0:
            assert final_z > initial_z
        # TODO: These checks aren't deterministic, and only really "work" with
        # half-cheetah.
        # assert initial_z == 0
        # Check that the robot is high up in the sky! :D
        # assert final_z > 3
        # assert False, (env.init_qpos, env.sim.data.qpos)

    def test_task_schedule(self):
        # TODO: Reuse this test (and perhaps others from multi_task_environment_test.py)
        # but with this continual_half_cheetah instead of cartpole.
        original = self.Environment()
        starting_gravity = original.gravity

        task_schedule = {
            10: dict(gravity=starting_gravity),
            20: dict(gravity=-12.0),
            30: dict(gravity=0.9),
        }
        from sequoia.common.gym_wrappers import MultiTaskEnvironment

        env = MultiTaskEnvironment(original, task_schedule=task_schedule)
        env.seed(123)
        env.reset()
        for step in range(100):
            _, _, done, _ = env.step(env.action_space.sample())
            # env.render()
            if done:
                env.reset()

            if 0 <= step < 10:
                assert env.gravity == starting_gravity
            elif 10 <= step < 20:
                assert env.gravity == starting_gravity
            elif 20 <= step < 30:
                assert env.gravity == -12.0
            elif step >= 30:
                assert env.gravity == 0.9
        env.close()


================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_mass.py
================================================
from functools import partial
from typing import ClassVar, Dict, List, TypeVar, Union

import numpy as np
from gym.envs.mujoco import MujocoEnv

V = TypeVar("V")


class ModifiedMassEnv(MujocoEnv):
    """
    Allows the mass of body parts to be changed.

    NOTE: Haven't yet checked how this affects the physics simulation! Might not be 100% working.
    """

    # IDEA: Use somethign like this to tell appart modifications which can be applied
    # on-the-fly on a given env to get multiple tasks, vs those that require creating a
    # new environment for each task.
    CAN_BE_UPDATED_IN_PLACE: ClassVar[bool] = True
    BODY_NAMES: ClassVar[List[str]]

    def __init__(
        self,
        model_path: str,
        frame_skip: int,
        body_name_to_mass_scale: Dict[str, float] = None,
        **kwargs,
    ):
        super().__init__(
            model_path=model_path,
            frame_skip=frame_skip,
            **kwargs,
        )
        self.body_name_to_mass_scale = body_name_to_mass_scale or {}
        self.default_masses_dict: Dict[str, float] = {
            body_name: self.model.body_mass[i] for i, body_name in enumerate(self.model.body_names)
        }
        self.default_masses: np.ndarray = np.copy(self.model.body_mass)

        # dict(zip(body_parts, mass_scales))
        self.scale_masses(**self.body_name_to_mass_scale)
        # self.model.body_mass = self.get_and_modify_bodymass(body_part, mass_scale)
        # self.model._compute_subtree()
        # self.model.forward()

    def __init_subclass__(cls):
        super().__init_subclass__()
        # Add auto-generated properties for getting and setting the mass of the bodyparts.
        for body_part in cls.BODY_NAMES:
            property_name = f"{body_part}_mass"
            mass_property = property(
                fget=partial(cls.get_mass, body_part=body_part),
                fset=partial(cls._mass_setter, body_part),
            )
            setattr(cls, property_name, mass_property)

    def _update(self) -> None:
        """'Update' the model, if necessary, after a change has occured to the mass.

        TODO: Not sure if this is entirely correct
        """
        # self.model._compute_subtree()
        # self.model.forward()

    def reset_masses(self) -> None:
        """Resets the masses to their default values."""
        # NOTE: Use [:] to modify in-place, just in case there are any
        # pointer-shenanigans going on on the C side.
        self.model.body_mass[:] = self.default_masses
        # self.model._compute_subtree() #TODO: Not sure about this call
        # self.model.forward()

    def get_masses_dict(self) -> Dict[str, float]:
        return {
            body_name: self.model.body_masses[i]
            for i, body_name in enumerate(self.model.body_names)
        }

    def set_mass(self, **body_name_to_mass: Dict[str, Union[int, float]]) -> None:
        # Will raise an IndexError if the body part isnt found.
        # _set_mass(self, body_part=body_part, mass=mass)
        for body_part, mass in body_name_to_mass.items():
            idx = self.model.body_names.index(body_part)
            self.model.body_mass[idx] = mass

    def get_mass(self, body_part: str) -> float:
        # Will raise an IndexError if the body part isnt found.
        if body_part not in self.model.body_names:
            raise ValueError(
                f"No body named {body_part} in this mujoco model! (body names: "
                f"{self.model.body_names})."
            )
        idx = self.model.body_names.index(body_part)
        return self.model.body_mass[idx]

    def scale_masses(
        self,
        body_parts: List[str] = None,
        mass_scales: List[float] = None,
        **body_name_to_mass_scale,
    ) -> Dict[str, float]:
        """Scale the (original) mass of body parts of the Mujoco model.

        Returns a dictionary with the new masses.
        """
        new_masses: Dict[str, float] = {}
        body_parts = body_parts or []
        mass_scales = mass_scales or []
        body_name_to_mass_scale = body_name_to_mass_scale or {}

        self.reset_masses()

        body_name_to_mass_scale.update(zip(body_parts, mass_scales))

        for body_name, mass_scale in body_name_to_mass_scale.items():
            current_mass = self.get_mass(body_name)
            new_mass = mass_scale * current_mass
            self.set_mass(**{body_name: new_mass})

            new_masses[body_name] = new_mass

        # Not sure if we need to do this?
        self._update()
        return new_masses

    def get_and_modify_bodymass(self, body_name: str, scale: float):
        idx = self.model.body_names.index(body_name)
        temp = np.copy(self.model.body_mass)
        temp[idx] *= scale
        return temp

    @staticmethod
    def _mass_setter(body_part: str, env: MujocoEnv, mass: float) -> None:
        """Function used to set the mass of a body part. This is used as the setter of the
        generated `<body_part>_mass` properties.
        """
        # Will raise an IndexError if the body part isnt found.
        idx = env.model.body_names.index(body_part)
        env.model.body_mass[idx] = mass


# def _get_mass(env: MujocoEnv, /, body_part: str) -> float:
#     # Will raise an IndexError if the body part isnt found.
#     idx = env.model.body_names.index(body_part)
#     return env.model.body_mass[idx]


================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_mass_test.py
================================================
""" TODO: Tests for the 'modified gravity' mujoco envs. """
import operator
from typing import ClassVar, List, Type

from gym.wrappers import TimeLimit

from sequoia.conftest import mujoco_required

pytestmark = mujoco_required


from .modified_mass import ModifiedMassEnv


class ModifiedMassEnvTests:
    Environment: ClassVar[Type[ModifiedMassEnv]]

    # names of the parts of the model which can be changed.
    body_names: ClassVar[List[str]]

    def test_generated_properties_change_the_actual_mass(self):
        env = self.Environment()
        for body_name in self.Environment.BODY_NAMES:
            # Get the value directly from the mujoco model.
            model_value = env.model.body_mass[env.model.body_names.index(body_name)]
            assert getattr(env, f"{body_name}_mass") == model_value
            new_value = model_value * 2
            setattr(env, f"{body_name}_mass", new_value)

            model_value = env.model.body_mass[env.model.body_names.index(body_name)]
            assert model_value == new_value

    def test_change_mass_each_step(self):
        env: ModifiedMassEnv = self.Environment()
        max_episode_steps = 200
        n_episodes = 3

        # NOTE: Interestingly, the renderer will show
        # `env.frame_skip * max_episode_steps` frames per episode, even when
        # "Ren[d]er every frame" is set to False.
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
        env: ModifiedMassEnv
        total_steps = 0

        for episode in range(n_episodes):
            initial_state = env.reset()
            done = False
            episode_steps = 0

            start_y = initial_state[1]
            moved_up = 0
            previous_state = initial_state
            state = initial_state

            body_part = self.Environment.BODY_NAMES[0]
            start_mass = env.get_mass(body_part)

            while not done:
                previous_state = state
                state, reward, done, info = env.step(env.action_space.sample())

                env.render("human")

                episode_steps += 1
                total_steps += 1

                env.set_mass(**{body_part: start_mass + 5 * total_steps / max_episode_steps})

                moved_up += state[1] > previous_state[1]
                print(f"Moving upward? {moved_up}")

        initial_z = env.init_qpos[1]
        final_z = env.sim.data.qpos[1]
        # TODO: Check that the change in mass had an impact

    def test_set_mass_with_task_schedule(self):
        body_part = "torso"
        original = self.Environment()
        starting_mass = original.get_mass("torso")
        task_schedule = {
            10: dict(),
            20: operator.methodcaller("set_mass", torso=starting_mass * 2),
            30: operator.methodcaller("set_mass", torso=starting_mass * 4),
        }
        from sequoia.common.gym_wrappers import MultiTaskEnvironment

        env = MultiTaskEnvironment(original, task_schedule=task_schedule)
        env.seed(123)
        env.reset()
        for step in range(100):
            _, _, done, _ = env.step(env.action_space.sample())
            # env.render()
            if done:
                env.reset()

            if 0 <= step < 10:
                assert env.get_mass(body_part) == starting_mass, step
            elif 10 <= step < 20:
                assert env.get_mass(body_part) == starting_mass, step
            elif 20 <= step < 30:
                assert env.get_mass(body_part) == starting_mass * 2, step
            elif step >= 30:
                assert env.get_mass(body_part) == starting_mass * 4, step
        env.close()


================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_size.py
================================================
import hashlib
import inspect
import os
import tempfile
import xml.etree.ElementTree as ET
from copy import deepcopy
from logging import getLogger as get_logger
from pathlib import Path
from typing import ClassVar, Dict, List

from gym.envs.mujoco import MujocoEnv

logger = get_logger(__name__)


def change_size_in_xml(
    tree: ET.ElementTree, **body_name_to_size_scale: Dict[str, float]
) -> ET.ElementTree:
    tree = deepcopy(tree)
    for body_name, size_scale in body_name_to_size_scale.items():
        body = tree.find(f".//body[@name='{body_name}']")
        geom = tree.find(f".//geom[@name='{body_name}']")
        if geom is None:
            geom = tree.find(f".//geom[@name='{body_name}_geom']")
        assert geom is not None
        assert "size" in geom.attrib
        # print(body_name)
        # print("Old size: ", geom.attrib["size"])
        sizes: List[float] = [float(s) for s in geom.attrib["size"].split(" ")]
        new_sizes = [size * size_scale for size in sizes]
        geom.attrib["size"] = " ".join(map(str, new_sizes))
        # print("New size: ", geom.attrib['size'])
    return tree


def get_geom_sizes(tree: ET.ElementTree, body_name: str) -> List[float]:
    # body = tree.find(f".//body[@name='{body_name}']")
    geom = tree.find(f".//geom[@name='{body_name}']")
    if geom is None:
        geom = tree.find(f".//geom[@name='{body_name}_geom']")
    assert geom is not None
    assert "size" in geom.attrib
    # print(body_name)
    # print("Old size: ", geom.attrib["size"])
    sizes: List[float] = [float(s) for s in geom.attrib["size"].split(" ")]
    return sizes


class ModifiedSizeEnv(MujocoEnv):
    """
    Allows changing the size of the body parts.

    TODO: This currently can modify the geometry in-place (at least visually) with the
    `self.model.geom_size` ndarray, but the joints don't follow the change in length.
    """

    BODY_NAMES: ClassVar[List[str]]

    # IDEA: Use somethign like this to tell appart modifications which can be applied
    # on-the-fly on a given env to get multiple tasks, vs those that require creating a
    # new environment for each task.
    CAN_BE_UPDATED_IN_PLACE: ClassVar[bool] = False

    def __init__(
        self,
        model_path: str,
        frame_skip: int,
        # TODO: IF using one or more of these `Modified<XYZ>` buffers, then we need to
        # get each one a distinct argument name, which isn't ideal!
        body_parts: List[str] = None,  # Has to be the name of a geom, not of a body!
        size_scales: List[float] = None,
        body_name_to_size_scale: Dict[str, float] = None,
        **kwargs,
    ):
        body_parts = body_parts or []
        size_scales = size_scales or []
        body_name_to_size_scale = body_name_to_size_scale or {}
        body_name_to_size_scale.update(zip(body_parts, size_scales))

        if model_path.startswith("/"):
            full_path = model_path
        else:
            full_path = os.path.join(
                os.path.dirname(inspect.getsourcefile(MujocoEnv)), "assets", model_path
            )
        if not os.path.exists(full_path):
            raise IOError(f"File {full_path} does not exist")

        # find the body_part we want

        if any(scale_factor == 0 for scale_factor in size_scales):
            raise RuntimeError("Can't use a scale_factor of 0!")

        logger.debug(f"Default XML path: {full_path}")
        self.default_tree = ET.parse(full_path)
        self.tree = self.default_tree

        if body_name_to_size_scale:
            logger.debug(f"Changing parts: {body_name_to_size_scale}")
            self.tree = change_size_in_xml(self.default_tree, **body_name_to_size_scale)
            # create new xml
            # IDEA: Create an XML file with a unique name somewhere, and then write the
            hash_str = hashlib.md5((str(self) + str(body_name_to_size_scale)).encode()).hexdigest()
            temp_dir = Path(tempfile.gettempdir())
            new_xml_path = temp_dir / f"{hash_str}.xml"
            if not new_xml_path.parent.exists():
                new_xml_path.parent.mkdir(exist_ok=False, parents=True)
            self.tree.write(str(new_xml_path))
            logger.debug(f"Generated XML path: {new_xml_path}")

            # Update the value to be passed to the constructor:
            full_path = str(new_xml_path)

        self.body_name_to_size_scale = body_name_to_size_scale
        # load the modified xml
        super().__init__(model_path=full_path, frame_skip=frame_skip, **kwargs)


================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_size_test.py
================================================
""" TODO: Tests for the 'modified size' mujoco envs. """
from typing import ClassVar, List, Type

import numpy as np
from gym.wrappers import TimeLimit

from sequoia.conftest import mujoco_required

pytestmark = mujoco_required

from .modified_size import ModifiedSizeEnv, get_geom_sizes


class ModifiedSizeEnvTests:
    Environment: ClassVar[Type[ModifiedSizeEnv]]

    def test_change_size_per_task(self):
        body_part = self.Environment.BODY_NAMES[0]

        nb_tasks = 2
        max_episode_steps = 200
        n_episodes = 2

        scale_factors: List[float] = [
            (0.5 + 2 * (task_id / nb_tasks)) for task_id in range(nb_tasks)
        ]
        default_tree = self.Environment().default_tree
        default_sizes: List[str] = get_geom_sizes(default_tree, body_part)

        task_envs: List[EnvType] = [
            # RenderEnvWrapper(
            TimeLimit(
                self.Environment(body_name_to_size_scale={body_part: scale_factor}),
                max_episode_steps=max_episode_steps,
            )
            # )
            for task_id, scale_factor in enumerate(scale_factors)
        ]

        for task_id, task_env in enumerate(task_envs):
            task_scale_factor = scale_factors[task_id]

            for episode in range(n_episodes):
                size = get_geom_sizes(task_env.tree, body_part)
                expected_size = [default_size * task_scale_factor for default_size in default_sizes]
                print(
                    f"default sizes: {default_sizes}, Size: {size}, "
                    f"task_scale_factor: {task_scale_factor}"
                )

                assert np.allclose(size, expected_size)

                state = task_env.reset()
                done = False
                steps = 0
                while not done:
                    obs, reward, done, info = task_env.step(task_env.action_space.sample())
                    steps += 1
                    # NOTE: Uncomment to visually inspect.
                    task_env.render("human")
            task_env.close()


================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_wall.py
================================================
"""
TODO: DO the same for the WallEnv from gym-extensions.
"""

# HalfCheetahWallEnv = lambda *args, **kwargs: WallEnvFactory(ModifiedHalfCheetahEnv)(
#     model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/half_cheetah.xml",
#     ori_ind=-1,
#     *args,
#     **kwargs
# )


================================================
FILE: sequoia/settings/rl/envs/mujoco/mujoco_model_utils.py
================================================
from dataclasses import dataclass
from typing import Any, NamedTuple, Sequence, Tuple, Union
from xml.etree.ElementTree import Element

import numpy as np


def pos_to_str(pos: Tuple[float, ...]) -> str:
    return " ".join("0" if v == 0 else str(round(v, 5)) for v in pos)


def str_to_pos(pos_str: str) -> "Pos":
    return Pos(*[float(v) for v in pos_str.split()])


class Pos(NamedTuple):
    x: float
    y: float
    z: float

    def to_str(self) -> str:
        """Return the 'str' version of `self` to be placed in a 'pos' field in the XML."""
        return pos_to_str(self)

    @classmethod
    def from_str(cls, pos_str: str) -> "Pos":
        return cls(*[float(v) for v in pos_str.split()])

    def __mul__(self, value: Union[int, float, np.ndarray]) -> "Pos":
        if isinstance(value, (int, float)):
            value = [value for _ in range(len(self))]
        if not isinstance(value, (list, tuple, np.ndarray)):
            return NotImplemented
        assert len(value) == len(self)
        return type(self)(*[v * axis_scaling_coef for v, axis_scaling_coef in zip(self, value)])

    def __eq__(self, other: Union[Tuple[float, ...], np.ndarray]):
        if not isinstance(other, (list, tuple, np.ndarray)):
            return NotImplemented
        return np.isclose(np.asfarray(self), np.asfarray(other)).all()

    def __rmul__(self, value: Any):
        return self * value

    def __truediv__(self, other: Union[int, float, Sequence[float]]):
        if isinstance(other, (int, float)):
            other = [other for _ in range(len(self))]
        if not isinstance(other, (list, tuple, np.ndarray)):
            return NotImplemented
        assert len(other) == len(self)
        return type(self)(*[v / v_other for v, v_other in zip(self, other)])

    def __add__(self, other: Union[int, float, np.ndarray]) -> "Pos":
        if isinstance(other, (int, float)):
            other = [other for _ in range(len(self))]
        if not isinstance(other, (list, tuple, np.ndarray)):
            return NotImplemented
        assert len(other) == len(self)
        return type(self)(*[v + v_other for v, v_other in zip(self, other)])

    def __radd__(self, other: Any) -> "Pos":
        return self + other

    def __neg__(self) -> "Pos":
        return type(self)(*[-v for v in self])

    def __sub__(self, other: Union[int, float, np.ndarray]) -> "Pos":
        if isinstance(other, (int, float)):
            other = [other for _ in range(len(self))]
        if not isinstance(other, (list, tuple, np.ndarray)):
            return NotImplemented
        assert len(other) == len(self)
        return self + (-other)
        # return type(self)(*[v + v_other for v, v_other in zip(self, other)])

    def __rsub__(self, other: Any) -> "Pos":
        return (-self) + other

    @classmethod
    def of_element(cls, element: Element, field: str = "pos") -> "Pos":
        if field not in element.attrib:
            raise RuntimeError(f"Element {element} doesn't have a '{field}' attribute.")
        return cls.from_str(element.attrib[field])

    def set_in_element(self, element: Element, field: str = "pos") -> None:
        if field not in element.attrib:
            # NOTE: Refusing to set a new field for now.
            raise RuntimeError(f"Element {element} doesn't have a '{field}' attribute.")
        element.set(field, self.to_str())


class FromTo(NamedTuple):
    start: Pos
    end: Pos

    def to_str(self) -> str:
        """Return the 'str' version of `self` to be placed in a 'pos' field in the XML."""
        return self.start.to_str() + " " + self.end.to_str()

    @classmethod
    def from_str(cls, fromto: str) -> "FromTo":
        values = [float(v) for v in fromto.split()]
        assert len(values) == 6
        return cls(Pos(*values[:3]), Pos(*values[3:]))

    @classmethod
    def of_element(cls, element: Element, field: str = "fromto") -> "FromTo":
        if field not in element.attrib:
            raise RuntimeError(f"Element {element} doesn't have a '{field}' attribute.")
        return cls.from_str(element.attrib.get(field))

    def set_in_element(self, element: Element, field: str = "fromto") -> None:
        if field not in element.attrib:
            # NOTE: Refusing to set a new field for now.
            raise RuntimeError(f"Element {element} doesn't have a '{field}' attribute.")
        element.set(field, self.to_str())

    @property
    def center(self) -> Pos:
        return (self.start + self.end) / 2


import textwrap


@dataclass
class FromTo:
    from_x: float
    from_y: float
    from_z: float
    to_x: float
    to_y: float
    to_z: float

    def __str__(self):
        return " ".join([self.from_x, self.from_y, self.from_z, self.to_x, self.to_y, self.to_z])


from dataclasses import dataclass


@dataclass
class TorsoGeom:
    friction: float = 0.9
    fromto = FromTo(0, 0, 1.45, 0, 0, 1.05)
    name: str = "torso_geom"
    size: float = 0.05
    type: str = "capsule"

    def render_xml(self) -> str:
        return f"""<geom friction="{self.friction}" fromto="{self.fromto}" name="{self.name}" size="{self.size}" type="{self.type}"/>"""


@dataclass
class HoperV3Model:
    torso_geom: TorsoGeom

    def render_xml(self) -> str:
        return textwrap.dedent(
            """\
            <mujoco model="hopper">
            <compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
            <default>
                <joint armature="1" damping="1" limited="true"/>
                <geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
                <motor ctrllimited="true" ctrlrange="-.4 .4"/>
            </default>
            <option integrator="RK4" timestep="0.002"/>
            <visual>
                <map znear="0.02"/>
            </visual>
            <worldbody>
                <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
                <geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
                <body name="torso" pos="0 0 1.25">
                <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
                <joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
                <joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
                <joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
                <geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
                <body name="thigh" pos="0 0 1.05">
                    <joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
                    <geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
                    <body name="leg" pos="0 0 0.35">
                    <joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
                    <geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
                    <body name="foot" pos="0.13/2 0 0.1">
                        <joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
                        <geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
                    </body>
                    </body>
                </body>
                </body>
            </worldbody>
            <actuator>
                <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
                <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
                <motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
            </actuator>
                <asset>
                    <texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
                        width="100" height="100"/>
                    <texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
                    <texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
                    <material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
                    <material name="geom" texture="texgeom" texuniform="true"/>
                </asset>
            </mujoco>
            """
        )


================================================
FILE: sequoia/settings/rl/envs/mujoco/walker2d.py
================================================
from typing import ClassVar, Dict, List, Tuple

from gym.envs.mujoco import MujocoEnv
from gym.envs.mujoco.walker2d import Walker2dEnv as _Walker2dV2Env
from gym.envs.mujoco.walker2d_v3 import Walker2dEnv as _Walker2dV3Env

from .modified_gravity import ModifiedGravityEnv
from .modified_mass import ModifiedMassEnv
from .modified_size import ModifiedSizeEnv


class Walker2dV2Env(_Walker2dV2Env):
    """
    Simply allows changing of XML file, probably not necessary if we pull request the
    xml name as a kwarg in openai gym
    """

    BODY_NAMES: ClassVar[List[str]] = [
        "torso",
        "thigh",
        "leg",
        "foot",
        "thigh_left",
        "leg_left",
        "foot_left",
    ]

    def __init__(self, model_path: str = "walker2d.xml", frame_skip: int = 4):
        MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)


class Walker2dV3Env(_Walker2dV3Env):
    BODY_NAMES: ClassVar[List[str]] = [
        "torso",
        "thigh",
        "leg",
        "foot",
        "thigh_left",
        "leg_left",
        "foot_left",
    ]

    def __init__(
        self,
        model_path: str = "walker2d.xml",
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 1e-3,
        healthy_reward: float = 1.0,
        terminate_when_unhealthy: bool = True,
        healthy_z_range: Tuple[float, float] = (0.8, 2.0),
        healthy_angle_range: Tuple[float, float] = (-1.0, 1.0),
        reset_noise_scale: float = 5e-3,
        exclude_current_positions_from_observation: bool = True,
        xml_file: str = None,
        frame_skip: int = 4,
    ):
        if frame_skip != 4:
            raise NotImplementedError("todo: Add a frame_skip arg to the gym class.")
        super().__init__(
            xml_file=xml_file or model_path,
            forward_reward_weight=forward_reward_weight,
            ctrl_cost_weight=ctrl_cost_weight,
            healthy_reward=healthy_reward,
            terminate_when_unhealthy=terminate_when_unhealthy,
            healthy_z_range=healthy_z_range,
            healthy_angle_range=healthy_angle_range,
            reset_noise_scale=reset_noise_scale,
            exclude_current_positions_from_observation=exclude_current_positions_from_observation,
        )


class Walker2dGravityEnv(ModifiedGravityEnv, Walker2dV2Env):
    # NOTE: This environment could be used in ContinualRL!
    def __init__(
        self,
        model_path: str = "walker2d.xml",
        frame_skip: int = 4,
        gravity: float = -9.81,
    ):
        super().__init__(model_path=model_path, frame_skip=frame_skip, gravity=gravity)


class ContinualWalker2dV2Env(ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, Walker2dV2Env):
    def __init__(
        self,
        model_path: str = "walker2d.xml",
        frame_skip: int = 4,
        gravity=-9.81,
        body_name_to_size_scale: Dict[str, float] = None,
        body_name_to_mass_scale: Dict[str, float] = None,
    ):
        super().__init__(
            model_path=model_path,
            frame_skip=frame_skip,
            gravity=gravity,
            # body_parts=body_parts,
            # size_scales=size_scales,
            body_name_to_size_scale=body_name_to_size_scale,
            body_name_to_mass_scale=body_name_to_mass_scale,
        )


class ContinualWalker2dV3Env(ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, Walker2dV3Env):
    # def __init__(self, model_path, frame_skip, gravity=-9.81, **kwargs):
    #     super().__init__(model_path, frame_skip, gravity=gravity, **kwargs)
    def __init__(
        self,
        model_path: str = "walker2d.xml",
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 1e-3,
        healthy_reward: float = 1.0,
        terminate_when_unhealthy: bool = True,
        healthy_z_range: Tuple[float, float] = (0.8, 2.0),
        healthy_angle_range: Tuple[float, float] = (-1.0, 1.0),
        reset_noise_scale: float = 5e-3,
        exclude_current_positions_from_observation: bool = True,
        gravity=-9.81,
        body_name_to_size_scale: Dict[str, float] = None,
        body_name_to_mass_scale: Dict[str, float] = None,
        xml_file: str = None,
        frame_skip: int = 4,
    ):
        if frame_skip != 4:
            raise NotImplementedError("todo: Add a frame_skip arg to the gym class.")
        super().__init__(
            model_path=model_path,
            frame_skip=frame_skip,
            xml_file=xml_file or model_path,
            forward_reward_weight=forward_reward_weight,
            ctrl_cost_weight=ctrl_cost_weight,
            healthy_reward=healthy_reward,
            terminate_when_unhealthy=terminate_when_unhealthy,
            healthy_z_range=healthy_z_range,
            healthy_angle_range=healthy_angle_range,
            reset_noise_scale=reset_noise_scale,
            exclude_current_positions_from_observation=exclude_current_positions_from_observation,
            body_name_to_size_scale=body_name_to_size_scale,
            body_name_to_mass_scale=body_name_to_mass_scale,
            gravity=gravity,
        )


================================================
FILE: sequoia/settings/rl/envs/mujoco/walker2d_test.py
================================================
from typing import ClassVar, Type

from sequoia.conftest import mujoco_required

from .modified_gravity_test import ModifiedGravityEnvTests
from .modified_mass_test import ModifiedMassEnvTests
from .modified_size_test import ModifiedSizeEnvTests
from .walker2d import ContinualWalker2dV2Env, ContinualWalker2dV3Env

pytestmark = mujoco_required


class TestContinualWalker2dV2Env(
    ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests
):
    Environment: ClassVar[Type[ContinualWalker2dV2Env]] = ContinualWalker2dV2Env


class TestContinualWalker2dV3Env(
    ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests
):
    Environment: ClassVar[Type[ContinualWalker2dV3Env]] = ContinualWalker2dV3Env


================================================
FILE: sequoia/settings/rl/envs/variant_spec.py
================================================
from typing import Any, Callable, Dict, Generic, List, Optional, TypeVar, Union

import gym
from gym.envs.registration import EnvSpec, load

EnvType = TypeVar("EnvType", bound=gym.Env)
_EntryPoint = Union[str, Callable[..., gym.Env]]


class EnvVariantSpec(EnvSpec, Generic[EnvType]):
    def __init__(
        self,
        id: str,
        base_spec: EnvSpec,
        entry_point: Union[str, Callable[..., EnvType]] = None,
        reward_threshold: int = None,
        nondeterministic: bool = False,
        max_episode_steps=None,
        kwargs=None,
    ):
        super().__init__(
            id_requested=id,
            entry_point=entry_point,
            reward_threshold=reward_threshold,
            nondeterministic=nondeterministic,
            max_episode_steps=max_episode_steps,
            kwargs=kwargs,
        )
        self.base_spec = base_spec

    def make(self, **kwargs) -> EnvType:
        return super().make(**kwargs)

    @classmethod
    def of(
        cls,
        original: EnvSpec,
        *,
        new_id: str,
        new_reward_threshold: Optional[float] = None,
        new_nondeterministic: Optional[bool] = None,
        new_max_episode_steps: Optional[int] = None,
        new_kwargs: Dict[str, Any] = None,
        new_entry_point: Union[str, Callable[..., gym.Env]] = None,
        wrappers: Optional[List[Callable[[gym.Env], gym.Env]]] = None,
    ) -> "EnvVariantSpec":
        """Returns a new env spec which uses additional wrappers.

        NOTE: The `new_kwargs` update the current kwargs, rather than replacing them.
        """
        new_spec_kwargs = original.kwargs
        new_spec_kwargs.update(new_kwargs or {})
        # Replace the entry-point if desired:
        new_spec_entry_point: Union[str, Callable[..., EnvType]] = (
            new_entry_point or original.entry_point
        )

        new_reward_threshold = (
            new_reward_threshold if new_reward_threshold is not None else original.reward_threshold
        )
        new_nondeterministic = (
            new_nondeterministic if new_nondeterministic is not None else original.nondeterministic
        )
        new_max_episode_steps = (
            new_max_episode_steps
            if new_max_episode_steps is not None
            else original.max_episode_steps
        )

        # Add wrappers if desired.
        if wrappers:
            # Get the callable that creates the env.
            if callable(original.entry_point):
                env_fn = original.entry_point
            else:
                env_fn = load(original.entry_point)
            # @lebrice Not sure if there is a cleaner way to do this, maybe using
            # functools.reduce or functools.partial?
            def _new_entry_point(**kwargs) -> gym.Env:
                env = env_fn(**kwargs)
                for wrapper in wrappers:
                    env = wrapper(env)
                return env

            new_spec_entry_point = _new_entry_point

        return cls(
            new_id,
            base_spec=original,
            entry_point=new_spec_entry_point,
            reward_threshold=new_reward_threshold,
            nondeterministic=new_nondeterministic,
            max_episode_steps=new_max_episode_steps,
            kwargs=new_spec_kwargs,
        )


================================================
FILE: sequoia/settings/rl/incremental/__init__.py
================================================
from .setting import IncrementalRLSetting
from .tasks import make_incremental_task


================================================
FILE: sequoia/settings/rl/incremental/objects.py
================================================
from dataclasses import dataclass
from typing import Optional, Sequence, TypeVar, Union

from torch import Tensor

from sequoia.settings.assumptions.incremental import IncrementalAssumption

from ..discrete import DiscreteTaskAgnosticRLSetting

# IncrementalAssumption, DiscreteTaskAgnosticRLSetting


@dataclass(frozen=True)
class Observations(DiscreteTaskAgnosticRLSetting.Observations, IncrementalAssumption.Observations):
    """Observations from a Continual Reinforcement Learning environment."""

    x: Tensor
    task_labels: Optional[Tensor] = None
    # The 'done' that is normally returned by the 'step' method.
    # We add this here in case a method were to iterate on the environments in the
    # dataloader-style so they also have access to those (i.e. for the BaseMethod).
    done: Optional[Union[bool, Sequence[bool]]] = None


@dataclass(frozen=True)
class Actions(DiscreteTaskAgnosticRLSetting.Actions, IncrementalAssumption.Actions):
    """Actions to be sent to a Continual Reinforcement Learning environment."""

    y_pred: Tensor


@dataclass(frozen=True)
class Rewards(DiscreteTaskAgnosticRLSetting.Rewards, IncrementalAssumption.Rewards):
    """Rewards obtained from a Continual Reinforcement Learning environment."""

    y: Tensor


ObservationType = TypeVar("ObservationType", bound=Observations)
ActionType = TypeVar("ActionType", bound=Actions)
RewardType = TypeVar("RewardType", bound=Rewards)


================================================
FILE: sequoia/settings/rl/incremental/results.py
================================================
from dataclasses import dataclass
from typing import ClassVar, TypeVar

from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings.assumptions.incremental_results import IncrementalResults

MetricType = TypeVar("MetricsType", bound=EpisodeMetrics)


@dataclass
class IncrementalRLResults(IncrementalResults[MetricType]):
    # Higher mean reward / episode => better
    lower_is_better: ClassVar[bool] = False

    objective_name: ClassVar[str] = "Mean reward per episode"

    # Minimum runtime considered (in hours).
    # (No extra points are obtained for going faster than this.)
    min_runtime_hours: ClassVar[float] = 1.5
    # Maximum runtime allowed (in hours).
    max_runtime_hours: ClassVar[float] = 12.0


================================================
FILE: sequoia/settings/rl/incremental/setting.py
================================================
import itertools
import operator
import sys
import warnings
from dataclasses import dataclass, fields
from functools import partial
from itertools import islice
from typing import Callable, ClassVar, Dict, List, Optional, Tuple, Type, Union

import gym
import numpy as np
from gym import spaces
from gym.envs.registration import EnvSpec
from gym.utils import colorize
from gym.vector.utils import batch_space
from simple_parsing import list_field
from simple_parsing.helpers import choice
from typing_extensions import Final

from sequoia.common.gym_wrappers import MultiTaskEnvironment, TransformObservation
from sequoia.common.gym_wrappers.utils import is_monsterkong_env
from sequoia.common.metrics import EpisodeMetrics
from sequoia.common.spaces import Sparse
from sequoia.common.spaces.typed_dict import TypedDictSpace
from sequoia.common.transforms import Transforms
from sequoia.settings.assumptions.iid_results import TaskResults
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.settings.base import Method
from sequoia.settings.rl.continual import ContinualRLSetting
from sequoia.settings.rl.envs import (
    METAWORLD_INSTALLED,
    MTENV_INSTALLED,
    MUJOCO_INSTALLED,
    MetaWorldEnv,
    MTEnv,
    metaworld_envs,
    mtenv_envs,
)
from sequoia.settings.rl.wrappers.task_labels import FixedTaskLabelWrapper
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import constant, dict_union, pairwise

from ..discrete.setting import DiscreteTaskAgnosticRLSetting
from ..discrete.setting import supported_envs as _parent_supported_envs
from .objects import Actions, Observations, Rewards  # type: ignore
from .results import IncrementalRLResults
from .tasks import IncrementalTask, is_supported, make_incremental_task, sequoia_registry

logger = get_logger(__name__)

# A callable that returns an env.
EnvFactory = Callable[[], gym.Env]

# TODO: Move this 'passing custom env for each task' feature up into DiscreteTaskAgnosticRL.
# TODO: Design a better mechanism for extending this task creation. Currently, this dictionary lists
# out the 'supported envs' (envs for which we have an explicit way of creating tasks). However when
# the dataset is set to "MT10" for example, then that does something different: It hard-sets some
# of the values of the fields on the setting!
supported_envs: Dict[str, Union[str, EnvSpec]] = dict_union(
    _parent_supported_envs,
    {
        spec.id: spec
        for env_id, spec in sequoia_registry.env_specs.items()
        if spec.id not in _parent_supported_envs and is_supported(env_id)
    },
)
if METAWORLD_INSTALLED:
    supported_envs["MT10"] = "MT10"
    supported_envs["MT50"] = "MT50"
    supported_envs["CW10"] = "CW10"
    supported_envs["CW20"] = "CW20"
if MUJOCO_INSTALLED:
    for env_name, modification, version in itertools.product(
        ["HalfCheetah", "Hopper", "Walker2d"], ["bodyparts", "gravity"], ["v2", "v3"]
    ):
        env_id = f"LPG-FTW-{modification}-{env_name}-{version}"
        supported_envs[env_id] = env_id


available_datasets: Dict[str, str] = {env_id: env_id for env_id in supported_envs}


@dataclass
class IncrementalRLSetting(IncrementalAssumption, DiscreteTaskAgnosticRLSetting):
    """Continual RL setting in which:
    - Changes in the environment's context occur suddenly (same as in Discrete, Task-Agnostic RL)
    - Task boundary information (and task labels) are given at training time
    - Task boundary information is given at test time, but task identity is not.
    """

    Observations: ClassVar[Type[Observations]] = Observations
    Actions: ClassVar[Type[Actions]] = Actions
    Rewards: ClassVar[Type[Rewards]] = Rewards

    # The function used to create the tasks for the chosen env.
    _task_sampling_function: ClassVar[Callable[..., IncrementalTask]] = make_incremental_task
    Results: ClassVar[Type[Results]] = IncrementalRLResults

    # Class variable that holds the dict of available environments.
    available_datasets: ClassVar[Dict[str, str]] = available_datasets
    # Which dataset/environment to use for training, validation and testing.
    dataset: str = choice(available_datasets, default="CartPole-v0")

    # # The number of tasks. By default 0, which means that it will be set
    # # depending on other fields in __post_init__, or eventually be just 1.
    # nb_tasks: int = field(0, alias=["n_tasks", "num_tasks"])

    # (Copied from the assumption, just for clarity:)
    # TODO: Shouldn't these kinds of properties be on the class, rather than on the
    # instance?

    # Wether the task boundaries are smooth or sudden.
    smooth_task_boundaries: Final[bool] = constant(False)
    # Wether to give access to the task labels at train time.
    task_labels_at_train_time: Final[bool] = constant(True)
    # Wether to give access to the task labels at test time.
    task_labels_at_test_time: bool = False

    # NOTE: Specifying the `type` to use for the argparse argument, because of a bug in
    # simple-parsing that makes this not work correctly atm.
    train_envs: List[Union[str, Callable[[], gym.Env]]] = list_field(type=str)
    val_envs: List[Union[str, Callable[[], gym.Env]]] = list_field(type=str)
    test_envs: List[Union[str, Callable[[], gym.Env]]] = list_field(type=str)

    def __post_init__(self):
        defaults = {f.name: f.default for f in fields(self)}
        # NOTE: These benchmark functions don't just create the datasets, they actually set most of
        # the fields too!
        if isinstance(self.dataset, str) and self.dataset.startswith("LPG-FTW"):
            self.train_envs, self.val_envs, self.test_envs = make_lpg_ftw_datasets(self.dataset)
            # Use fewer tasks, if a custom number was passed. (NOTE: This is not ideal, same as
            # everywhere else that has to check against the default value)
            if self.nb_tasks not in {None, defaults["nb_tasks"]}:
                logger.info(
                    f"Using a custom number of tasks ({self.nb_tasks}) instead of the default "
                    f"({len(self.train_envs)})."
                )
                self.train_envs = self.train_envs[: self.nb_tasks]
                self.val_envs = self.val_envs[: self.nb_tasks]
                self.test_envs = self.test_envs[: self.nb_tasks]

            self.nb_tasks = len(self.train_envs)
            self.max_episode_steps = self.max_episode_steps or 1_000
            self.train_steps_per_task = 100_000
            self.train_max_steps = self.nb_tasks * self.train_steps_per_task
            self.test_steps_per_task = 10_000
            self.test_max_steps = self.nb_tasks * self.test_steps_per_task

            task_label_space = spaces.Discrete(self.nb_tasks)
            train_task_label_space = task_label_space
            if not self.task_labels_at_train_time:
                train_task_label_space = Sparse(train_task_label_space, sparsity=1.0)
            # This should be ok for now.
            val_task_label_space = train_task_label_space

            test_task_label_space = task_label_space
            if not self.task_labels_at_test_time:
                test_task_label_space = Sparse(test_task_label_space, sparsity=1.0)

            train_seed: Optional[int] = None
            valid_seed: Optional[int] = None
            test_seed: Optional[int] = None
            if self.config and self.config.seed is not None:
                train_seed = self.config.seed
                valid_seed = train_seed + 123
                test_seed = train_seed + 456

            self.train_envs = [
                partial(
                    create_env,
                    env_fn=env_fn,
                    wrappers=[
                        partial(
                            FixedTaskLabelWrapper,
                            task_label=(i if self.task_labels_at_train_time else None),
                            task_label_space=train_task_label_space,
                        )
                    ],
                    seed=train_seed,
                )
                for i, env_fn in enumerate(self.train_envs)
            ]

            self.val_envs = [
                partial(
                    create_env,
                    env_fn=env_fn,
                    wrappers=[
                        partial(
                            FixedTaskLabelWrapper,
                            task_label=(i if self.task_labels_at_train_time else None),
                            task_label_space=val_task_label_space,
                        )
                    ],
                    seed=valid_seed,
                )
                for i, env_fn in enumerate(self.train_envs)
            ]

            self.test_envs = [
                partial(
                    create_env,
                    env_fn=env_fn,
                    wrappers=[
                        partial(
                            FixedTaskLabelWrapper,
                            task_label=(i if self.task_labels_at_test_time else None),
                            task_label_space=test_task_label_space,
                        )
                    ],
                    seed=test_seed,
                )
                for i, env_fn in enumerate(self.train_envs)
            ]

        # Meta-World datasets:
        if self.dataset in ["MT10", "MT50", "CW10", "CW20"]:

            from metaworld import MT10, MT50, MetaWorldEnv, Task

            benchmarks = {
                "MT10": MT10,
                "MT50": MT50,
                "CW10": MT50,
                "CW20": MT50,
            }
            benchmark_class = benchmarks[self.dataset]
            logger.info(
                f"Creating metaworld benchmark {benchmark_class}, this might take a "
                f"while (~15 seconds)."
            )
            # NOTE: Saving this attribute on `self` for the time being so that it can be inspected
            # by the tests if needed. However it would be best to move this benchmark stuff into a
            # function, same as with LPG-FTW.
            benchmark = benchmark_class(seed=self.config.seed if self.config else None)
            self._benchmark = benchmark
            envs: Dict[str, Type[MetaWorldEnv]] = benchmark.train_classes
            env_tasks: Dict[str, List[Task]] = {
                env_name: [task for task in benchmark.train_tasks if task.env_name == env_name]
                for env_name, env_class in benchmark.train_classes.items()
            }
            train_env_tasks: Dict[str, List[Task]] = {}
            val_env_tasks: Dict[str, List[Task]] = {}
            test_env_tasks: Dict[str, List[Task]] = {}
            test_fraction = 0.1
            val_fraction = 0.1
            for env_name, env_tasks in env_tasks.items():
                n_tasks = len(env_tasks)
                n_val_tasks = int(max(1, n_tasks * val_fraction))
                n_test_tasks = int(max(1, n_tasks * test_fraction))
                n_train_tasks = len(env_tasks) - n_val_tasks - n_test_tasks
                if n_train_tasks <= 1:
                    # Can't create train, val and test tasks.
                    raise RuntimeError(f"There aren't enough tasks for env {env_name} ({n_tasks}) ")
                tasks_iterator = iter(env_tasks)
                train_env_tasks[env_name] = list(islice(tasks_iterator, n_train_tasks))
                val_env_tasks[env_name] = list(islice(tasks_iterator, n_val_tasks))
                test_env_tasks[env_name] = list(islice(tasks_iterator, n_test_tasks))
                assert train_env_tasks[env_name]
                assert val_env_tasks[env_name]
                assert test_env_tasks[env_name]

            max_train_steps_per_task = 1_000_000
            if self.dataset in ["CW10", "CW20"]:
                # TODO: Raise a warning if the number of tasks is non-default and set to
                # something different than in the benchmark
                # Re-create the [ContinualWorld benchmark](@TODO: Add citation here)
                version = 2
                env_names = [
                    f"hammer-v{version}",
                    f"push-wall-v{version}",
                    f"faucet-close-v{version}",
                    f"push-back-v{version}",
                    f"stick-pull-v{version}",
                    f"handle-press-side-v{version}",
                    f"push-v{version}",
                    f"shelf-place-v{version}",
                    f"window-close-v{version}",
                    f"peg-unplug-side-v{version}",
                ]
                if (
                    self.train_steps_per_task not in [defaults["train_steps_per_task"], None]
                    and self.train_steps_per_task > max_train_steps_per_task
                ):
                    raise RuntimeError(
                        f"Can't use more than {max_train_steps_per_task} steps per "
                        f"task in the {self.dataset} benchmark!"
                    )

                # TODO: Decide the number of test steps.
                # NOTE: Should we allow using fewer steps?
                # NOTE: The default value for this field is 10_000 currently, so this
                # check doesn't do anything.
                if self.dataset == "CW20":
                    # CW20 does tasks [0 -> 10] and then [0 -> 10] again.
                    env_names = env_names * 2
                train_env_names = env_names
                val_env_names = env_names
                test_env_names = env_names
            else:
                train_env_names = list(train_env_tasks.keys())
                val_env_names = list(val_env_tasks.keys())
                test_env_names = list(test_env_tasks.keys())

            self.nb_tasks = len(train_env_names)
            if self.train_max_steps not in [defaults["train_max_steps"], None]:
                self.train_steps_per_task = self.train_max_steps // self.nb_tasks
            elif self.train_steps_per_task is None:
                self.train_steps_per_task = max_train_steps_per_task
                self.train_max_steps = self.nb_tasks * self.train_steps_per_task

            if self.test_max_steps in [defaults["test_max_steps"], None]:
                if self.test_steps_per_task is None:
                    self.test_steps_per_task = 10_000
                self.test_max_steps = self.test_steps_per_task * self.nb_tasks

            # TODO: Double-check that the train/val/test wrappers are added to each env.
            self.train_envs = [
                partial(
                    make_metaworld_env,
                    env_class=envs[env_name],
                    tasks=train_env_tasks[env_name],
                )
                for env_name in train_env_names
            ]
            self.val_envs = [
                partial(
                    make_metaworld_env,
                    env_class=envs[env_name],
                    tasks=val_env_tasks[env_name],
                )
                for env_name in val_env_names
            ]
            self.test_envs = [
                partial(
                    make_metaworld_env,
                    env_class=envs[env_name],
                    tasks=test_env_tasks[env_name],
                )
                for env_name in test_env_names
            ]

        # if is_monsterkong_env(self.dataset):
        #     if self.force_pixel_observations:
        #         # Add this to the kwargs that will be passed to gym.make, to make sure that
        #         # we observe pixels, and not state.
        #         self.base_env_kwargs["observe_state"] = False
        #     elif self.force_state_observations:
        #         self.base_env_kwargs["observe_state"] = True

        self._using_custom_envs_foreach_task: bool = False
        if self.train_envs:
            self._using_custom_envs_foreach_task = True

            if self.dataset == defaults["dataset"]:
                # avoid the `dataset` key keeping the default value of "CartPole-v0" when we pass
                # envs for each task (and no value for the `dataset` argument).
                self.dataset = None

            # TODO: Raise a warning if we're going to overwrite a non-default nb_tasks?
            self.nb_tasks = len(self.train_envs)
            assert self.train_steps_per_task or self.train_max_steps
            if self.train_steps_per_task is None:
                self.train_steps_per_task = self.train_max_steps // self.nb_tasks
            # TODO: Should we use the task schedules to tell the length of each task?
            if self.test_steps_per_task in [defaults["test_steps_per_task"], None]:
                self.test_steps_per_task = self.test_max_steps // self.nb_tasks
            assert self.test_steps_per_task
            assert self.train_steps_per_task == self.train_max_steps // self.nb_tasks, (
                self.train_max_steps,
                self.train_steps_per_task,
                self.nb_tasks,
            )

            task_schedule_keys = np.linspace(
                0, self.train_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int
            ).tolist()
            self.train_task_schedule = self.train_task_schedule or {
                key: {} for key in task_schedule_keys
            }
            self.val_task_schedule = self.train_task_schedule.copy()

            assert self.test_steps_per_task == self.test_max_steps // self.nb_tasks, (
                self.test_max_steps,
                self.test_steps_per_task,
                self.nb_tasks,
            )
            test_task_schedule_keys = np.linspace(
                0, self.test_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int
            ).tolist()
            self.test_task_schedule = self.test_task_schedule or {
                key: {} for key in test_task_schedule_keys
            }

            if not self.val_envs:
                # TODO: Use a wrapper that sets a different random seed?
                self.val_envs = self.train_envs.copy()
            if not self.test_envs:
                # TODO: Use a wrapper that sets a different random seed?
                self.test_envs = self.train_envs.copy()
            if (
                any(self.train_task_schedule.values())
                or any(self.val_task_schedule.values())
                or any(self.test_task_schedule.values())
            ):
                raise RuntimeError(
                    "Can't use a non-empty task schedule when passing the " "train/valid/test envs."
                )

            self.train_dataset: Union[str, Callable[[], gym.Env]] = self.train_envs[0]
            self.val_dataset: Union[str, Callable[[], gym.Env]] = self.val_envs[0]
            self.test_dataset: Union[str, Callable[[], gym.Env]] = self.test_envs[0]

            # TODO: Add wrappers with the fixed task id for each env, if necessary, right?
        else:
            if self.val_envs or self.test_envs:
                raise RuntimeError(
                    "Can't pass `val_envs` or `test_envs` without passing `train_envs`."
                )

        # Call super().__post_init__() (delegates up the chain: IncrementalAssumption->DiscreteRL->ContinualRL)
        # NOTE: This deep inheritance isn't ideal. Should probably use composition instead somehow.
        super().__post_init__()

        if self._using_custom_envs_foreach_task:
            # TODO: Use 'no-op' task schedules for now.
            # self.train_task_schedule.clear()
            # self.val_task_schedule.clear()
            # self.test_task_schedule.clear()
            pass

            # TODO: Check that all the envs have the same observation spaces!
            # (If possible, find a way to check this without having to instantiate all
            # the envs.)

        # TODO: If the dataset has a `max_path_length` attribute, then it's probably
        # a Mujoco / metaworld / etc env, and so we set a limit on the episode length to
        # avoid getting an error.
        max_path_length: Optional[int] = getattr(self._temp_train_env, "max_path_length", None)
        if self.max_episode_steps is None and max_path_length is not None:
            assert max_path_length > 0
            logger.info(
                f"Setting the max episode steps to {max_path_length} because a 'max_path_length' "
                f"attribute is present on the train env."
            )
            self.max_episode_steps = max_path_length

        # if self.dataset == "MetaMonsterKong-v0":
        #     # TODO: Limit the episode length in monsterkong?
        #     # TODO: Actually end episodes when reaching a task boundary, to force the
        #     # level to change?
        #     self.max_episode_steps = self.max_episode_steps or 500

        # FIXME: Really annoying little bugs with these three arguments!
        # self.nb_tasks = self.max_steps // self.steps_per_task

    @property
    def current_task_id(self) -> int:
        return self._current_task_id

    @current_task_id.setter
    def current_task_id(self, value: int) -> None:
        if value != self._current_task_id:
            # Set those to False so we re-create the wrappers for each task.
            self._has_setup_fit = False
            self._has_setup_validate = False
            self._has_setup_test = False
            # TODO: No idea what the difference is between `predict` and test.
            self._has_setup_predict = False
            # TODO: There are now also teardown hooks, maybe use them?
        self._current_task_id = value

    @property
    def train_task_lengths(self) -> List[int]:
        """Gives the length of each training task (in steps for now)."""
        return [
            task_b_step - task_a_step
            for task_a_step, task_b_step in pairwise(sorted(self.train_task_schedule.keys()))
        ]

    @property
    def train_phase_lengths(self) -> List[int]:
        """Gives the length of each training 'phase', i.e. the maximum number of (steps
        for now) that can be taken in the training environment, in a single call to .fit
        """
        return [
            task_b_step - task_a_step
            for task_a_step, task_b_step in pairwise(sorted(self.train_task_schedule.keys()))
        ]

    @property
    def current_train_task_length(self) -> int:
        """Deprecated field, gives back the max number of steps per task."""
        if self.stationary_context:
            return sum(self.train_task_lengths)
        return self.train_task_lengths[self.current_task_id]

    @property
    def task_label_space(self) -> gym.Space:
        # TODO: Explore an alternative design for the task sampling, based more around
        # gym spaces rather than the generic function approach that's currently used?
        # IDEA: Might be cleaner to put this in the assumption class
        task_label_space = spaces.Discrete(self.nb_tasks)
        if not self.task_labels_at_train_time or not self.task_labels_at_test_time:
            sparsity = 1
            if self.task_labels_at_train_time ^ self.task_labels_at_test_time:
                # We have task labels "50%" of the time, ish:
                sparsity = 0.5
            task_label_space = Sparse(task_label_space, sparsity=sparsity)
        return task_label_space

    def setup(self, stage: str = None) -> None:
        # Called before the start of each task during training, validation and
        # testing.
        super().setup(stage=stage)
        # What's done in ContinualRLSetting:
        # if stage in {"fit", None}:
        #     self.train_wrappers = self.create_train_wrappers()
        #     self.valid_wrappers = self.create_valid_wrappers()
        # elif stage in {"test", None}:
        #     self.test_wrappers = self.create_test_wrappers()
        if self._using_custom_envs_foreach_task:
            logger.debug(
                f"Using custom environments from `self.[train/val/test]_envs` for task "
                f"{self.current_task_id}."
            )

            if self.stationary_context:
                from sequoia.settings.rl.discrete.multienv_wrappers import (
                    ConcatEnvsWrapper,
                    RandomMultiEnvWrapper,
                    RoundRobinWrapper,
                )

                # NOTE: Here is how this supports passing custom envs for each task: We
                # just switch out the value of these properties, and let the
                # `train/val/test_dataloader` methods work as usual!
                wrapper_type = RandomMultiEnvWrapper
                if self.task_labels_at_train_time or "pytest" in sys.modules:
                    # A RoundRobin wrapper can be used when task labels are available,
                    # because the task labels are available anyway, so it doesn't matter
                    # if the Method figures out the pattern in the task IDs.
                    # A RoundRobinWrapper is also used during testing, because it
                    # makes it easier to check that things are working correctly: for example that
                    # each task is visited equally, even when the number of total steps is small.
                    wrapper_type = RoundRobinWrapper

                # NOTE: Not instantiating all the train/val/test envs here. Instead, the multienv
                # wrapper will lazily instantiate the envs as needed.
                # self.train_envs = instantiate_all_envs_if_needed(self.train_envs)
                # self.val_envs = instantiate_all_envs_if_needed(self.val_envs)
                # self.test_envs = instantiate_all_envs_if_needed(self.test_envs)
                self.train_dataset = wrapper_type(
                    self.train_envs, add_task_ids=self.task_labels_at_train_time
                )
                self.val_dataset = wrapper_type(
                    self.val_envs, add_task_ids=self.task_labels_at_train_time
                )
                self.test_dataset = ConcatEnvsWrapper(
                    self.test_envs, add_task_ids=self.task_labels_at_test_time
                )
            elif self.known_task_boundaries_at_train_time:
                self.train_dataset = self.train_envs[self.current_task_id]
                self.val_dataset = self.val_envs[self.current_task_id]
                # TODO: The test loop goes through all the envs, hence this doesn't really
                # work.
                self.test_dataset = self.test_envs[self.current_task_id]
            else:
                self.train_dataset = ConcatEnvsWrapper(
                    self.train_envs, add_task_ids=self.task_labels_at_train_time
                )
                self.val_dataset = ConcatEnvsWrapper(
                    self.val_envs, add_task_ids=self.task_labels_at_train_time
                )
                self.test_dataset = ConcatEnvsWrapper(
                    self.test_envs, add_task_ids=self.task_labels_at_test_time
                )
            # Check that the observation/action spaces are all the same for all
            # the train/valid/test envs
            self._check_all_envs_have_same_spaces(
                envs_or_env_functions=self.train_envs,
                wrappers=self.train_wrappers,
            )
            # TODO: Inconsistent naming between `val_envs` and `valid_wrappers` etc.
            self._check_all_envs_have_same_spaces(
                envs_or_env_functions=self.val_envs,
                wrappers=self.val_wrappers,
            )
            self._check_all_envs_have_same_spaces(
                envs_or_env_functions=self.test_envs,
                wrappers=self.test_wrappers,
            )
        else:
            # TODO: Should we populate the `self.train_envs`, `self.val_envs` and
            # `self.test_envs` fields here as well, just to be consistent?
            # base_env = self.dataset
            # def task_env(task_index: int) -> Callable[[], MultiTaskEnvironment]:
            #     return self._make_env(
            #         base_env=base_env,
            #         wrappers=[],
            #     )
            # self.train_envs = [partial(gym.make, self.dataset) for i in range(self.nb_tasks)]
            # self.val_envs = [partial(gym.make, self.dataset) for i in range(self.nb_tasks)]
            # self.test_envs = [partial(gym.make, self.dataset) for i in range(self.nb_tasks)]
            # assert False, self.train_task_schedule
            pass

    def test_dataloader(self, batch_size: Optional[int] = None, num_workers: Optional[int] = None):
        if not self._using_custom_envs_foreach_task:
            return super().test_dataloader(batch_size=batch_size, num_workers=num_workers)

        # IDEA: Pretty hacky, but might be cleaner than adding fields for the moment.
        test_max_steps = self.test_max_steps
        test_max_episodes = self.test_max_episodes
        self.test_max_steps = test_max_steps // self.nb_tasks
        if self.test_max_episodes:
            self.test_max_episodes = test_max_episodes // self.nb_tasks
        # self.test_env = self.TestEnvironment(self.test_envs[self.current_task_id])

        task_test_env = super().test_dataloader(batch_size=batch_size, num_workers=num_workers)

        self.test_max_steps = test_max_steps
        self.test_max_episodes = test_max_episodes
        return task_test_env

    def test_loop(self, method: Method["IncrementalRLSetting"]):
        if not self._using_custom_envs_foreach_task:
            return super().test_loop(method)

        # TODO: If we're using custom envs for each task, then the test loop needs to be
        # re-organized.
        # raise NotImplementedError(
        #     f"TODO: Need to add a wrapper that can switch between envs, or "
        #     f"re-write the test loop."
        # )
        assert self.nb_tasks == len(self.test_envs), "assuming this for now."
        test_envs = []
        for task_id in range(self.nb_tasks):
            # TODO: Make sure that self.test_dataloader() uses the right number of steps
            # per test task (current hard-set to self.test_max_steps).
            task_test_env = self.test_dataloader()
            test_envs.append(task_test_env)

        # TODO: Move these wrappers to sequoia/common/gym_wrappers/multienv_wrappers or something,
        # and then import them correctly at the top of this file.
        from ..discrete.multienv_wrappers import ConcatEnvsWrapper

        task_label_space = spaces.Discrete(self.nb_tasks)
        if self.batch_size is not None:
            task_label_space = batch_space(task_label_space, self.batch_size)
        if not self.task_labels_at_test_time:
            task_label_space = Sparse(task_label_space, sparsity=1)

        test_envs_with_task_ids = [
            FixedTaskLabelWrapper(
                env=test_env,
                task_label=(i if self.task_labels_at_test_time else None),
                task_label_space=task_label_space,
            )
            for i, test_env in enumerate(test_envs)
        ]

        # NOTE: This check is a bit redundant here, since IncrementalRLSetting always has task
        # boundaries, but this might be useful if moving this to DiscreteTaskIncrementalRL

        on_task_switch_callback: Optional[Callable[[Optional[int]], None]]
        if self.known_task_boundaries_at_test_time:
            on_task_switch_callback = getattr(method, "on_task_switch", None)

        # NOTE: Not adding a task id here, since we instead add the fixed task id for each test env.
        # NOTE: Not adding task ids with this, doing it instead with a dedicated wrapper for each env above.
        joined_test_env = ConcatEnvsWrapper(
            test_envs_with_task_ids,
            add_task_ids=False,
            on_task_switch_callback=on_task_switch_callback,
        )
        # TODO: Use this 'joined' test environment in this test loop somehow.
        # IDEA: Hacky way to do it: (I don't think this will work as-is though)
        _test_dataloader_method = self.test_dataloader
        self.test_dataloader = lambda *args, **kwargs: joined_test_env
        super().test_loop(method)
        self.test_dataloader = _test_dataloader_method

        test_loop_results = DiscreteTaskAgnosticRLSetting.Results()
        for task_id, test_env in enumerate(test_envs):
            # TODO: The results are still of the wrong type, because we aren't changing
            # the type of test environment or the type of Results
            results_of_wrong_type: IncrementalRLResults = test_env.get_results()
            # For now this weird setup means that there will be only one 'result'
            # object in this that actually has metrics:
            # assert results_of_wrong_type.task_results[task_id].metrics
            all_metrics: List[EpisodeMetrics] = sum(
                [result.metrics for result in results_of_wrong_type.task_results], []
            )
            n_metrics_in_each_result = [
                len(result.metrics) for result in results_of_wrong_type.task_results
            ]
            # assert all(n_metrics == 0 for i, n_metrics in enumerate(n_metrics_in_each_result) if i != task_id), (n_metrics_in_each_result, task_id)
            # TODO: Also transfer the other properties like runtime, online performance,
            # etc?
            # TODO: Maybe add addition for these?
            # task_result = sum(results_of_wrong_type.task_results)
            task_result = TaskResults(metrics=all_metrics)
            # task_result: TaskResults[EpisodeMetrics] = results_of_wrong_type.task_results[task_id]
            test_loop_results.task_results.append(task_result)
        return test_loop_results

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        In this Incremental-RL Setting, fit is called once per task.
        (Same as ClassIncrementalSetting in SL).
        """
        return self.nb_tasks

    @staticmethod
    def _make_env(
        base_env: Union[str, gym.Env, Callable[[], gym.Env]],
        wrappers: List[Callable[[gym.Env], gym.Env]] = None,
        **base_env_kwargs: Dict,
    ) -> gym.Env:
        """Helper function to create a single (non-vectorized) environment.

        This is also used to create the env whenever `self.dataset` is a string that
        isn't registered in gym. This happens for example when using an environment from
        meta-world (or mtenv).
        """
        # Check if the env is registed in a known 'third party' gym-like package, and if
        # needed, create the base env in the way that package requires.
        if isinstance(base_env, str):
            env_id = base_env

            # Check if the id belongs to mtenv
            if MTENV_INSTALLED and env_id in mtenv_envs:
                from mtenv import make as mtenv_make

                # This is super weird. Don't undestand at all
                # why they are doing this. Makes no sense to me whatsoever.
                base_env = mtenv_make(env_id, **base_env_kwargs)

                # Add a wrapper that will remove the task information, because we use
                # the same MultiTaskEnv wrapper for all the environments.
                wrappers.insert(0, MTEnvAdapterWrapper)

            if METAWORLD_INSTALLED and env_id in metaworld_envs:
                # TODO: Should we use a particular benchmark here?
                # For now, we find the first benchmark that has an env with this name.
                import metaworld

                for benchmark_class in [metaworld.ML10]:
                    benchmark = benchmark_class()
                    if env_id in benchmark.train_classes.keys():
                        # TODO: We can either let the base_env be an env type, or
                        # actually instantiate it.
                        base_env: Type[MetaWorldEnv] = benchmark.train_classes[env_id]
                        # NOTE: (@lebrice) Here I believe it's better to just have the
                        # constructor, that way we re-create the env for each task.
                        # I think this might be better, as I don't know for sure that
                        # the `set_task` can be called more than once in metaworld.
                        # base_env = base_env_type()
                        break
                else:
                    raise NotImplementedError(
                        f"Can't find a metaworld benchmark that uses env {env_id}"
                    )

        return ContinualRLSetting._make_env(
            base_env=base_env,
            wrappers=wrappers,
            **base_env_kwargs,
        )

    def create_task_schedule(
        self,
        temp_env: gym.Env,
        change_steps: List[int],
        seed: int = None,
    ) -> Dict[int, Dict]:
        task_schedule: Dict[int, Dict] = {}
        if self._using_custom_envs_foreach_task:
            # If custom envs were passed to be used for each task, then we don't create
            # a "task schedule", because the only reason we're using a task schedule is
            # when we want to change something about the 'base' env in order to get
            # multiple tasks.
            # Create a task schedule dict, just to fit in?
            for i, task_step in enumerate(change_steps):
                task_schedule[task_step] = {}
            return task_schedule

        # TODO: Make it possible to use something other than steps as keys in the task
        # schedule, something like a NamedTuple[int, DeltaType], e.g. Episodes(10) or
        # Steps(10), something like that!
        # IDEA: Even fancier, we could use a TimeDelta to say "do one hour of task 0"!!
        for step in change_steps:
            # TODO: Add a `stage` argument (an enum or something with 'train', 'valid'
            # 'test' as values, and pass it to this function. Tasks should be the same
            # in train/valid for now, given the same task Id.
            # TODO: When the Results become able to handle a different ordering of tasks
            # at train vs test time, allow the test task schedule to have different
            # ordering than train / valid.
            task = type(self)._task_sampling_function(
                temp_env,
                step=step,
                change_steps=change_steps,
                seed=seed,
            )
            task_schedule[step] = task

        return task_schedule

    def create_train_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
        """Create and return the wrappers to apply to the train environment of the current task."""
        wrappers: List[Callable[[gym.Env], gym.Env]] = []

        # TODO: Clean this up a bit?
        if self._using_custom_envs_foreach_task:
            # TODO: Maybe do something different here, since we don't actually want to
            # add a CL wrapper at all in this case?
            assert not any(self.train_task_schedule.values())
            base_env = self.train_envs[self.current_task_id]
        else:
            base_env = self.train_dataset
        # assert False, super().create_train_wrappers()
        if self.stationary_context:
            task_schedule_slice = self.train_task_schedule.copy()
            assert len(task_schedule_slice) >= 2
            assert self.nb_tasks == len(self.train_task_schedule) - 1
            # Need to pop the last task, so that we don't sample it by accident!
            max_step = max(task_schedule_slice)
            last_task = task_schedule_slice.pop(max_step)
            # TODO: Shift the second-to-last task to the last step
            last_boundary = max(task_schedule_slice)
            second_to_last_task = task_schedule_slice.pop(last_boundary)
            task_schedule_slice[max_step] = second_to_last_task
            if 0 not in task_schedule_slice:
                assert self.nb_tasks == 1
                task_schedule_slice[0] = second_to_last_task
            # assert False, (max_step, last_boundary, last_task, second_to_last_task)
        else:
            current_task = list(self.train_task_schedule.values())[self.current_task_id]
            task_length = self.train_max_steps // self.nb_tasks
            task_schedule_slice = {
                0: current_task,
                task_length: current_task,
            }
        return self._make_wrappers(
            base_env=base_env,
            task_schedule=task_schedule_slice,
            # TODO: Removing this, but we have to check that it doesn't change when/how
            # the task boundaries are given to the Method.
            # sharp_task_boundaries=self.known_task_boundaries_at_train_time,
            task_labels_available=self.task_labels_at_train_time,
            transforms=self.transforms + self.train_transforms,
            starting_step=0,
            max_steps=max(task_schedule_slice.keys()),
            new_random_task_on_reset=self.stationary_context,
        )

    def create_valid_wrappers(self):
        if self._using_custom_envs_foreach_task:
            # TODO: Maybe do something different here, since we don't actually want to
            # add a CL wrapper at all in this case?
            assert not any(self.val_task_schedule.values())
            base_env = self.val_envs[self.current_task_id]
        else:
            base_env = self.val_dataset
        # assert False, super().create_train_wrappers()
        if self.stationary_context:
            task_schedule_slice = self.val_task_schedule
        else:
            current_task = list(self.val_task_schedule.values())[self.current_task_id]
            task_length = self.train_max_steps // self.nb_tasks
            task_schedule_slice = {
                0: current_task,
                task_length: current_task,
            }
        return self._make_wrappers(
            base_env=base_env,
            task_schedule=task_schedule_slice,
            # TODO: Removing this, but we have to check that it doesn't change when/how
            # the task boundaries are given to the Method.
            # sharp_task_boundaries=self.known_task_boundaries_at_train_time,
            task_labels_available=self.task_labels_at_train_time,
            transforms=self.transforms + self.val_transforms,
            starting_step=0,
            max_steps=max(task_schedule_slice.keys()),
            new_random_task_on_reset=self.stationary_context,
        )

    def create_test_wrappers(self):
        if self._using_custom_envs_foreach_task:
            # TODO: Maybe do something different here, since we don't actually want to
            # add a CL wrapper at all in this case?
            assert not any(self.test_task_schedule.values())
            base_env = self.test_envs[self.current_task_id]
        else:
            base_env = self.test_dataset
        # assert False, super().create_train_wrappers()
        task_schedule_slice = self.test_task_schedule
        # if self.stationary_context:
        # else:
        #     current_task = list(self.test_task_schedule.values())[self.current_task_id]
        #     task_length = self.test_max_steps // self.nb_tasks
        #     task_schedule_slice = {
        #         0: current_task,
        #         task_length: current_task,
        #     }
        return self._make_wrappers(
            base_env=base_env,
            task_schedule=task_schedule_slice,
            # TODO: Removing this, but we have to check that it doesn't change when/how
            # the task boundaries are given to the Method.
            # sharp_task_boundaries=self.known_task_boundaries_at_train_time,
            task_labels_available=self.task_labels_at_train_time,
            transforms=self.transforms + self.test_transforms,
            starting_step=0,
            max_steps=self.test_max_steps,
            new_random_task_on_reset=self.stationary_context,
        )

    def _check_all_envs_have_same_spaces(
        self,
        envs_or_env_functions: List[Union[str, gym.Env, Callable[[], gym.Env]]],
        wrappers: List[Callable[[gym.Env], gym.Wrapper]],
    ) -> None:
        """Checks that all the environments in the list have the same
        observation/action spaces.
        """

        first_env = self._make_env(
            base_env=envs_or_env_functions[0], wrappers=wrappers, **self.base_env_kwargs
        )
        if not isinstance(envs_or_env_functions[0], gym.Env):
            # NOTE: Avoid closing the envs for now in case 'live' envs were passed to the Setting.
            # first_env.close()
            pass

        for task_id, task_env_id_or_function in zip(
            range(1, len(envs_or_env_functions)), envs_or_env_functions[1:]
        ):
            task_env = self._make_env(
                base_env=task_env_id_or_function,
                wrappers=wrappers,
                **self.base_env_kwargs,
            )
            if not isinstance(task_env_id_or_function, gym.Env):
                # NOTE: Avoid closing the envs for now in case 'live' envs were passed to the Setting.
                # task_env.close()
                pass

            def warn_spaces_are_different(
                task_id: int, kind: str, first_env: gym.Env, task_env: gym.Env
            ) -> None:
                task_space = (
                    task_env.observation_space if kind == "observation" else task_env.action_space
                )
                first_space = (
                    first_env.observation_space if kind == "observation" else first_env.action_space
                )
                warnings.warn(
                    RuntimeWarning(
                        colorize(
                            f"Env at task {task_id} doesn't have the same {kind} "
                            f"space as the environment of the first task: \n"
                            f"{task_space} \n"
                            f"!=\n"
                            f"{first_space} \n"
                            f"This isn't fully supported yet. Don't expect this to work.",
                            "yellow",
                        )
                    )
                )

            if task_env.observation_space != first_env.observation_space:
                if (
                    isinstance(task_env.observation_space, spaces.Box)
                    and isinstance(first_env.observation_space, spaces.Box)
                    and task_env.observation_space.shape == first_env.observation_space.shape
                ) or (
                    isinstance(task_env.observation_space, TypedDictSpace)
                    and isinstance(first_env.observation_space, TypedDictSpace)
                    and "x" in task_env.observation_space.spaces
                    and "x" in first_env.observation_space.spaces
                    and task_env.observation_space.x.shape == first_env.observation_space.x.shape
                ):
                    warnings.warn(
                        RuntimeWarning(
                            f"The shape of the observation space is the same, but the bounds are "
                            f"different between the first env and the env of task {task_id}!"
                        )
                    )
                else:
                    warn_spaces_are_different(task_id, "observation", first_env, task_env)

            if task_env.action_space != first_env.action_space:
                warn_spaces_are_different(task_id, "action", first_env, task_env)

    def _make_wrappers(
        self,
        base_env: Union[str, gym.Env, Callable[[], gym.Env]],
        task_schedule: Dict[int, Dict],
        # sharp_task_boundaries: bool,
        task_labels_available: bool,
        transforms: List[Transforms],
        starting_step: int,
        max_steps: int,
        new_random_task_on_reset: bool,
    ) -> List[Callable[[gym.Env], gym.Env]]:
        if self._using_custom_envs_foreach_task:
            if any(task_schedule.values()):
                logger.warning(
                    RuntimeWarning(
                        f"Ignoring task schedule {task_schedule}, since custom envs were "
                        f"passed for each task!"
                    )
                )
            task_schedule = None

        wrappers = super()._make_wrappers(
            base_env=base_env,
            task_schedule=task_schedule,
            task_labels_available=task_labels_available,
            transforms=transforms,
            starting_step=starting_step,
            max_steps=max_steps,
            new_random_task_on_reset=new_random_task_on_reset,
        )

        if self._using_custom_envs_foreach_task:
            # If the user passed a specific env to use for each task, then there won't
            # be a MultiTaskEnv wrapper in `wrappers`, since the task schedule is
            # None/empty.
            # Instead, we will add a Wrapper that always gives the task ID of the
            # current task.

            # TODO: There are some 'unused' args above: `starting_step`, `max_steps`,
            # `new_random_task_on_reset` which are still passed to the super() call, but
            # just unused.
            if new_random_task_on_reset:
                pass
                # raise NotImplementedError(
                #     "TODO: Add a MultiTaskEnv wrapper of some sort that alternates "
                #     " between the source envs."
                # )
            else:
                assert not task_schedule
                task_label = self.current_task_id
                task_label_space = spaces.Discrete(self.nb_tasks)
                if not task_labels_available:
                    task_label = None
                    task_label_space = Sparse(task_label_space, sparsity=1.0)

                wrappers.append(
                    partial(
                        FixedTaskLabelWrapper,
                        task_label=task_label,
                        task_label_space=task_label_space,
                    )
                )

        if is_monsterkong_env(base_env):
            # TODO: Need to register a MetaMonsterKong-State-v0 or something like that!
            # TODO: Maybe add another field for 'force_state_observations' ?
            # if self.force_pixel_observations:
            pass

        return wrappers


class MTEnvAdapterWrapper(TransformObservation):
    # TODO: For now, we remove the task id portion of the space and of the observation
    # dicts.
    def __init__(self, env: MTEnv, f: Callable = operator.itemgetter("env_obs")):
        super().__init__(env=env, f=f)
        # self.observation_space = self.env.observation_space["env_obs"]

    # def observation(self, observation):
    #     return observation["env_obs"]


def make_metaworld_env(env_class: Type[MetaWorldEnv], tasks: List["Task"]) -> MetaWorldEnv:
    env = env_class()
    env.set_task(tasks[0])
    # TODO: Could maybe replace this with the 'RoundRobin' or 'Random' wrapper from
    # `multienv_wrappers.py` by making it appear like it's multiple envs, but actually
    # share the env instance
    env = MultiTaskEnvironment(
        env,
        task_schedule={i: operator.methodcaller("set_task", task) for i, task in enumerate(tasks)},
        new_random_task_on_reset=True,
        add_task_dict_to_info=False,
        add_task_id_to_obs=False,
    )
    return env


def wrap(env_or_env_fn: Union[gym.Env, EnvFactory], wrappers: List[gym.Wrapper] = None) -> gym.Env:
    env: gym.Env = env_or_env_fn if isinstance(env_or_env_fn, gym.Env) else env_or_env_fn()
    wrappers = wrappers or []
    for wrapper in wrappers:
        env = wrapper(env)
    return env


def create_env(
    env_fn: Union[Type[gym.Env], Callable[[], gym.Env]],
    kwargs: Dict = None,
    wrappers: List[Callable[[gym.Env], gym.Env]] = None,
    seed: int = None,
) -> gym.Env:
    """
    1. Create an env instance by calling `env_fn`;
    2. Wrap it with the wrappers in `wrappers`, if any;
    3. seed it with `seed` if it is not None.
    """
    env = env_fn(**(kwargs or {}))
    wrappers = wrappers or []
    for wrapper in wrappers:
        env = wrapper(env)
    if seed is not None:
        env.seed(seed)
    return env


def make_lpg_ftw_datasets(
    dataset: str,
) -> Tuple[List[EnvFactory], List[EnvFactory], List[EnvFactory]]:
    # IDEA: "LPG-FTW-{bodyparts|gravity}-{HalfCheetah|Hopper|Walker2d}-{v2|v3}",
    # TODO: Instead of doing what I'm doing here, we could instead add an argument that gets
    # passed to the task creation function, for instance to get only a bodysize task, or
    # only a gravity task, etc.
    train_envs: List[EnvFactory] = []
    valid_envs: List[EnvFactory] = []
    test_envs: List[EnvFactory] = []

    name_parts = dataset.split("-")
    if len(name_parts) != 5:
        raise ValueError(
            "Expected the name to follow this format: \n"
            "\t 'LPG-FTW-{bodyparts|gravity}-{HalfCheetah|Hopper|Walker2d}-{v2|v3}' \n"
            f"but got {dataset}"
        )
    _, _, modification_type, env_name, version = name_parts

    # NOTE: From the LPG-FTW repo:
    # > "500 for halfcheetah, 600 for hopper, 700 for walker"
    task_creation_seeds = {"HalfCheetah": 500, "Hopper": 600, "Walker2d": 700}
    task_creation_seed = task_creation_seeds[env_name]
    rng = np.random.default_rng(task_creation_seed)

    from sequoia.settings.rl.envs.mujoco import (
        ContinualHalfCheetahV2Env,
        ContinualHalfCheetahV3Env,
        ContinualHopperV2Env,
        ContinualHopperV3Env,
        ContinualWalker2dV2Env,
        ContinualWalker2dV3Env,
    )

    env_classes: Dict[str, Dict[str, Type[gym.Env]]] = {
        "HalfCheetah": {
            "v2": ContinualHalfCheetahV2Env,
            "v3": ContinualHalfCheetahV3Env,
        },
        "Hopper": {"v2": ContinualHopperV2Env, "v3": ContinualHopperV3Env},
        "Walker2d": {"v2": ContinualWalker2dV2Env, "v3": ContinualWalker2dV3Env},
    }
    env_class = env_classes[env_name][version]
    # NOTE: Could also get the list of all geoms from the BODY_NAMES property on the classes above,
    # but the LPG-FTW repo actually uses a subset of those:
    bodyparts_for_env: Dict[str, List[str]] = {
        "HalfCheetah": ["torso", "fthigh", "fshin", "ffoot"],
        "Hopper": ["torso", "thigh", "leg", "foot"],
        "Walker2d": ["torso", "thigh", "leg", "foot"],
    }

    # From the paper: "We created T_max=20 tasks for HalfCheetah and Hopper domains, and
    # T_max=50 tasks for Walker2d domains."
    # NOTE: Here if `nb_tasks` is None, we use the default number of tasks from the paper.
    nb_tasks = 20 if env_name in ["HalfCheetah", "Hopper"] else 50

    task_params: List[Dict] = []
    values = []
    for task_id in range(nb_tasks):
        # NOTE: Could also support a different type of modification per task, by passing a list of
        # types of modifications to use!
        if modification_type == "gravity":
            # This is a function that will be called for each task, and must produce a set of
            # (distinct, reproducible) keyword arguments for the given task.
            original_gravity = -9.81
            task_gravity = round(((rng.random() + 0.5) * original_gravity), 4)
            task_kwargs = {"gravity": task_gravity}
            values.append(task_gravity)

        elif modification_type == "bodyparts":

            body_names = bodyparts_for_env[env_name]
            scale_factors = (rng.random(len(body_names)) + 0.5).round(4)
            values.append(scale_factors)
            body_name_to_size_scale = dict(zip(body_names, scale_factors))

            # between 0.5 and 1.5, with 4 digits of precision.
            # NOTE: Scale the mass by the same factor as the size.
            task_kwargs = {
                "body_name_to_size_scale": body_name_to_size_scale,
                "body_name_to_mass_scale": body_name_to_size_scale.copy(),
            }
        else:
            raise NotImplementedError(
                f"Unsupported modification type: '{modification_type}'! Supported values are "
                f"'bodyparts', 'gravity'."
            )
        logger.info(f"Arguments for task {task_id}: {task_kwargs}")
        task_params.append(task_kwargs)

    values = np.array(values)
    logger.debug(values.tolist())
    # assert False
    # logger.info("Task parameters:")
    # logger.info(json.dumps(task_params, indent="\t"))
    # NOTE: All envs in LPG-FTW use max_episode_steps of 1000.
    # max_episode_steps = 1000
    # wrappers = [partial(TimeLimit, max_episode_steps=max_episode_steps)]

    for task_id, task_kwargs in enumerate(task_params):
        # Function that will create the env with the given task.
        base_env_fn = partial(env_class, **task_kwargs)
        train_envs.append(base_env_fn)
        valid_envs.append(base_env_fn)
        test_envs.append(base_env_fn)

    return train_envs, valid_envs, test_envs


================================================
FILE: sequoia/settings/rl/incremental/setting_test.py
================================================
import dataclasses
import enum
import functools
import inspect
import math
import random
from typing import Any, ClassVar, Dict, NamedTuple, Optional, Type

import gym
import numpy as np
import pytest
from gym import spaces
from gym.envs.classic_control import CartPoleEnv

from sequoia.common.config import Config
from sequoia.common.gym_wrappers import RenderEnvWrapper
from sequoia.common.spaces import Image, Sparse
from sequoia.conftest import (
    metaworld_required,
    monsterkong_required,
    mtenv_required,
    mujoco_required,
    slow,
    xfail_param,
)
from sequoia.methods.random_baseline import RandomBaselineMethod
from sequoia.settings.assumptions.incremental_test import OtherDummyMethod
from sequoia.settings.rl import TaskIncrementalRLSetting
from sequoia.settings.rl.continual.setting_test import all_different_from_next
from sequoia.settings.rl.setting_test import DummyMethod

from ..discrete.setting_test import (
    TestDiscreteTaskAgnosticRLSetting as DiscreteTaskAgnosticRLSettingTests,
)
from .setting import IncrementalRLSetting


class TestIncrementalRLSetting(DiscreteTaskAgnosticRLSettingTests):
    Setting: ClassVar[Type[Setting]] = IncrementalRLSetting
    dataset: pytest.fixture

    @pytest.fixture()
    def setting_kwargs(self, dataset: str, nb_tasks: int, config: Config):
        """Fixture used to pass keyword arguments when creating a Setting."""
        kwargs = {"dataset": dataset, "nb_tasks": nb_tasks, "max_episode_steps": 100}
        if dataset.lower().startswith(("walker2d", "hopper", "halfcheetah", "continual")):
            # kwargs["train_max_steps"] = 5_000
            # kwargs["max_episode_steps"] = 100
            pass
        # NOTE: Using 0 workers so I can parallelize the tests without killing my PC.
        config.num_workers = 0
        kwargs["config"] = config
        return kwargs

    def test_passing_supported_dataset(self, setting_kwargs: Dict):
        # Override this test because envs can be passed for each task.
        setting = self.Setting(**setting_kwargs)
        assert setting.train_task_schedule
        if setting.train_envs:
            # Passing the dataset created custom envs for each task (e.g. MT10, CW10, LPG-FTW-(...).
            # The task schedule should have keys for the task boundary steps, but values should be
            # empty dictionaries.
            assert not any(setting.train_task_schedule.values())
        else:
            # Passing the dataset created a task schedule.
            assert all(setting.train_task_schedule.values()), "Should have non-empty tasks."

    def validate_results(
        self,
        setting: IncrementalRLSetting,
        method: DummyMethod,
        results: IncrementalRLSetting.Results,
    ) -> None:
        """Check that the results make sense.
        The Dummy Method used also keeps useful attributes, which we check here.
        """
        assert results
        assert results.objective
        assert len(results.task_sequence_results) == setting.nb_tasks
        assert results.average_final_performance == sum(
            results.task_sequence_results[-1].average_metrics_per_task
        )
        t = setting.nb_tasks
        p = setting.phases
        assert setting.known_task_boundaries_at_train_time
        assert setting.known_task_boundaries_at_test_time
        assert setting.task_labels_at_train_time
        # assert not setting.task_labels_at_test_time
        assert not setting.stationary_context
        if setting.nb_tasks == 1:
            assert not method.received_task_ids
            assert not method.received_while_training
        else:
            assert method.received_task_ids == sum(
                [
                    [t_i] + [t_j if setting.task_labels_at_test_time else None for t_j in range(t)]
                    for t_i in range(t)
                ],
                [],
            )
            assert method.received_while_training == sum(
                [[True] + [False for _ in range(t)] for t_i in range(t)], []
            )

    def test_tasks_are_different(self, setting_kwargs: Dict[str, Any], config: Config):
        """Check that the tasks different from the next.

        NOTE: Overriding this test because task schedules are empty when using custom envs for each
        task.
        """
        config = setting_kwargs.pop("config", config)
        assert config.seed is not None
        setting = self.Setting(**setting_kwargs, config=config)

        # Check that each task is different from the next.
        # NOTE: When custom datasets are used for each task then the task schedules' values are
        # empty, we have to change the test condition a little bit here.
        if setting.train_envs:
            # The dataset being used resulted in creating an env per task, rather than just using
            # one env with a task schedule.
            # Make sure that the fn for creating the env of each task is unique.
            assert all_different_from_next(setting.train_envs)
            assert all_different_from_next(setting.val_envs)
            assert all_different_from_next(setting.test_envs)
        else:
            # Check that each task is different from the next.
            assert all_different_from_next(setting.train_task_schedule.values())
            assert all_different_from_next(setting.val_task_schedule.values())
            assert all_different_from_next(setting.test_task_schedule.values())

    def test_number_of_tasks(self):
        setting = self.Setting(
            dataset="CartPole-v0",
            monitor_training_performance=True,
            nb_tasks=10,
            train_max_steps=10_000,
            test_max_steps=1000,
        )
        assert setting.nb_tasks == 10

    def test_max_number_of_steps_per_task_is_respected(self):
        setting = self.Setting(
            dataset="CartPole-v0",
            monitor_training_performance=True,
            # train_steps_per_task=500,
            nb_tasks=2,
            train_max_steps=1000,
            test_max_steps=1000,
        )
        for task_id in range(setting.phases):
            setting.current_task_id = task_id
            train_env = setting.train_dataloader()
            total_steps = 0
            while total_steps < setting.steps_per_phase:
                print(total_steps)
                obs = train_env.reset()

                done = False
                while not done:
                    if total_steps == setting.current_train_task_length:
                        assert train_env.is_closed()
                        with pytest.raises(gym.error.ClosedEnvironmentError):
                            obs, reward, done, info = train_env.step(
                                train_env.action_space.sample()
                            )
                        return
                    else:
                        obs, reward, done, info = train_env.step(train_env.action_space.sample())
                    total_steps += 1

            assert total_steps == setting.steps_per_phase

            with pytest.raises(gym.error.ClosedEnvironmentError):
                train_env.reset()

    @monsterkong_required
    @pytest.mark.timeout(120)
    @pytest.mark.parametrize(
        "state",
        [False, xfail_param(True, reason="TODO: MonsterkongState doesn't work?")],
    )
    def test_monsterkong(self, state: bool):
        """Checks that the MonsterKong env works fine with pixel and state input."""
        setting = self.Setting(
            dataset="StateMetaMonsterKong-v0" if state else "PixelMetaMonsterKong-v0",
            # force_state_observations=state,
            # force_pixel_observations=(not state),
            nb_tasks=5,
            train_max_steps=500,
            test_max_steps=500,
            # train_steps_per_task=100,
            # test_steps_per_task=100,
            train_transforms=[],
            test_transforms=[],
            val_transforms=[],
            max_episode_steps=10,
        )

        if state:
            # State-based monsterkong: We observe a flattened version of the game state
            # (20 x 20 grid + player cell and goal cell, IIRC.)
            assert setting.observation_space.x == spaces.Box(
                0, 292, (402,), np.int16
            ), setting._temp_train_env.observation_space
        else:
            assert setting.observation_space.x == Image(0, 255, (64, 64, 3), np.uint8)

        if setting.task_labels_at_test_time:
            assert setting.observation_space.task_labels == spaces.Discrete(5)
        else:
            assert setting.task_labels_at_train_time
            assert setting.observation_space.task_labels == Sparse(
                spaces.Discrete(5),
                sparsity=0.5,  # 0.5 since we have task labels at train time.
            )

        assert setting.test_max_steps == 500
        with setting.train_dataloader() as env:
            obs = env.reset()
            assert obs in setting.observation_space

        method = DummyMethod()
        results = setting.apply(method)

        self.validate_results(setting, method, results)

    @mujoco_required
    @pytest.mark.parametrize("seed", [None, 123, 456])
    @pytest.mark.parametrize("version", ["v2", "v3"])
    @pytest.mark.parametrize("env_name", ["HalfCheetah", "Hopper", "Walker2d"])
    @pytest.mark.parametrize("modification", ["bodyparts", "gravity"])
    def test_LPG_FTW_datasets(
        self,
        env_name: str,
        modification: str,
        version: str,
        config: Config,
        seed: int,
    ):
        """Test using a dataset from the LPG-FTW paper / repo (continual mujoco variants).

        TODO: Check that:
        - the task sequence is always the same (uses the same seed), regardless of what seed is
          passed;
        - The envs are created correctly;
        - The number of tasks / train steps / test steps / etc is set to the right values.
        """
        # LPG-FTW-{bodysize|gravity}-{HalfCheetah|Hopper|Walker2d}-{v2|v3}
        dataset = f"LPG-FTW-{modification}-{env_name}-{version}"

        # NOTE: Set the seed in the config, preserving the other values:
        config = dataclasses.replace(config, seed=seed)
        nb_tasks: Optional[int] = None  # Using the default number of tasks for that setting for now
        setting: TaskIncrementalRLSetting = self.Setting(
            dataset=dataset,
            nb_tasks=nb_tasks,
            config=config,
        )

        if nb_tasks is not None:
            assert setting.nb_tasks == nb_tasks
        else:
            assert setting.nb_tasks == 20 if env_name in ["HalfCheetah", "Hopper"] else 50

        assert setting.train_steps_per_task == 100_000
        assert setting.train_max_steps == setting.train_steps_per_task * setting.nb_tasks
        assert setting.test_steps_per_task == 10_000
        assert setting.test_max_steps == setting.test_steps_per_task * setting.nb_tasks
        assert setting.config == config

        expected_values = {
            "bodyparts": {
                "HalfCheetah": np.array(
                    [
                        [1.0667, 1.354, 1.1454, 0.9112],
                        [0.968, 1.3214, 0.8125, 1.2862],
                        [0.9356, 0.7476, 0.9421, 1.397],
                        [1.057, 1.0286, 0.776, 1.3749],
                        [0.7592, 1.3059, 0.6209, 0.9313],
                        [0.8497, 1.016, 0.869, 0.9722],
                        [0.6936, 0.7496, 0.9946, 0.7713],
                        [0.9878, 1.1394, 1.438, 1.3296],
                        [1.1359, 1.1118, 1.4415, 1.3868],
                        [0.5468, 0.9953, 1.3474, 1.3668],
                        [0.7779, 0.5924, 0.8996, 0.8196],
                        [0.9775, 0.7775, 1.3211, 1.1515],
                        [0.6026, 0.833, 0.9688, 1.4437],
                        [0.6035, 1.161, 1.0771, 0.7065],
                        [1.0629, 1.4446, 0.9937, 0.5573],
                        [1.2337, 0.522, 1.0446, 0.86],
                        [0.7313, 1.35, 1.2919, 0.6101],
                        [1.0026, 0.5937, 0.6216, 1.3764],
                        [0.6369, 0.8332, 1.0068, 1.1956],
                        [1.1337, 0.8872, 1.0393, 1.4391],
                    ]
                ),
                "Hopper": np.array(
                    [
                        [0.7135, 0.5054, 1.3158, 1.3817],
                        [1.2478, 1.4622, 0.8828, 0.7484],
                        [0.5758, 1.4022, 1.0022, 1.2518],
                        [1.4175, 0.5328, 0.8692, 0.6997],
                        [0.6962, 1.3126, 1.2338, 1.4018],
                        [1.4837, 1.0798, 0.7868, 0.8489],
                        [1.3545, 0.7424, 1.2719, 1.0976],
                        [0.6088, 0.516, 0.8584, 1.0396],
                        [1.19, 0.6938, 0.5663, 0.8589],
                        [0.8211, 1.3241, 0.9745, 1.345],
                        [0.6572, 1.0763, 1.3601, 0.659],
                        [0.7739, 0.7299, 0.6518, 1.469],
                        [1.0556, 0.7345, 0.532, 1.0279],
                        [1.2296, 0.6701, 1.4398, 1.0611],
                        [0.6225, 1.0743, 0.827, 0.6753],
                        [0.7325, 0.809, 1.2254, 0.9415],
                        [1.4439, 0.9964, 1.4649, 1.333],
                        [0.5189, 0.9123, 1.1166, 1.3882],
                        [1.0468, 1.4162, 1.4152, 1.4333],
                        [1.1143, 1.2726, 1.0209, 1.0729],
                    ]
                ),
                "Walker2d": np.array(
                    [
                        [0.7567, 0.756, 1.4277, 0.9565],
                        [1.4109, 0.5937, 0.7606, 0.6839],
                        [1.0276, 1.2041, 1.4451, 0.8439],
                        [0.9755, 0.8187, 0.591, 0.583],
                        [1.2181, 0.8519, 0.5878, 0.9935],
                        [0.8885, 1.2908, 1.3013, 1.1454],
                        [1.0147, 0.7442, 1.236, 0.5236],
                        [1.1978, 0.5307, 1.4067, 1.1635],
                        [0.9529, 0.8574, 0.6655, 0.5294],
                        [0.8051, 1.1687, 0.8499, 1.3864],
                        [1.2848, 0.8866, 0.5215, 1.0251],
                        [1.2241, 0.7499, 1.1479, 0.5744],
                        [1.2354, 0.5853, 1.1212, 0.5174],
                        [0.7968, 0.7717, 1.2285, 0.8687],
                        [1.0544, 0.5814, 0.8588, 0.687],
                        [1.0695, 0.6469, 0.8567, 0.6682],
                        [1.2904, 0.8367, 1.228, 0.8606],
                        [1.0343, 0.7646, 0.515, 1.3386],
                        [1.1157, 1.2064, 1.0026, 0.9877],
                        [0.6621, 0.809, 1.0466, 0.5361],
                        [0.9291, 0.6168, 0.9013, 1.4358],
                        [1.048, 0.8483, 0.8586, 1.1867],
                        [1.327, 1.0487, 1.4479, 0.9426],
                        [1.2382, 0.8678, 1.0034, 1.2412],
                        [0.5863, 1.4389, 0.934, 1.3923],
                        [1.1379, 1.154, 0.5595, 0.5955],
                        [1.3881, 1.3309, 0.5342, 1.1085],
                        [0.8394, 1.0508, 0.9655, 0.7755],
                        [0.7494, 0.6891, 0.6979, 1.3249],
                        [1.1108, 1.3998, 0.7783, 0.599],
                        [0.8687, 0.5902, 1.212, 0.6375],
                        [0.5668, 0.981, 0.5026, 1.0739],
                        [0.9416, 1.4424, 1.0721, 0.9112],
                        [1.2981, 1.0119, 1.2722, 0.9808],
                        [1.4171, 1.1066, 0.6053, 1.2302],
                        [1.1096, 1.0246, 1.3117, 0.5727],
                        [0.8082, 0.875, 0.9299, 1.2194],
                        [1.0526, 0.961, 1.0492, 1.2552],
                        [1.46, 0.8331, 0.934, 0.5725],
                        [1.3832, 1.4736, 1.2651, 0.7956],
                        [0.68, 1.2663, 1.4183, 0.9284],
                        [1.2713, 0.6865, 0.8331, 1.0081],
                        [1.4115, 0.5781, 0.9823, 0.8094],
                        [1.4614, 0.5998, 1.2237, 1.3794],
                        [1.2385, 1.2489, 0.7521, 0.818],
                        [1.077, 1.2589, 0.748, 1.1483],
                        [0.7855, 1.1619, 0.5537, 1.2367],
                        [1.4765, 1.1728, 0.9052, 1.3113],
                        [1.1144, 0.9986, 1.3052, 0.9948],
                        [1.1542, 1.3616, 0.7465, 0.8679],
                    ]
                ),
            },
            "gravity": {
                "HalfCheetah": np.array(
                    [
                        -10.4648,
                        -13.2825,
                        -11.236,
                        -8.9384,
                        -9.4964,
                        -12.9626,
                        -7.9709,
                        -12.6178,
                        -9.1777,
                        -7.3343,
                        -9.2424,
                        -13.7041,
                        -10.3694,
                        -10.091,
                        -7.6124,
                        -13.4874,
                        -7.4477,
                        -12.8111,
                        -6.0907,
                        -9.1363,
                    ]
                ),
                "Hopper": np.array(
                    [
                        -6.999,
                        -4.9579,
                        -12.9078,
                        -13.5543,
                        -12.2405,
                        -14.3439,
                        -8.6606,
                        -7.3419,
                        -5.6488,
                        -13.7555,
                        -9.8317,
                        -12.2801,
                        -13.9059,
                        -5.2266,
                        -8.5266,
                        -6.8638,
                        -6.83,
                        -12.8763,
                        -12.104,
                        -13.7512,
                    ]
                ),
                "Walker2d": np.array(
                    [
                        -7.4229,
                        -7.4163,
                        -14.006,
                        -9.3835,
                        -13.8414,
                        -5.8243,
                        -7.461,
                        -6.7093,
                        -10.0807,
                        -11.8119,
                        -14.1762,
                        -8.2791,
                        -9.57,
                        -8.031,
                        -5.7979,
                        -5.7189,
                        -11.9495,
                        -8.3575,
                        -5.7666,
                        -9.7467,
                        -8.7165,
                        -12.6623,
                        -12.7656,
                        -11.2362,
                        -9.9544,
                        -7.3011,
                        -12.1249,
                        -5.1366,
                        -11.7508,
                        -5.2058,
                        -13.8,
                        -11.4139,
                        -9.3481,
                        -8.4107,
                        -6.5289,
                        -5.1934,
                        -7.898,
                        -11.4647,
                        -8.3374,
                        -13.6001,
                        -12.6038,
                        -8.6978,
                        -5.1157,
                        -10.0563,
                        -12.0081,
                        -7.3568,
                        -11.2612,
                        -5.6351,
                        -12.1197,
                        -5.7417,
                    ]
                ),
            },
        }

        def _unwrap_partials(env_fn: functools.partial) -> functools.partial:
            from gym.envs.mujoco import MujocoEnv

            # 'unwrap' the env fn:
            while isinstance(env_fn, functools.partial):
                # We want to recover the 'base' env factory (the function that actually creates
                # the modified mujoco env.)
                # NOTE `env_fn` is probably something like:
                # `partial(create_env, base_env_factory,  wrappers=[...])
                # or
                # `partial(foo, env_fn=base_env_factory,  wrappers=[...])
                print(env_fn)
                if inspect.isclass(env_fn.func) and issubclass(env_fn.func, MujocoEnv):
                    # Reached the lowest-level partial, the one we're looking for.
                    break
                if env_fn.args:
                    env_fn = env_fn.args[0]
                else:
                    env_fn = list(env_fn.keywords.values())[0]
            return env_fn

        if modification == "bodyparts":
            expected_factors_for_env = expected_values["bodyparts"][env_name]

            def check_env_fn_matches_expected(task_id: int, env_fn: functools.partial):
                env_fn = _unwrap_partials(env_fn)
                assert isinstance(env_fn, functools.partial)
                kwargs = env_fn.keywords

                for argument_name in ["body_name_to_size_scale", "body_name_to_mass_scale"]:
                    argument_values = np.array(list(kwargs[argument_name].values()))
                    assert (argument_values == expected_factors_for_env[task_id]).all()

            env_fn: functools.partial

            # Inspect the env functions and check that the arguments that would be passed to the
            # constructor make sense.
            # NOTE: Could also create the envs using the setting and inspect these attributes,
            # but I think that inspecting the attributes on the multi-env wrappers used by the
            # Traditional and MultiTask RL settings might not work. This is ok for now.

            for task_id, env_fn in enumerate(setting.train_envs):
                check_env_fn_matches_expected(task_id, env_fn)
            for task_id, env_fn in enumerate(setting.val_envs):
                check_env_fn_matches_expected(task_id, env_fn)
            for task_id, env_fn in enumerate(setting.test_envs):
                check_env_fn_matches_expected(task_id, env_fn)
        elif modification == "gravity":
            expected_gravities_for_env = expected_values["gravity"][env_name]

            def check_env_fn_matches_expected(task_id: int, env_fn: functools.partial):
                env_fn = _unwrap_partials(env_fn)
                kwargs = env_fn.keywords
                gravity_value: float = kwargs["gravity"]
                assert np.isclose(gravity_value, expected_gravities_for_env[task_id])

            for task_id, env_fn in enumerate(setting.train_envs):
                check_env_fn_matches_expected(task_id, env_fn)
            for task_id, env_fn in enumerate(setting.val_envs):
                check_env_fn_matches_expected(task_id, env_fn)
            for task_id, env_fn in enumerate(setting.test_envs):
                check_env_fn_matches_expected(task_id, env_fn)

        # TODO: Not sure if this check will also work with the stationary settings, so skipping it
        # for now.
        if setting.stationary_context:
            return

        # Check that the max episode length is really respected.
        with setting.train_dataloader() as temp_env:
            steps = 0
            obs = temp_env.reset()
            done = False
            while not done:
                action = temp_env.action_space.sample()
                obs, reward, done, info = temp_env.step(action)
                assert obs in temp_env.observation_space
                steps += 1
                assert steps <= 1000
            assert steps <= 1000

        # NOTE: Testing the 'live' envs is much slower, since we have to actually isntantiate the
        # envs. Skipping the rest for now.
        return

        def _check_env_attributes_match(task_id: int, env: gym.Env):
            if modification == "bodyparts":
                size_scales = env.body_name_to_size_scale
                mass_scales = env.body_name_to_mass_scale
                assert size_scales == mass_scales
                assert list(size_scales.values()) == expected_factors_for_env[task_id].tolist()
            elif modification == "gravity":
                gravity = env.gravity
                assert gravity == expected_gravities_for_env[task_id]

        setting.prepare_data()
        for task_id in range(setting.nb_tasks):
            print(f"Testing the 'live' envs for task {task_id}.")
            setting.current_task_id = task_id

            with setting.train_dataloader() as env:
                _check_env_attributes_match(task_id, env)
            with setting.val_dataloader() as env:
                _check_env_attributes_match(task_id, env)
            with setting.test_dataloader() as env:
                _check_env_attributes_match(task_id, env)


@pytest.mark.timeout(120)
def test_action_space_always_matches_obs_batch_size_in_RL(config: Config):
    """ """
    from sequoia.settings import TaskIncrementalRLSetting

    nb_tasks = 2
    batch_size = 1
    setting = TaskIncrementalRLSetting(
        dataset="cartpole",
        nb_tasks=nb_tasks,
        batch_size=batch_size,
        train_max_steps=200,
        test_max_steps=200,
        num_workers=0,
        # monitor_training_performance=True, # This is still a TODO in RL.
    )
    total_samples = len(setting.test_dataloader())

    method = OtherDummyMethod()
    _ = setting.apply(method, config=config)

    expected_encountered_batch_sizes = {batch_size or 1}
    last_batch_size = total_samples % (batch_size or 1)
    if last_batch_size != 0:
        expected_encountered_batch_sizes.add(last_batch_size)
    assert set(method.batch_sizes) == expected_encountered_batch_sizes

    # NOTE: Multiply by nb_tasks because the test loop is ran after each training task.
    actual_num_batches = len(method.batch_sizes)
    expected_num_batches = math.ceil(total_samples / (batch_size or 1)) * nb_tasks
    # MINOR BUG: There's an extra batch for each task. Might make sense, or might not,
    # not sure.
    assert actual_num_batches == expected_num_batches + nb_tasks

    expected_total = total_samples * nb_tasks
    actual_total_obs = sum(method.batch_sizes)
    assert actual_total_obs == expected_total + nb_tasks


@mtenv_required
@pytest.mark.xfail(reason="don't know how to get the max path length through mtenv!")
def test_mtenv_meta_world_support():
    from mtenv import MTEnv, make

    env: MTEnv = make("MT-MetaWorld-MT10-v0")
    env.set_task_state(0)
    env.seed(123)
    env.seed_task(123)
    obs = env.reset()
    assert isinstance(obs, dict)
    assert list(obs.keys()) == ["env_obs", "task_obs"]
    print(obs)
    done = False
    # BUG: No idea how to get the max path length, since I'm getting
    # AttributeError: 'MetaWorldMTWrapper' object has no attribute 'max_path_length'
    steps = 0
    while not done and steps < env.max_path_length:
        obs, reward, done, info = env.step(env.action_space.sample())
        # BUG: Can't render when using metaworld through mtenv, since mtenv *contains* a
        # straight-up copy-pasted old version of meta-world, which doesn't support it.
        env.render()
        steps += 1
    env.close()

    env_obs_space = env.observation_space["env_obs"]
    task_obs_space = env.observation_space["task_obs"]
    # TODO: If the task observation space is Discrete(10), then we can't create a
    # setting with more than 10 tasks! We could add a check for this.
    # TODO: Figure out the default number of tasks depending on the chosen dataset.
    setting = IncrementalRLSetting(dataset="MT-MetaWorld-MT10-v0", nb_tasks=3)
    assert setting.observation_space.x == env_obs_space
    assert setting.nb_tasks == 3

    train_env = setting.train_dataloader()
    assert train_env.observation_space.x == env_obs_space
    assert train_env.observation_space.task_labels == spaces.Discrete(3)

    n_episodes = 1
    for episode in range(n_episodes):
        obs = train_env.reset()
        done = False
        steps = 0
        while not done and steps < env.max_path_length:
            obs, reward, done, info = train_env.step(train_env.action_space.sample())
            # BUG: Can't render meta-world env when using mtenv.
            train_env.render()
            steps += 1


# @pytest.mark.no_xvfb
# @pytest.mark.xfail(reason="TODO: Rethink how we want to integrate MetaWorld envs.")
@pytest.mark.skip(reason="BUG: timeout handler seems to be bugged, test lasts forever")
@metaworld_required
@pytest.mark.timeout(60)
def test_metaworld_support(config: Config):
    """Test using metaworld benchmarks as the dataset of an RL Setting.

    NOTE: Uses either a MetaWorldEnv instance as the `dataset`, or the env id.
    TODO: Need to rethink this, we should instead use one env class per task (where each
    task env goes through a subset of the tasks for training)
    """

    # TODO: Add option of passing a benchmark instance?
    setting = IncrementalRLSetting(
        dataset="MT10",
        config=config,
        max_episode_steps=10,
        train_max_steps=500,
        test_max_steps=500,
    )
    assert setting.nb_tasks == len(setting.train_envs)
    assert setting.nb_tasks == 10
    assert setting.train_max_steps == 500
    assert setting.test_max_steps == 500
    assert setting.train_steps_per_task == 50
    assert setting.test_steps_per_task == 50

    method = DummyMethod()
    results = setting.apply(method, config=config)
    assert results.summary()


@slow
@metaworld_required
@pytest.mark.timeout(180)
@pytest.mark.parametrize("dataset", ["CW10", "CW20"])
def test_continual_world_support(dataset: str, config: Config):
    """Test using CW10 and CW20 benchmarks as the dataset of an RL Setting.

    TODO: This test is quite long to run, in part because metaworld takes like 20
    seconds to load, and there being 20 tasks in CW20
    """
    # TODO: Add option of passing a benchmark instance? That might make it quicker to
    # run tests?
    setting = IncrementalRLSetting(
        dataset=dataset,
        config=config,
    )
    assert setting.nb_tasks == 10 if dataset == "CW10" else 20
    assert setting.train_steps_per_task == 1_000_000
    assert setting.train_max_steps == 1_000_000 * setting.nb_tasks
    assert setting.test_steps_per_task == 10_000
    assert setting.test_max_steps == 10_000 * setting.nb_tasks

    setting = IncrementalRLSetting(
        dataset=dataset,
        config=config,
        max_episode_steps=10,
        train_steps_per_task=50,
        test_steps_per_task=50,
    )
    assert setting.nb_tasks == 10 if dataset == "CW10" else 20
    assert setting.train_steps_per_task == 50
    assert setting.test_steps_per_task == 50
    assert setting.train_max_steps == setting.train_steps_per_task * setting.nb_tasks
    assert setting.test_steps_per_task == setting.test_steps_per_task
    assert setting.test_max_steps == setting.test_steps_per_task * setting.nb_tasks

    assert (
        setting.nb_tasks
        == len(setting.train_envs)
        == len(setting.val_envs)
        == len(setting.test_envs)
    )
    method = DummyMethod()
    results = setting.apply(method, config=config)
    assert method.train_episodes_per_task == [5 for _ in range(setting.nb_tasks)]
    assert results.summary()


@pytest.mark.xfail(reason="Metaworld integration isn't done yet")
@metaworld_required
@pytest.mark.timeout(120)
@pytest.mark.parametrize("pass_env_id_instead_of_env_instance", [True, False])
def test_metaworld_auto_task_schedule(pass_env_id_instead_of_env_instance: bool):
    """Test that when passing just an env id from metaworld and a number of tasks,
    the task schedule is created automatically.
    """
    import metaworld
    from metaworld import MetaWorldEnv

    benchmark = metaworld.ML10()  # Construct the benchmark, sampling tasks

    env_name = "reach-v2"
    env_type: Type[MetaWorldEnv] = benchmark.train_classes[env_name]
    env = env_type()

    # TODO: When not passing a nb_tasks, the number of available tasks for that env
    # is used.
    # setting = TaskIncrementalRLSetting(
    #     dataset=env_name if pass_env_id_instead_of_env_instance else env,
    #     train_steps_per_task=1000,
    # )
    # assert setting.nb_tasks == 50
    # assert setting.steps_per_task == 1000
    # assert sorted(setting.train_task_schedule.keys()) == list(range(0, 50_000, 1000))

    # Test passing a number of tasks:

    with pytest.warns(RuntimeWarning):
        setting = TaskIncrementalRLSetting(
            dataset=env_name if pass_env_id_instead_of_env_instance else env,
            train_max_steps=2000,
            nb_tasks=2,
            test_max_steps=2000,
            transforms=[],
        )
    assert setting.nb_tasks == 2
    assert setting.steps_per_task == 1000
    assert sorted(setting.train_task_schedule.keys()) == list(range(0, 2000, 1000))
    from sequoia.common.metrics.rl_metrics import EpisodeMetrics

    method = DummyMethod()
    with pytest.warns(RuntimeWarning):
        results: IncrementalRLSetting.Results[EpisodeMetrics] = setting.apply(method)
    # TODO: Don't know if these values make sense! Rewards are super high, not sure if
    # that's normal in Mujoco/metaworld envs:
    # "Average": {
    #     "Episodes": 66,
    #     "Mean reward per episode": 13622.872306005293,
    #     "Mean reward per step": 90.81914870670195
    # }
    # assert 50 < results.average_final_performance.episodes
    # assert 10_000 < results.average_final_performance.mean_reward_per_episode
    # assert 100 < results.average_final_performance.mean_episode_length <= 150


@pytest.mark.xfail(reason="WIP: Adding dm_control support")
def test_dm_control_support():
    import numpy as np
    from dm_control import suite

    # Load one task:
    env = suite.load(domain_name="cartpole", task_name="swingup")

    # Iterate over a task set:
    for domain_name, task_name in suite.BENCHMARKING:
        task_env = suite.load(domain_name, task_name)

    # Step through an episode and print out reward, discount and observation.
    action_spec = env.action_spec()
    time_step = env.reset()
    while not time_step.last():
        action = np.random.uniform(action_spec.minimum, action_spec.maximum, size=action_spec.shape)
        time_step = env.step(action)
        print(time_step.reward, time_step.discount, time_step.observation)


# TODO: Use the task schedule as a way to specify how long each task lasts in a
# given env? For instance:


class PeriodTypeEnum(enum.Enum):
    STEPS = enum.auto()
    EPISODES = enum.auto()


class Period(NamedTuple):
    value: int
    type: PeriodTypeEnum = PeriodTypeEnum.STEPS


steps = lambda v: Period(value=v, type=PeriodTypeEnum.STEPS)
episodes = lambda v: Period(value=v, type=PeriodTypeEnum.EPISODES)

train_task_schedule = {
    steps(10): "CartPole-v0",
    episodes(1000): "ALE/Breakout-v5",
}

from gym.wrappers import TimeLimit


def make_random_cartpole_env(gravity_scale: float):
    env = gym.make("CartPole-v1")
    env = TimeLimit(env, max_episode_steps=50)
    env.unwrapped.gravity *= gravity_scale
    return env


class TestPassingEnvsForEachTask:
    """Tests that have to do with the feature of passing the list of environments to
    use for each task.
    """

    def test_raises_warning_when_envs_have_different_obs_spaces(self):
        task_envs = ["CartPole-v0", "Pendulum-v1"]
        with pytest.warns(RuntimeWarning, match="doesn't have the same observation space"):
            setting = IncrementalRLSetting(train_envs=task_envs)
            setting.train_dataloader()

    def test_passing_env_fns_for_each_task(self):
        nb_tasks = 3
        gravity_scales = [0.5 + random.random() for _ in range(nb_tasks)]

        # task_envs = ["CartPole-v0", "CartPole-v1"]
        task_envs = [
            functools.partial(make_random_cartpole_env, gravity_scales[i]) for i in range(nb_tasks)
        ]
        base_env = make_random_cartpole_env(gravity_scale=1.0)

        setting = IncrementalRLSetting(train_envs=task_envs)
        assert setting.nb_tasks == nb_tasks

        # TODO: Using 'no-op' task schedules, rather than empty ones.
        # This fixes a bug with the creation of the test environment.
        assert not any(setting.train_task_schedule.values())
        assert not any(setting.val_task_schedule.values())
        assert not any(setting.test_task_schedule.values())
        # assert not setting.train_task_schedule
        # assert not setting.val_task_schedule
        # assert not setting.test_task_schedule

        # assert len(setting.train_task_schedule.keys()) == 2

        setting.current_task_id = 0

        train_env = setting.train_dataloader()
        assert train_env.gravity == base_env.gravity * gravity_scales[0]

        setting.current_task_id = 1

        train_env = setting.train_dataloader()
        assert train_env.gravity == base_env.gravity * gravity_scales[1]

        assert isinstance(train_env.unwrapped, CartPoleEnv)

        # Not sure, do we want to add a 'observation_spaces`, `action_spaces` and
        # `reward_spaces` properties?
        assert setting.observation_space.x == train_env.observation_space.x
        if setting.task_labels_at_train_time:
            # TODO: Either add a `__getattr__` proxy on the Sparse space, or create
            # dedicated `SparseDiscrete`, `SparseBox` etc spaces so that we eventually
            # get to use `space.n` on a Sparse space.
            assert train_env.observation_space.task_labels == spaces.Discrete(setting.nb_tasks)
            sparsity = 0.0 if setting.task_labels_at_test_time else 0.5
            assert setting.observation_space.task_labels == Sparse(
                spaces.Discrete(setting.nb_tasks),
                sparsity=sparsity,
            )

    def test_passing_env_for_each_task(self):
        nb_tasks = 3
        gravity_scales = [0.5 + random.random() for _ in range(nb_tasks)]

        # task_envs = ["CartPole-v0", "CartPole-v1"]
        task_envs = [make_random_cartpole_env(gravity_scales[i]) for i in range(nb_tasks)]
        base_env = make_random_cartpole_env(1.0)
        setting = IncrementalRLSetting(train_envs=task_envs)
        assert setting.nb_tasks == nb_tasks

        # TODO: Using 'no-op' task schedules, rather than empty ones.
        # This fixes a bug with the creation of the test environment.
        assert not any(setting.train_task_schedule.values())
        assert not any(setting.val_task_schedule.values())
        assert not any(setting.test_task_schedule.values())
        # assert not setting.train_task_schedule
        # assert not setting.val_task_schedule
        # assert not setting.test_task_schedule

        # assert len(setting.train_task_schedule.keys()) == 2

        setting.current_task_id = 0

        train_env = setting.train_dataloader()
        assert train_env.gravity == base_env.gravity * gravity_scales[0]

        setting.current_task_id = 1

        train_env = setting.train_dataloader()
        assert train_env.gravity == base_env.gravity * gravity_scales[1]

        assert isinstance(train_env.unwrapped, CartPoleEnv)

        # Not sure, do we want to add a 'observation_spaces`, `action_spaces` and
        # `reward_spaces` properties?
        assert setting.observation_space.x == train_env.observation_space.x
        if setting.task_labels_at_train_time:
            # TODO: Either add a `__getattr__` proxy on the Sparse space, or create
            # dedicated `SparseDiscrete`, `SparseBox` etc spaces so that we eventually
            # get to use `space.n` on a Sparse space.
            assert train_env.observation_space.task_labels == spaces.Discrete(setting.nb_tasks)
            sparsity = 0.0 if setting.task_labels_at_test_time else 0.5
            assert setting.observation_space.task_labels == Sparse(
                spaces.Discrete(setting.nb_tasks), sparsity=sparsity
            )

    def test_command_line(self):
        # TODO: If someone passes the same env ids from the command-line, then shouldn't
        # we somehow vary the tasks by changing the level or something?

        setting = IncrementalRLSetting.from_args(argv="--train_envs CartPole-v0 Pendulum-v1")
        assert setting.train_envs == ["CartPole-v0", "Pendulum-v1"]
        # TODO: Not using this:

    def test_raises_warning_when_envs_have_different_obs_spaces(self):
        task_envs = ["CartPole-v1", "Pendulum-v1"]
        with pytest.warns(RuntimeWarning, match="doesn't have the same observation space"):
            setting = IncrementalRLSetting(train_envs=task_envs)
            setting.train_dataloader()

    def test_random_baseline(self):
        nb_tasks = 3
        gravities = [random.random() * 10 for _ in range(nb_tasks)]
        from gym.wrappers import TimeLimit

        # task_envs = ["CartPole-v0", "CartPole-v1"]
        task_envs = [make_random_cartpole_env(i) for i in range(nb_tasks)]
        setting = IncrementalRLSetting(
            train_envs=task_envs, train_max_steps=1000, test_max_steps=1000
        )
        assert setting.nb_tasks == nb_tasks
        method = RandomBaselineMethod()

        results = setting.apply(method)
        assert results.objective > 0


@pytest.mark.xfail(reason=f"Don't yet fully changing the size of the body parts.")
@mujoco_required
def test_incremental_mujoco_like_LPG_FTW():
    """Trying to get the same-ish setup as the "LPG_FTW" experiments

    See https://github.com/Lifelong-ML/LPG-FTW/tree/master/experiments
    """
    nb_tasks = 5
    from sequoia.settings.rl.envs.mujoco import ContinualHalfCheetahEnv

    task_gravity_factors = [random.random() + 0.5 for _ in range(nb_tasks)]
    task_size_scale_factors = [random.random() + 0.5 for _ in range(nb_tasks)]

    task_envs = [
        RenderEnvWrapper(
            ContinualHalfCheetahEnv(
                gravity=task_gravity_factors[task_id] * -9.81,
                body_name_to_size_scale={"torso": task_size_scale_factors[task_id]},
            ),
        )
        for task_id in range(nb_tasks)
    ]

    setting = IncrementalRLSetting(
        train_envs=task_envs,
        train_steps_per_task=10_000,
        train_wrappers=RenderEnvWrapper,
        test_max_steps=10_000,
    )
    assert setting.nb_tasks == nb_tasks

    # NOTE: Same as above: we use a `no-op` task schedule, rather than an empty one.
    assert not any(setting.train_task_schedule.values())
    assert not any(setting.val_task_schedule.values())
    assert not any(setting.test_task_schedule.values())
    # assert not setting.train_task_schedule
    # assert not setting.val_task_schedule
    # assert not setting.test_task_schedule

    method = RandomBaselineMethod()

    # TODO: Using `render=True` causes a silent crash for some reason!
    results = setting.apply(method)
    assert results.objective > 0


================================================
FILE: sequoia/settings/rl/incremental/tasks.py
================================================
""" TODO: Add the tasks for IncrementalRLSetting, on top of the existing tasks from
ContinualRL
"""
import operator
import warnings
from functools import partial, singledispatch
from typing import Callable, List

import gym
import numpy as np

from sequoia.settings.rl.envs import (
    METAWORLD_INSTALLED,
    MTENV_INSTALLED,
    MetaWorldEnv,
    MetaWorldMujocoEnv,
    MTEnv,
    SawyerXYZEnv,
)

from ..discrete.tasks import (
    DiscreteTask,
    _is_supported,
    make_discrete_task,
    sequoia_registry,
    task_sampling_function,
)

IncrementalTask = DiscreteTask


@task_sampling_function(env_registry=sequoia_registry, based_on=make_discrete_task)
@singledispatch
def make_incremental_task(
    env: gym.Env,
    *,
    step: int,
    change_steps: List[int],
    seed: int = None,
    **kwargs,
) -> IncrementalTask:
    """Generic function used by Sequoia's `IncrementalRLSetting` (and its
    descendants) to create a "task" that will be applied to an environment like `env`.

    To add support for a new type of environment, simply register a handler function:
    ```
    @make_incremental_task.register(SomeGymEnvClass)
    def make_incremental_task_for_my_env(env: SomeGymEnvClass, step: int, change_steps: List[int], **kwargs,):
        return {"my_attribute": random.random()}
    ```
    """
    raise NotImplementedError(f"Don't know how to create an (incremental) task for env {env}")


is_supported = partial(_is_supported, _make_task_function=make_incremental_task)

# def is_supported(
#     env_id: str,
#     env_registry: EnvRegistry = sequoia_registry,
#     _make_task_function: Callable[..., DiscreteTask] = make_incremental_task,
# ) -> bool
#     """ Returns wether Sequoia is able to create (incremental) tasks for the given
#     environment.
#     """
#     return is_supported_by_parent(env_id, env_registry=env_registry, _make_task_function=_make_task_function)

#     return make_incremental_task.is_supported(env_id=env_id, env_registry=env_registry)


if MTENV_INSTALLED:

    @make_incremental_task.register
    def make_task_for_mtenv_env(
        env: MTEnv,
        step: int,
        change_steps: List[int],
        seed: int = None,
        **kwargs,
    ) -> Callable[[MTEnv], None]:
        """Samples a task for an env from MTEnv.

        The Task in this case will be a callable that will call the env's
        `set_task_state` method, passing in an integer (`task`).

        When `seed` is None, then the task will be the same as the task index.
        """
        assert change_steps, "Need task boundaries to construct the task schedule."

        if step not in change_steps:
            raise RuntimeError(
                f"MTENV has discrete tasks (as far as I'm aware), so step {step} "
                f"should be in {change_steps}!"
            )

        task_index = change_steps.index(step)

        task_states = list(range(len(change_steps)))
        if seed is not None:
            # perform a deterministic shuffling of the 'task ids'
            rng = rng or np.random.default_rng(seed)
            rng.shuffle(task_states)

        # NOTE: Task state is an integer for now, but I'm not sure if it can also be
        # something else..
        task_state: int = task_states[task_index]
        return operator.methodcaller("set_task_state", task_state)


if METAWORLD_INSTALLED:

    @make_incremental_task.register(SawyerXYZEnv)
    @make_incremental_task.register(MetaWorldMujocoEnv)
    @make_incremental_task.register(MetaWorldEnv)
    def make_task_for_metaworld_env(
        env: MetaWorldEnv,
        step: int,
        change_steps: List[int] = None,
        seed: int = None,
        **kwargs,
    ) -> Callable[[MetaWorldEnv], None]:
        """Samples a task for an environment from MetaWorld.

        The Task in this case will be a callable that will call the env's
        `set_task` method, passing in a task from the `train_tasks` of the benchmark
        that contains this environment.

        When `seed` is None, then the task will be the same as the task index.
        """
        # TODO: Which benchmark should we use?
        found = False

        assert change_steps, "Need task boundaries to construct the task schedule."

        if step not in change_steps:
            raise RuntimeError(
                f"MTENV has discrete tasks (as far as I'm aware), so step {step} "
                f"should be in {change_steps}!"
            )

        task_index = change_steps.index(step)

        import metaworld

        # TODO: Not sure how exactly we're supposed to use the train_classes vs
        # train_tasks, should it be a MultiTaskEnv within a given env class?
        warnings.warn(RuntimeWarning("This is supposedly not the right way to do it!"))
        env_name = ""
        # Find the benchmark that contains this type of env.
        for benchmark_class in [metaworld.ML10]:
            benchmark = benchmark_class()
            for env_name, env_class in benchmark.train_classes.items():
                if isinstance(env, env_class):
                    # Found the right benchmark that contains this env class, now
                    # create the task schedule using
                    # the tasks.
                    found = True
                    break
            if found:
                break
        if not found:
            raise NotImplementedError(f"Can't find a benchmark with env class {type(env)}!")
        # `benchmark` is here the right benchmark to use to create the tasks.
        training_tasks = [task for task in benchmark.train_tasks if task.env_name == env_name]

        tasks = training_tasks.copy()
        if seed is not None:
            # perform a deterministic shuffling of the 'task ids'
            rng = rng or np.random.default_rng(seed)
            rng.shuffle(tasks)

        task = tasks[task_index]
        return operator.methodcaller("set_task", task)


================================================
FILE: sequoia/settings/rl/multi_task/__init__.py
================================================
from .setting import MultiTaskRLSetting


================================================
FILE: sequoia/settings/rl/multi_task/setting.py
================================================
""" 'Classical' RL setting.
"""
from dataclasses import dataclass
from typing import Callable, List

import gym

from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import constant

from ..task_incremental import TaskIncrementalRLSetting
from ..traditional import TraditionalRLSetting

logger = get_logger(__name__)


@dataclass
class MultiTaskRLSetting(TaskIncrementalRLSetting, TraditionalRLSetting):
    """Reinforcement Learning setting where the environment alternates between a set
    of tasks sampled uniformly.

    Implemented as a TaskIncrementalRLSetting, but where the tasks are randomly sampled
    during training.
    """

    # TODO: Move this into a new Assumption about the context non-stationarity.
    stationary_context: bool = constant(True)

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        Defaults to the number of tasks, but may be different, for instance in so-called
        Multi-Task Settings, this is set to 1.
        """
        return 1

    # TODO: Show how the multi-task wrapper is created here, rather than in the base class.

    def create_train_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
        return super().create_train_wrappers()

    def create_test_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
        """Get the list of wrappers to add to a single test environment.

        The result of this method must be pickleable when using
        multiprocessing.

        Returns
        -------
        List[Callable[[gym.Env], gym.Env]]
            [description]
        """
        if self.stationary_context:
            logger.warning(
                "The test phase will go through all tasks in sequence, rather than "
                "shuffling them! (This is to make it easier to compile the performance "
                "metrics for each task."
            )
        new_random_task_on_reset = False
        # TODO: If we're in the 'Multi-Task RL' setting, then should we maybe change
        # the task schedule, so that we give an equal number of steps per task?
        return self._make_wrappers(
            base_env=self.test_dataset,
            task_schedule=self.test_task_schedule,
            # sharp_task_boundaries=self.known_task_boundaries_at_test_time,
            task_labels_available=self.task_labels_at_test_time,
            transforms=self.test_transforms,
            starting_step=0,
            max_steps=self.test_max_steps,
            new_random_task_on_reset=new_random_task_on_reset,
        )


================================================
FILE: sequoia/settings/rl/multi_task/setting_test.py
================================================
# TODO: Tests for the multi-task RL setting.
from typing import ClassVar, Type

import pytest

from sequoia.settings.rl.setting_test import DummyMethod

from ..task_incremental.setting_test import (
    TestTaskIncrementalRLSetting as TaskIncrementalRLSettingTests,
)
from .setting import MultiTaskRLSetting


class TestMultiTaskRLSetting(TaskIncrementalRLSettingTests):
    Setting: ClassVar[Type[Setting]] = MultiTaskRLSetting
    dataset: pytest.fixture

    # def test_on_task_switch_is_called(self):
    #     setting = self.Setting(
    #         dataset="CartPole-v0",
    #         nb_tasks=5,
    #         # train_steps_per_task=100,
    #         train_max_steps=500,
    #         test_max_steps=500,
    #     )
    #     method = DummyMethod()
    #     _ = setting.apply(method)
    #     assert setting.task_labels_at_test_time
    #     assert False, method.observation_task_labels

    def validate_results(
        self,
        setting: MultiTaskRLSetting,
        method: DummyMethod,
        results: MultiTaskRLSetting.Results,
    ) -> None:
        """Check that the results make sense.
        The Dummy Method used also keeps useful attributes, which we check here.
        """
        assert results
        assert results.objective
        assert setting.stationary_context
        assert len(results.task_results) == setting.nb_tasks
        assert results.average_metrics == sum(
            task_result.average_metrics for task_result in results.task_results
        )
        t = setting.nb_tasks
        p = setting.phases
        assert setting.known_task_boundaries_at_train_time
        assert setting.known_task_boundaries_at_test_time
        assert setting.task_labels_at_train_time
        assert setting.task_labels_at_test_time
        if setting.nb_tasks == 1:
            assert not method.received_task_ids
            assert not method.received_while_training
        else:
            # Only received during testing.
            assert method.received_task_ids == [t_i for t_i in range(t)]
            assert method.received_while_training == [False for _ in range(t)]


================================================
FILE: sequoia/settings/rl/objects.py
================================================
from dataclasses import dataclass
from typing import TypeVar

from torch import Tensor

from sequoia.settings.base import Setting

T = TypeVar("T")


@dataclass(frozen=True)
class Observations(Setting.Observations):
    """Observations in a continual RL Setting."""

    # Input example
    x: Tensor


@dataclass(frozen=True)
class Actions(Setting.Actions):
    pass


# TODO: Replace this 'Rewards' with a 'SparseRewards'-like object for RL, and a
# 'DenseRewards'-like object in SL, rather than use the same in RL and SL.


@dataclass(frozen=True)
class Rewards(Setting.Rewards[T]):
    """Rewards given back by the environment in RL Settings."""


# @dataclass(frozen=True)
# class RLReward(Rewards[T]):
#     reward: T

# @dataclass(frozen=True)
# class SLReward(Rewards[T]):
#     reward: T
#     y: Sequence[T]


ObservationType = TypeVar("ObservationType", bound=Observations)
ActionType = TypeVar("ActionType", bound=Actions)
RewardType = TypeVar("RewardType", bound=Rewards)

# from .environment import RLEnvironment as Environment


================================================
FILE: sequoia/settings/rl/setting.py
================================================
from dataclasses import dataclass
from typing import ClassVar, Type

from sequoia.settings.base import Setting
from sequoia.settings.base.environment import ActionType, ObservationType, RewardType

from .environment import RLEnvironment
from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType


@dataclass
class RLSetting(Setting[RLEnvironment[ObservationType, ActionType, RewardType]]):
    """LightningDataModule for an 'active' setting.

    This is to be the parent of settings like RL or maybe Active Learning.
    """

    Observations: ClassVar[Type[ObservationType]] = Observations
    Actions: ClassVar[Type[ActionType]] = Actions
    Rewards: ClassVar[Type[RewardType]] = Rewards


================================================
FILE: sequoia/settings/rl/setting_test.py
================================================
""" Utilities used in tests for the RL Settings. """
from typing import Any, Callable, Dict, List, Optional
import warnings

from sequoia.common.gym_wrappers import IterableWrapper
from sequoia.methods import RandomBaselineMethod
from sequoia.settings.base import Environment
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


class DummyMethod(RandomBaselineMethod):
    """Random baseline method used for debugging the (RL) settings.

    TODO: Remove the other `DummyMethod` variants, replace them with this.
    """

    def __init__(
        self,
        additional_train_wrappers: List[Callable[[Environment], Environment]] = None,
        additional_valid_wrappers: List[Callable[[Environment], Environment]] = None,
    ):
        super().__init__()
        # Wrappers to be added to the train/val environments to debug/test that the
        # setting's environments work correctly.
        self.train_env: Optional[Environment] = None
        self.valid_env: Optional[Environment] = None
        self.additional_train_wrappers = additional_train_wrappers or []
        self.additional_valid_wrappers = additional_valid_wrappers or []
        self.all_train_values = []
        self.all_valid_values = []
        self.observation_task_labels: List[Any] = []
        self.n_fit_calls = 0
        self.n_task_switches = 0
        self.received_task_ids: List[Optional[int]] = []
        self.received_while_training: List[bool] = []
        self.train_steps_per_task: List[int] = []
        self.train_episodes_per_task: List[int] = []
        self._has_been_configured_before = False

        self.changing_attributes: List[str] = []

    def configure(self, setting):
        if self._has_been_configured_before:
            raise RuntimeError("Can't reuse this Method across Settings for now.")
        self._has_been_configured_before = True
        # The attributes to look for changes with.
        self.changing_attributes = list(
            set().union(*[task.keys() for task in setting.train_task_schedule.values()])
        )
        self.train_env = None
        self.valid_env = None

    def fit(
        self,
        train_env: Environment,
        valid_env: Environment,
    ):
        # Add wrappers, if necessary.
        for wrapper in self.additional_train_wrappers:
            train_env = wrapper(train_env)
        for wrapper in self.additional_valid_wrappers:
            valid_env = wrapper(valid_env)

        train_env = CheckAttributesWrapper(train_env, attributes=self.changing_attributes)
        valid_env = CheckAttributesWrapper(valid_env, attributes=self.changing_attributes)
        self.train_env = train_env
        self.valid_env = valid_env
        # TODO: Replace the loop below with adding soem wrappers around the train/valid envs, and
        # just delegate to super().fit (so we use the RandomBaselineMethod).
        # return super().fit(train_env, valid_env)

        episodes = 0
        val_interval = 10
        total_steps = 0
        self.train_steps_per_task.append(0)
        self.train_episodes_per_task.append(0)
        import tqdm

        train_pbar = tqdm.tqdm(desc="Fake training")
        while not train_env.is_closed():
            obs = train_env.reset()
            task_labels = obs.task_labels
            if task_labels is None or isinstance(task_labels, int) or not task_labels.shape:
                task_labels = [task_labels]
            self.observation_task_labels.extend(task_labels)
            attr_dict = {attr: getattr(train_env, attr) for attr in self.changing_attributes}
            logger.debug(f"Start of episode #{episodes}: {attr_dict}")
            done = False
            while not done and not train_env.is_closed():
                actions = train_env.action_space.sample()
                # print(train_env.current_task)
                obs, rew, done, info = train_env.step(actions)
                total_steps += 1
                self.train_steps_per_task[-1] += 1
                train_pbar.update()
                train_pbar.set_postfix({"episodes": episodes, "total steps": total_steps})
            episodes += 1
            self.train_episodes_per_task[-1] += 1

            if episodes % val_interval == 0 and not valid_env.is_closed():
                # Perform one 'validation' episode.
                obs = valid_env.reset()
                done = False
                while not done and not valid_env.is_closed():
                    actions = valid_env.action_space.sample()
                    obs, rew, done, info = valid_env.step(actions)

            if self.max_train_episodes is not None and episodes < self.max_train_episodes:
                break

        self.all_train_values.append(self.train_env.values)
        self.all_valid_values.append(self.valid_env.values)
        self.n_fit_calls += 1

    def on_task_switch(self, task_id: Optional[int] = None):
        self.n_task_switches += 1
        self.received_task_ids.append(task_id)
        self.received_while_training.append(self.training)


class CheckAttributesWrapper(IterableWrapper):
    """Wrapper that stores the value of a given attribute at each step."""

    def __init__(self, env, attributes: List[str]):
        super().__init__(env)
        self.attributes = attributes
        self.values: Dict[int, Dict[str, Any]] = {}
        self.steps = 0

    def _store_current_attributes(self):
        if self.steps not in self.values:
            self.values[self.steps] = {}
        for attribute in self.attributes:
            value = getattr(self.env, attribute)
            unwrapped_value = getattr(self.env.unwrapped, attribute)
            assert value == unwrapped_value, (attribute, value, unwrapped_value)
            self.values[self.steps][attribute] = value

    def step(self, action):
        self._store_current_attributes()
        result = super().step(action)
        self.steps += 1
        self._store_current_attributes()
        return result


================================================
FILE: sequoia/settings/rl/task_incremental/__init__.py
================================================
from .setting import TaskIncrementalRLSetting


================================================
FILE: sequoia/settings/rl/task_incremental/setting.py
================================================
from dataclasses import dataclass

from sequoia.utils.utils import constant

from ..incremental import IncrementalRLSetting


@dataclass
class TaskIncrementalRLSetting(IncrementalRLSetting):
    """Continual RL setting with clear task boundaries and task labels.

    The task labels are given at both train and test time.
    """

    task_labels_at_train_time: bool = constant(True)
    task_labels_at_test_time: bool = constant(True)


================================================
FILE: sequoia/settings/rl/task_incremental/setting_test.py
================================================
from typing import ClassVar, List, Type

import pytest

from sequoia.common.gym_wrappers import MultiTaskEnvironment
from sequoia.settings.rl.incremental.setting_test import (
    TestIncrementalRLSetting as IncrementalRLSettingTests,
)

from .setting import TaskIncrementalRLSetting


class TestTaskIncrementalRLSetting(IncrementalRLSettingTests):
    Setting: ClassVar[Type[Setting]] = TaskIncrementalRLSetting
    dataset: pytest.fixture


def test_task_label_space_of_env_has_right_n():
    setting = TaskIncrementalRLSetting(dataset="MountainCarContinuous-v0")
    default_nb_tasks = setting.nb_tasks
    assert setting.observation_space.task_labels.n == default_nb_tasks
    assert setting.train_dataloader().observation_space.task_labels.n == default_nb_tasks
    assert setting.val_dataloader().observation_space.task_labels.n == default_nb_tasks
    assert setting.test_dataloader().observation_space.task_labels.n == default_nb_tasks


def test_task_schedule_is_used():
    """Test that the tasks are switching over time."""
    setting = TaskIncrementalRLSetting(
        dataset="CartPole-v0",
        train_max_steps=100,
        nb_tasks=2,
    )

    default_length = 0.5

    for task_id in range(2):
        setting.current_task_id = task_id

        env = setting.train_dataloader(batch_size=None)
        env: MultiTaskEnvironment
        assert len(setting.train_task_schedule) == 3
        assert len(setting.val_task_schedule) == 3
        assert len(setting.test_task_schedule) == 3

        starting_length = env.length

        _ = env.reset()
        lengths: List[float] = []
        for i in range(setting.steps_per_phase):
            obs, reward, done, info = env.step(env.action_space.sample())
            # NOTE: If we're done on the last step, we can't reset, since that would go
            # over the step budget.
            if done and i != setting.steps_per_phase - 1:
                env.reset()
            # Get the length of the pole from the environment.
            length = env.length
            lengths.append(length)

        if task_id == 0:
            assert starting_length == default_length
            assert all(length == default_length for length in lengths)

        else:
            # The length of the pole is different than the default length
            assert starting_length != default_length
            # The length shouldn't be changing over time.
            assert all(length == starting_length for length in lengths)


================================================
FILE: sequoia/settings/rl/task_incremental/tasks.py
================================================
from ..incremental.tasks import make_incremental_task

# NOTE: For now there aren't any tasks specific to only task-incremental.
make_task_incremental_task = make_incremental_task
is_supported = make_task_incremental_task.is_supported


================================================
FILE: sequoia/settings/rl/traditional/__init__.py
================================================
from .setting import TraditionalRLSetting


================================================
FILE: sequoia/settings/rl/traditional/setting.py
================================================
""" 'Classical' RL setting.
"""
from dataclasses import dataclass
from typing import ClassVar, Dict

from simple_parsing.helpers import choice
from typing_extensions import Final

from sequoia.utils.utils import constant

# NOTE: We can reuse those results for now, since they describe the same thing.
from ..discrete.results import DiscreteTaskAgnosticRLResults as TraditionalRLResults
from ..incremental import IncrementalRLSetting


@dataclass
class TraditionalRLSetting(IncrementalRLSetting):
    """Your usual "Classical" Reinforcement Learning setting.

    Implemented as a MultiTaskRLSetting, but with a single task.
    """

    # Class variable that holds the dict of available environments.
    available_datasets: ClassVar[Dict[str, str]] = IncrementalRLSetting.available_datasets.copy()
    # Which dataset/environment to use for training, validation and testing.
    dataset: str = choice(available_datasets, default="CartPole-v0")

    # IDEA: By default, only use one task, although there may actually be more than one.
    nb_tasks: int = 5

    stationary_context: Final[bool] = constant(True)
    known_task_boundaries_at_train_time: Final[bool] = constant(True)
    task_labels_at_train_time: Final[bool] = constant(True)
    task_labels_at_test_time: bool = False

    # Results: ClassVar[Type[Results]] = TaskSequenceResults

    def __post_init__(self):
        super().__post_init__()
        assert self.stationary_context

    def apply(self, method, config=None):
        results: IncrementalRLSetting.Results = super().apply(method, config=config)
        assert len(results.task_sequence_results) == 1
        return results.task_sequence_results[0]
        # result: TraditionalRLResults = TraditionalRLResults(task_results=results.task_sequence_results[0].task_results)
        result: TraditionalRLResults = results.task_sequence_results[0]
        # assert False, result._runtime
        return result

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        Defaults to the number of tasks, but may be different, for instance in so-called
        Multi-Task Settings, this is set to 1.
        """
        return 1


================================================
FILE: sequoia/settings/rl/traditional/setting_test.py
================================================
# TODO: Tests for the "traditional" RL setting.
from typing import ClassVar, Type

import pytest
import torch

from sequoia.settings.assumptions.incremental_results import TaskSequenceResults
from sequoia.settings.rl.setting_test import DummyMethod

from ..incremental.setting_test import TestIncrementalRLSetting as IncrementalRLSettingTests
from .setting import TraditionalRLSetting


class TestTraditionalRLSetting(IncrementalRLSettingTests):
    Setting: ClassVar[Type[Setting]] = TraditionalRLSetting
    dataset: pytest.fixture

    def test_on_task_switch_is_called(self):
        setting = self.Setting(
            dataset="CartPole-v0",
            nb_tasks=5,
            # train_steps_per_task=100,
            train_max_steps=500,
            test_max_steps=500,
        )
        assert setting.stationary_context
        method = DummyMethod()
        _ = setting.apply(method)
        # assert setting.task_labels_at_test_time
        # assert False, method.observation_task_labels
        assert method.n_fit_calls == 1
        import torch

        assert torch.unique_consecutive(
            torch.as_tensor(method.observation_task_labels)
        ).tolist() != list(range(setting.nb_tasks))

    def validate_results(
        self,
        setting: TraditionalRLSetting,
        method: DummyMethod,
        results: TraditionalRLSetting.Results,
    ) -> None:
        """Check that the results make sense.
        The Dummy Method used also keeps useful attributes, which we check here.
        """
        assert results
        assert results.objective
        assert setting.stationary_context
        assert len(results.task_results) == setting.nb_tasks
        assert results.average_metrics == sum(
            task_result.average_metrics for task_result in results.task_results
        )
        t = setting.nb_tasks
        p = setting.phases
        assert setting.known_task_boundaries_at_train_time
        assert setting.known_task_boundaries_at_test_time
        assert setting.task_labels_at_train_time
        assert not setting.task_labels_at_test_time
        if setting.nb_tasks == 1:
            assert not method.received_task_ids
            assert not method.received_while_training
        else:
            # Only received during testing.
            assert method.n_task_switches == t
            assert method.received_task_ids == [None for t_i in range(t)]
            assert method.received_while_training == [False for _ in range(t)]

    def validate_results(
        self,
        setting: TraditionalRLSetting,
        method: DummyMethod,
        results: TraditionalRLSetting.Results,
    ) -> None:
        assert results
        assert results.objective
        assert isinstance(results, TaskSequenceResults)
        assert len(results.task_results) == setting.nb_tasks
        assert results.average_metrics == sum(
            task_result.average_metrics for task_result in results.task_results
        )
        assert method.n_fit_calls == 1

        # BUG: Traditional/Multi-Task RL have one too many task labels:
        assert list(set(method.observation_task_labels)) == list(range(setting.nb_tasks))

        train_task_labels = torch.as_tensor(method.observation_task_labels)
        new_train_task_labels = torch.unique_consecutive(train_task_labels).tolist()
        if setting.nb_tasks > 1:
            assert new_train_task_labels != list(range(setting.nb_tasks))
        else:
            assert set(method.observation_task_labels) == {0}


================================================
FILE: sequoia/settings/rl/wrappers/__init__.py
================================================
""" Wrappers specific to the RL settings, so not exactly as general as those in
`common/gym_wrappers`.
"""
from .measure_performance import MeasureRLPerformanceWrapper
from .task_labels import HideTaskLabelsWrapper, RemoveTaskLabelsWrapper
from .typed_objects import NoTypedObjectsWrapper, TypedObjectsWrapper


================================================
FILE: sequoia/settings/rl/wrappers/measure_performance.py
================================================
""" TODO: Create a Wrapper that measures performance over the first epoch of training in SL.

Then maybe after we can make something more general that also works for RL.
"""

from typing import Any, Dict, List, Optional, Sequence, Union

import numpy as np
from torch import Tensor

import wandb
from sequoia.common.gym_wrappers.measure_performance import MeasurePerformanceWrapper
from sequoia.common.metrics import Metrics
from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings.base import Actions, Observations, Rewards
from sequoia.settings.rl import ActiveEnvironment
from sequoia.utils.utils import add_prefix


class MeasureRLPerformanceWrapper(
    MeasurePerformanceWrapper
    # MeasurePerformanceWrapper[ActiveEnvironment]  # python 3.7
    # MeasurePerformanceWrapper[ActiveEnvironment, EpisodeMetrics] # python 3.8+
):
    def __init__(
        self,
        env: ActiveEnvironment,
        eval_episodes: int = None,
        eval_steps: int = None,
        wandb_prefix: str = None,
    ):
        super().__init__(env)
        self._metrics: Dict[int, EpisodeMetrics] = {}
        self._eval_episodes = eval_episodes or 0
        self._eval_steps = eval_steps or 0
        # Counter for the number of steps.
        self._steps: int = 0
        # Counter for the number of episodes
        self._episodes: int = 0
        self.wandb_prefix = wandb_prefix

        self._batch_size = self.env.num_envs if self.is_vectorized else 1

        self._current_episode_reward = np.zeros([self._batch_size], dtype=float)
        self._current_episode_steps = np.zeros([self._batch_size], dtype=int)

    @property
    def in_evaluation_period(self) -> bool:
        """Returns wether the performance is currently being monitored.

        Returns
        -------
        bool
            Wether or not the performance on the env is being monitored.
        """
        if self._eval_steps:
            return self._steps <= self._eval_steps
        if self._eval_episodes:
            return self._eval_episodes <= self._eval_episodes
        return True

    def reset(self) -> Union[Observations, Any]:
        obs = super().reset()
        # assert isinstance(obs, Observations)
        return obs

    def step(self, action: Actions):
        observation, rewards_, done, info = super().step(action)
        self._steps += 1
        reward = rewards_.y if isinstance(rewards_, Rewards) else rewards_

        if isinstance(done, bool):
            self._episodes += int(done)
        elif isinstance(done, np.ndarray):
            self._episodes += sum(done)
        else:
            self._episodes += done.int().sum()

        if self.in_evaluation_period:
            if self.is_vectorized:
                for env_index, (env_is_done, env_reward) in enumerate(zip(done, reward)):
                    self._current_episode_reward[env_index] += env_reward
                    self._current_episode_steps[env_index] += 1
            else:
                self._current_episode_reward[0] += reward
                self._current_episode_steps[0] += 1

            metrics = self.get_metrics(action, reward, done)

            if metrics is not None:
                assert self._steps not in self._metrics, "two metrics at same step?"
                self._metrics[self._steps] = metrics

        return observation, rewards_, done, info

    # def send(self, action: Actions) -> Rewards:
    # self.action_ = action
    # rewards_ = super().send(action)
    # self._steps += 1
    # reward = rewards_.y if isinstance(rewards_, Rewards) else rewards_

    # # TODO: Need access to the "done" signal in here somehow.
    # done = self.done_

    # if isinstance(done, bool):
    #     self._episodes += int(done)
    # elif isinstance(done, np.ndarray):
    #     self._episodes += sum(done)
    # else:
    #     self._episodes += done.int().sum()

    # if self.in_evaluation_period:
    #     if self.is_vectorized:
    #         for env_index, (env_is_done, env_reward) in enumerate(
    #             zip(done, reward)
    #         ):
    #             self._current_episode_reward[env_index] += env_reward
    #             self._current_episode_steps[env_index] += 1
    #     else:
    #         self._current_episode_reward[0] += reward
    #         self._current_episode_steps[0] += 1

    #     metrics = self.get_metrics(action, reward, done)

    #     if metrics is not None:
    #         assert self._steps not in self._metrics, "two metrics at same step?"
    #         self._metrics[self._steps] = metrics

    # return rewards_

    def get_metrics(
        self,
        action: Union[Actions, Any],
        reward: Union[Rewards, Any],
        done: Union[bool, Sequence[bool]],
    ) -> Optional[EpisodeMetrics]:
        # TODO: Add some metric about the entropy of the policy's distribution?
        rewards = reward.y if isinstance(reward, Rewards) else reward
        actions = action.y_pred if isinstance(action, Actions) else action
        dones: Sequence[bool]
        if not self.is_vectorized:
            rewards = [rewards]
            actions = [actions]
            assert isinstance(done, bool)
            dones = [done]
        else:
            assert isinstance(done, (np.ndarray, Tensor))
            dones = done

        metrics: List[EpisodeMetrics] = []
        for env_index, (env_is_done, reward) in enumerate(zip(dones, rewards)):
            if env_is_done:
                metrics.append(
                    EpisodeMetrics(
                        n_samples=1,
                        # The average reward per episode.
                        mean_episode_reward=self._current_episode_reward[env_index],
                        # The average length of each episode.
                        mean_episode_length=self._current_episode_steps[env_index],
                    )
                )
                self._current_episode_reward[env_index] = 0
                self._current_episode_steps[env_index] = 0

        if not metrics:
            return None

        metric = sum(metrics, Metrics())
        if wandb.run:
            log_dict = metric.to_log_dict()
            if self.wandb_prefix:
                log_dict = add_prefix(log_dict, prefix=self.wandb_prefix, sep="/")
            log_dict["steps"] = self._steps
            log_dict["episode"] = self._episodes
            wandb.log(log_dict)

        return metric


================================================
FILE: sequoia/settings/rl/wrappers/measure_performance_test.py
================================================
import itertools
from functools import partial
from itertools import accumulate

import numpy as np
import pytest
from gym.vector import SyncVectorEnv

# from sequoia.settings.rl.continual import ContinualRLSetting
from sequoia.common.gym_wrappers import EnvDataset
from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.conftest import DummyEnvironment

from .measure_performance import MeasureRLPerformanceWrapper


def test_measure_RL_performance_basics():
    env = DummyEnvironment(start=0, target=5, max_value=10)

    # env = TypedObjectsWrapper(env, observations_type=ContinualRLSetting.Observations, actions_type=ContinualRLSetting.Actions, rewards_type=ContinualRLSetting.Rewards)

    env = MeasureRLPerformanceWrapper(env)
    env.seed(123)
    all_episode_rewards = []
    all_episode_steps = []

    for episode in range(5):
        episode_steps = 0
        episode_reward = 0
        obs = env.reset()
        print(f"Episode {episode}, obs: {obs}")
        done = False
        while not done:
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)
            episode_reward += reward
            episode_steps += 1
            # print(obs, reward, done, info)

        all_episode_steps.append(episode_steps)
        all_episode_rewards.append(episode_reward)
    from itertools import accumulate

    expected_metrics = {}
    for episode_steps, cumul_step, episode_reward in zip(
        all_episode_steps, accumulate(all_episode_steps), all_episode_rewards
    ):
        expected_metrics[cumul_step] = EpisodeMetrics(
            n_samples=1,
            mean_episode_reward=episode_reward,
            mean_episode_length=episode_steps,
        )

    assert env.get_online_performance() == expected_metrics


def test_measure_RL_performance_iteration():
    env = DummyEnvironment(start=0, target=5, max_value=10)
    from gym.wrappers import TimeLimit

    max_episode_steps = 50
    env = EnvDataset(env)
    env = TimeLimit(env, max_episode_steps=max_episode_steps)

    # env = TypedObjectsWrapper(env, observations_type=ContinualRLSetting.Observations, actions_type=ContinualRLSetting.Actions, rewards_type=ContinualRLSetting.Rewards)

    env = MeasureRLPerformanceWrapper(env)
    env.seed(123)
    all_episode_rewards = []
    all_episode_steps = []

    for episode in range(5):
        episode_steps = 0
        episode_reward = 0
        for step, obs in enumerate(env):
            print(f"Episode {episode}, obs: {obs}")
            action = env.action_space.sample()
            reward = env.send(action)
            episode_reward += reward
            episode_steps += 1
            # print(obs, reward, done, info)
            assert step <= max_episode_steps, "shouldn't be able to iterate longer than that."

        all_episode_steps.append(episode_steps)
        all_episode_rewards.append(episode_reward)

    expected_metrics = {}
    for episode_steps, cumul_step, episode_reward in zip(
        all_episode_steps, accumulate(all_episode_steps), all_episode_rewards
    ):
        expected_metrics[cumul_step] = EpisodeMetrics(
            n_samples=1,
            mean_episode_reward=episode_reward,
            mean_episode_length=episode_steps,
        )

    assert env.get_online_performance() == expected_metrics


@pytest.mark.xfail(
    reason=f"TODO: The wrapper seems to works but the test condition is too complicated"
)
def test_measure_RL_performance_batched_env():
    batch_size = 3
    start = [i for i in range(batch_size)]
    target = 5
    env = EnvDataset(
        SyncVectorEnv(
            [
                partial(DummyEnvironment, start=start[i], target=target, max_value=target * 2)
                for i in range(batch_size)
            ]
        )
    )
    # env = TypedObjectsWrapper(env, observations_type=ContinualRLSetting.Observations, actions_type=ContinualRLSetting.Actions, rewards_type=ContinualRLSetting.Rewards)

    env = MeasureRLPerformanceWrapper(env)
    env.seed(123)
    all_episode_rewards = []
    all_episode_steps = []

    for step, obs in enumerate(itertools.islice(env, 100)):
        print(f"step {step} obs: {obs}")
        action = np.ones(batch_size)  # always increment the counter
        reward = env.send(action)
        print(env.done_)
        # print(obs, reward, done, info)
    assert step == 99
    from collections import defaultdict

    from sequoia.common.metrics import Metrics

    expected_metrics = defaultdict(Metrics)
    for i in range(101):
        for env_index in range(batch_size):
            if i and i % target == 0:
                expected_metrics[i] += EpisodeMetrics(
                    n_samples=1,
                    mean_episode_reward=10.0,  # ? FIXME: Actually understand this condition
                    mean_episode_length=target,
                )

            # FIXME: This test is a bit too complicated, hard to follow. I'll keep the
            # batches synced-up for now.
            # if i > 0 and (i + env_index) % target == 0:
            #     expected_metrics[i] += EpisodeMetrics(
            #         n_samples=1,
            #         mean_episode_reward=sum(target - (i + env_index % target) for j in range(start[env_index], target)),
            #         mean_episode_length=target - start[env_index] - 1
            #     )

    assert env.get_online_performance() == expected_metrics


================================================
FILE: sequoia/settings/rl/wrappers/no_typed_objects.py
================================================


================================================
FILE: sequoia/settings/rl/wrappers/task_labels.py
================================================
from collections.abc import Mapping
from dataclasses import is_dataclass, replace
from functools import singledispatch
from typing import Any, Dict, Optional, Tuple, TypeVar, Union

import gym
from gym import Space, spaces

from sequoia.common import Batch
from sequoia.common.gym_wrappers import IterableWrapper, TransformObservation
from sequoia.common.gym_wrappers.multi_task_environment import add_task_labels
from sequoia.common.gym_wrappers.utils import IterableWrapper
from sequoia.common.spaces import Sparse, TypedDictSpace
from sequoia.common.spaces.named_tuple import NamedTupleSpace
from sequoia.settings.base.objects import ObservationType

T = TypeVar("T")


@singledispatch
def hide_task_labels(observation: Tuple[T, int]) -> Tuple[T, Optional[int]]:
    assert len(observation) == 2
    return observation[0], None


@hide_task_labels.register(dict)
def _hide_task_labels_in_dict(observation: Dict) -> Dict:
    new_observation = observation.copy()
    assert "task_labels" in observation
    new_observation["task_labels"] = None
    return new_observation


@hide_task_labels.register
def _hide_task_labels_on_batch(observation: Batch) -> Batch:
    return replace(observation, task_labels=None)


@hide_task_labels.register(Space)
def hide_task_labels_in_space(observation: Space) -> Space:
    raise NotImplementedError(
        f"TODO: Don't know how to remove task labels from space {observation}."
    )


@hide_task_labels.register
def _hide_task_labels_in_namedtuple_space(
    observation: NamedTupleSpace,
) -> NamedTupleSpace:
    spaces = observation._spaces.copy()
    task_label_space = spaces["task_labels"]

    if isinstance(task_label_space, Sparse):
        if task_label_space.sparsity == 1.0:
            # No need to change anything:
            return observation
        # Replace the existing 'Sparse' space with another one with the same
        # base but with sparsity = 1.0
        task_label_space = task_label_space.base

    assert not isinstance(task_label_space, Sparse)
    task_label_space = Sparse(task_label_space, sparsity=1.0)
    spaces["task_labels"] = task_label_space
    return type(observation)(**spaces)


@hide_task_labels.register
def _hide_task_labels_in_tuple_space(observation: spaces.Tuple) -> spaces.Tuple:
    assert len(observation.spaces) == 2, "ambiguous"

    task_label_space = observation.spaces[1]
    if isinstance(task_label_space, Sparse):
        # Replace the existing 'Sparse' space with another one with the same
        # base but with sparsity = 1.0
        task_label_space = task_label_space.base
    assert not isinstance(task_label_space, Sparse)
    # We set the task label space as sparse, instead of removing that space.
    return spaces.Tuple([observation[0], Sparse(task_label_space, sparsity=1.0)])


@hide_task_labels.register
def hide_task_labels_in_dict_space(observation: spaces.Dict) -> spaces.Dict:
    task_label_space = observation.spaces["task_labels"]
    if isinstance(task_label_space, Sparse):
        # Replace the existing 'Sparse' space with another one with the same
        # base but with sparsity = 1.0
        task_label_space = task_label_space.base
    assert not isinstance(task_label_space, Sparse)
    return type(observation)(
        {
            key: subspace if key != "task_labels" else Sparse(task_label_space, 1.0)
            for key, subspace in observation.spaces.items()
        }
    )


@hide_task_labels.register(TypedDictSpace)
def hide_task_labels_in_typed_dict_space(
    observation: TypedDictSpace[T],
) -> TypedDictSpace[T]:
    task_label_space = observation.spaces["task_labels"]
    if isinstance(task_label_space, Sparse):
        # Replace the existing 'Sparse' space with another one with the same
        # base but with sparsity = 1.0
        task_label_space = task_label_space.base
    assert not isinstance(task_label_space, Sparse)
    return type(observation)(
        {
            key: subspace if key != "task_labels" else Sparse(task_label_space, 1.0)
            for key, subspace in observation.spaces.items()
        },
        dtype=observation.dtype,
    )


class HideTaskLabelsWrapper(TransformObservation):
    """Hides the task labels by setting them to None, rather than removing them
    entirely.

    This might be useful in order not to break the inheritance 'contract' when
    going from contexts where you don't have the task labels to contexts where
    you do have them.
    """

    def __init__(self, env: gym.Env, f=hide_task_labels):
        super().__init__(env, f=f)
        self.observation_space = hide_task_labels(self.env.observation_space)


@singledispatch
def remove_task_labels(observation: Any) -> Any:
    """Removes the task labels from an observation / observation space."""
    if is_dataclass(observation):
        return replace(observation, task_labels=None)
    raise NotImplementedError(
        f"No handler registered for value {observation} of type {type(observation)}"
    )


@remove_task_labels.register(spaces.Tuple)
@remove_task_labels.register(tuple)
def _(observation: Tuple[T, Any]) -> Tuple[T]:
    if len(observation) == 2:
        return observation[1]
    if len(observation) == 1:
        return observation[0]
    raise NotImplementedError(observation)


@remove_task_labels.register
def _remove_task_labels_in_namedtuple_space(
    observation: NamedTupleSpace,
) -> NamedTupleSpace:
    spaces = observation._spaces.copy()
    spaces.pop("task_labels")
    return type(observation)(**spaces)


@remove_task_labels.register(spaces.Dict)
@remove_task_labels.register(Mapping)
def _(observation: Dict) -> Dict:
    assert "task_labels" in observation.keys()
    return type(observation)(
        **{key: value for key, value in observation.items() if key != "task_labels"}
    )


class RemoveTaskLabelsWrapper(TransformObservation):
    """Removes the task labels from the observations and the observation space."""

    def __init__(self, env: gym.Env, f=remove_task_labels):
        super().__init__(env, f=f)
        self.observation_space = remove_task_labels(self.env.observation_space)

    @classmethod
    def space_change(cls, input_space: gym.Space) -> gym.Space:
        assert isinstance(input_space, spaces.Tuple), input_space
        # assert len(input_space) == 2, input_space
        return input_space[0]


class FixedTaskLabelWrapper(IterableWrapper):
    """Wrapper that adds always the same given task id to the observations.

    Used when the list of envs for each task is passed, so that each env also has the
    task id as part of their observation space and in their observations.
    """

    def __init__(self, env: gym.Env, task_label: Optional[int], task_label_space: gym.Space):
        super().__init__(env=env)
        self.task_label = task_label
        self.task_label_space = task_label_space
        self.observation_space = add_task_labels(
            self.env.observation_space, task_labels=task_label_space
        )

    def observation(self, observation: Union[ObservationType, Any]) -> ObservationType:
        return add_task_labels(observation, self.task_label)

    def reset(self):
        return self.observation(super().reset())

    def step(self, action):
        obs, reward, done, info = super().step(action)
        return self.observation(obs), reward, done, info


================================================
FILE: sequoia/settings/rl/wrappers/typed_objects.py
================================================
from dataclasses import fields
import dataclasses
from functools import singledispatch
from typing import Any, Dict, Sequence, Tuple, TypeVar, Union

import gym
import numpy as np
from gym import Space, spaces
from torch import Tensor

from sequoia.common.gym_wrappers import IterableWrapper
from sequoia.common.gym_wrappers.convert_tensors import supports_tensors
from sequoia.common.spaces import TypedDictSpace
from sequoia.common.spaces.named_tuple import NamedTupleSpace
from sequoia.settings.base.environment import Environment
from sequoia.settings.base.objects import (
    Actions,
    ActionType,
    Observations,
    ObservationType,
    Rewards,
    RewardType,
)

T = TypeVar("T")


class TypedObjectsWrapper(IterableWrapper, Environment[ObservationType, ActionType, RewardType]):
    """Wrapper that converts the observations and rewards coming from the env
    to `Batch` objects.

    NOTE: Not super necessary atm, but this would perhaps be useful if methods
    are built and expect to have a given 'type' of observations to work with,
    then any new setting that inherits from their target setting should have
    observations that subclass/inherit from the observations of their parent, so
    as not to break compatibility.

    For example, if a Method targets the ClassIncrementalSetting, then it
    expects to receive "observations" of the type described by
    ClassIncrementalSetting.Observations, and if it were to be applied on a
    TaskIncrementalSLSetting (which inherits from ClassIncrementalSetting), then
    the observations from that setting should be isinstances (or subclasses of)
    the Observations class that this method was designed to receive!
    """

    def __init__(
        self,
        env: gym.Env,
        observations_type: ObservationType,
        rewards_type: RewardType,
        actions_type: ActionType,
        observation_space: TypedDictSpace = None,
        action_space: TypedDictSpace = None,
        reward_space: TypedDictSpace = None,
    ):
        self.Observations = observations_type
        self.Rewards = rewards_type
        self.Actions = actions_type
        super().__init__(env=env)

        observation_fields = fields(self.Observations)
        action_fields = fields(self.Actions)
        reward_fields = fields(self.Rewards)

        if not all([observation_fields, action_fields, reward_fields]):
            raise RuntimeError(
                f"The Observations/Actions/Rewards classes passed to the TypedObjectsWrapper all need to have at least one field!"
            )

        simple_spaces = (spaces.Box, spaces.Discrete, spaces.MultiDiscrete, spaces.MultiBinary)
        num_envs = getattr(self.env, "num_envs", None)

        # Set the observation space.
        if observation_space:
            self.observation_space = observation_space
        elif isinstance(self.env.observation_space, spaces.Dict):
            # Convert the spaces.Dict into a TypedDictSpace, or replace a TypedDictSpace's `dtype`.
            self.observation_space = TypedDictSpace(
                spaces=self.env.observation_space.spaces,
                dtype=self.Observations,
            )
        elif isinstance(self.env.observation_space, simple_spaces):
            # we can get away with this since the class has only one field and the space is simple.
            field_name = observation_fields[0].name
            if len(observation_fields) > 1:
                # all the other fields need to have a default value, since the space doesn't have any.
                # TODO: Create a `ConstantSpace`, `NoneSpace`. If a field has `None` default value,
                # put a
                required_fields = [
                    f
                    for f in observation_fields
                    if f.default is dataclasses.MISSING
                    and f.default_factory is dataclasses.MISSING
                    and f.init
                ]
                required_field_names = [f.name for f in required_fields]
                if any(f.name != field_name for f in required_fields):
                    raise NotImplementedError(
                        f"Can't infer the observaiton space is given class {self.Observations}, "
                        f"since has required fields {required_field_names} "
                        f"that aren't present in the observation space. "
                    )

            self.observation_space = TypedDictSpace(
                spaces={field_name: self.env.observation_space}, dtype=self.Observations
            )
        else:
            raise NotImplementedError(
                f"Need to pass the observation space to the TypedObjectsWrapper constructor when "
                f"the wrapped env's observation space isn't already a Dict or TypedDictSpace and "
                f"`Observations` has more than one field. (Observations: {self.Observations}, "
                f"observation_fields: {[f.name for f in observation_fields]})"
            )

        # Set/construct the action space.
        if action_space:
            self.action_space = action_space
        elif isinstance(self.env.action_space, spaces.Dict):
            # Convert the spaces.Dict into a TypedDictSpace, or replace a TypedDictSpace's `dtype`.
            self.action_space = TypedDictSpace(
                spaces=self.env.action_space.spaces,
                dtype=self.Actions,
            )
        elif (isinstance(self.env.action_space, simple_spaces) and len(action_fields) == 1) or (
            isinstance(self.env.action_space, spaces.Tuple) and num_envs
        ):
            field_name = action_fields[0].name
            self.action_space = TypedDictSpace(
                spaces={field_name: self.env.action_space}, dtype=self.Actions
            )
        else:
            raise NotImplementedError(
                "Need to pass the action space to the TypedObjectsWrapper constructor when "
                "the wrapped env's action space isn't already a Dict or TypedDictSpace and "
                "the Actions class doesn't have just one field."
                f"(wrapped action space: {self.env.action_space}, Actions: {self.Actions})"
            )

        # Set / construct the reward space.

        # Get the default reward space in case the wrapped env doesn't have a `reward_space` attr.
        default_reward_space = spaces.Box(
            low=self.env.reward_range[0],
            high=self.env.reward_range[1],
            shape=((num_envs,) if num_envs is not None else ()),
            dtype=np.float64,
        )

        if reward_space:
            self.reward_space = reward_space
        elif not hasattr(self.env, "reward_space"):
            if len(reward_fields) != 1:
                raise NotImplementedError(
                    "Need to pass the reward space to the TypedObjectsWrapper constructor when "
                    "the wrapped env doesn't have a `reward_space` attribute and the Rewards "
                    "class has more than one field."
                )
            field_name = reward_fields[0].name
            self.reward_space = TypedDictSpace(
                spaces={field_name: default_reward_space},
                dtype=self.Rewards,
            )
        elif isinstance(self.env.reward_space, spaces.Dict):
            # Convert the spaces.Dict into a TypedDictSpace, or replace a TypedDictSpace's `dtype`.
            self.reward_space = TypedDictSpace(
                spaces=self.env.reward_space.spaces,
                dtype=self.Rewards,
            )
        elif isinstance(self.env.reward_space, simple_spaces) and len(reward_fields) == 1:
            field_name = reward_fields[0].name
            self.reward_space = TypedDictSpace(
                spaces={field_name: self.env.reward_space},
                dtype=self.Rewards,
            )
        else:
            raise NotImplementedError(
                "Need to pass the reward space to the TypedObjectsWrapper constructor when "
                "the wrapped env's reward space isn't already a Dict or TypedDictSpace and "
                "the Rewards class doesn't have just one field."
            )

        # TODO: Using a TypedDictSpace for the action/reward spaces is a small change in code, but
        # will most likely have a large impact on all the methods and tests!
        # THis here can be used to 'turn off' the changes to those spaces done above:
        self.action_space = self.env.action_space
        self.reward_space = getattr(self.env, "reward_space", default_reward_space)

        # if isinstance(self.env.observation_space, NamedTupleSpace):
        #     self.observation_space = self.env.observation_space
        #     self.observation_space.dtype = self.Observations

    def step(
        self, action: ActionType
    ) -> Tuple[
        ObservationType, RewardType, Union[bool, Sequence[bool]], Union[Dict, Sequence[Dict]]
    ]:
        # "unwrap" the actions before passing it to the wrapped environment.
        action = self.action(action)
        observation, reward, done, info = self.env.step(action)
        # TODO: Make the observation space a Dict
        observation = self.observation(observation)
        reward = self.reward(reward)
        return observation, reward, done, info

    def observation(self, observation: Any) -> ObservationType:
        if isinstance(observation, self.Observations):
            return observation
        if isinstance(observation, tuple):
            # TODO: Dissallow this: shouldn't handle tuples since they can be quite ambiguous.
            # assert False, observation
            return self.Observations(*observation)
        if isinstance(observation, dict):
            try:
                return self.Observations(**observation)
            except TypeError:
                assert False, (self.Observations, observation)
        assert isinstance(observation, (Tensor, np.ndarray))
        return self.Observations(observation)

    def action(self, action: ActionType) -> Any:
        # TODO: Assert this eventually
        # assert isinstance(action, Actions), action
        if isinstance(action, Actions):
            action = action.y_pred
        if isinstance(action, Tensor) and not supports_tensors(self.env.action_space):
            action = action.detach().cpu().numpy()
        if action not in self.env.action_space:
            if isinstance(self.env.action_space, spaces.Tuple):
                action = tuple(action)
        return action

    def reward(self, reward: Any) -> RewardType:
        return self.Rewards(reward)

    def reset(self, **kwargs) -> ObservationType:
        observation = self.env.reset(**kwargs)
        return self.observation(observation)

    def __iter__(self):
        for batch in self.env:
            if isinstance(batch, tuple) and len(batch) == 2:
                yield self.observation(batch[0]), self.reward(batch[1])
            elif isinstance(batch, tuple) and len(batch) == 1:
                yield self.observation(batch[0])
            else:
                yield self.observation(batch)

    def send(self, action: ActionType) -> RewardType:
        action = self.action(action)
        reward = self.env.send(action)
        return self.reward(reward)


# TODO: turn unwrap into a single-dispatch callable.
# TODO: Atm 'unwrap' basically means "get rid of everything apart from the first
# item", which is a bit ugly.
# Unwrap should probably be a method on the corresponding `Batch` class, which could
# maybe accept a Space to fit into?
@singledispatch
def unwrap(obj: Any) -> Any:
    return obj
    # raise NotImplementedError(obj)


@unwrap.register(int)
@unwrap.register(float)
@unwrap.register(np.ndarray)
@unwrap.register(list)
def _unwrap_scalar(v):
    return v


@unwrap.register(Actions)
def _unwrap_actions(obj: Actions) -> Union[Tensor, np.ndarray]:
    return obj.y_pred


@unwrap.register(Rewards)
def _unwrap_rewards(obj: Rewards) -> Union[Tensor, np.ndarray]:
    return obj.y


@unwrap.register(Observations)
def _unwrap_observations(obj: Observations) -> Union[Tensor, np.ndarray]:
    # This gets rid of everything except just the image.
    # TODO: Keep the task labels? or no? For now, no.
    return obj.x


@unwrap.register(NamedTupleSpace)
def _unwrap_space(obj: NamedTupleSpace) -> Space:
    # This gets rid of everything except just the first item in the space.
    # TODO: Keep the task labels? or no? For now, no.
    return obj[0]


@unwrap.register(TypedDictSpace)
def _unwrap_space(obj: TypedDictSpace) -> spaces.Dict:
    # This gets rid of everything except just the first item in the space.
    # TODO: Keep the task labels? or no? For now, no.
    return spaces.Dict(obj.spaces)


class NoTypedObjectsWrapper(IterableWrapper):
    """Does the opposite of the 'TypedObjects' wrapper.

    Can be added on top of that wrapper to strip off the typed objects it
    returns and just returns tensors/np.ndarrays instead.

    This is used for example when applying a method from stable-baselines3, as
    they only want to get np.ndarrays as inputs.

    Parameters
    ----------
    IterableWrapper : [type]
        [description]
    """

    def __init__(self, env: gym.Env):
        super().__init__(env)
        self.observation_space = unwrap(self.env.observation_space)

    def step(self, action):
        if isinstance(action, Actions):
            action = unwrap(action)
        if hasattr(action, "detach"):
            action = action.detach()
        assert action in self.action_space, (action, type(action), self.action_space)
        observation, reward, done, info = self.env.step(action)
        observation = unwrap(observation)
        reward = unwrap(reward)
        return observation, reward, done, info

    def reset(self, **kwargs):
        observation = self.env.reset(**kwargs)
        return unwrap(observation)


================================================
FILE: sequoia/settings/settings.puml
================================================
@startuml settings
' skinparam linetype polyline
' skinparam linetype ortho

' skinparam classFontSize 20
' fieldFontSize 20
' !include gym.puml
' !include assumptions/assumptions.puml
hide empty members
' hide fields
' hide methods  

' ' Use this to turn on / off the display of assumptions
' remove Assumptions
' ' Use this to turn on / off groups of assumptions
' remove supervision_assumptions
' remove action_space_assumption


' remove Settings
' Comment/uncomment this to show/hide the descriptions for each node.
' hide fields

package settings as sequoia.settings {
    ' !include base/base.puml

    ' package settings.base {
    ' }

    package settings.assumptions {
        !include assumptions/assumptions.puml
        remove assumptions
        remove <<Observations>>
        remove <<Actions>>
        remove <<Rewards>>
        remove <<Environment>>
        ' remove supervision_assumptions
        ' remove context_assumption_family
        ' remove <<Assumption>>
    }

    ' !include settings/rl/rl.puml
    package rl {

        ' ContinualRLSetting -.- rl.continuous.ContinuousTaskAgnosticRLSetting

        abstract class RLSetting <<AbstractSetting>> extends SparseFeedback, ActiveEnvironment {}
        package continuous as rl.continuous {
            class ContinuousTaskAgnosticRLSetting <<Setting>> implements RLSetting, ContinuousTaskAgnosticSetting {}
        }
        package discrete as rl.discrete {
            class DiscreteTaskAgnosticRLSetting <<Setting>> implements DiscreteTaskAgnosticSetting, ContinuousTaskAgnosticRLSetting {}
        }
        package incremental as rl.incremental {
            class IncrementalRLSetting <<Setting>> implements IncrementalSetting, DiscreteTaskAgnosticRLSetting {}
        }
        package class_incremental as rl.class_incremental {
            class ClassIncrementalRLSetting <<Setting>> implements ClassIncrementalSetting, IncrementalRLSetting {}
        }
        package domain_incremental as rl.domain_incremental {
            class DomainIncrementalRLSetting <<Setting>> implements DomainIncrementalSetting, IncrementalRLSetting {}
        }
        package traditional as rl.traditional {
            class TraditionalRLSetting <<Setting>> implements TraditionalSetting, IncrementalRLSetting {}
        }
        package task_incremental as rl.task_incremental {
            class TaskIncrementalRLSetting <<Setting>> implements TaskIncrementalSetting, IncrementalRLSetting {}
        }
        package multi_task as rl.multi_task {
            class MultiTaskRLSetting <<Setting>> implements MultiTaskSetting, TaskIncrementalRLSetting, TraditionalRLSetting {}
        }
        remove rl.class_incremental
        remove rl.domain_incremental
    }

    ' !include settings/rl/sl.puml
    package sl {
        abstract class SLSetting <<AbstractSetting>> extends DenseFeedback, PassiveEnvironment {}
        package continuous as sl.continuous {
            class ContinuousTaskAgnosticSLSetting <<Setting>> implements SLSetting, ContinuousTaskAgnosticSetting {}
        }
        package discrete as sl.discrete {
            class DiscreteTaskAgnosticSLSetting <<Setting>> implements DiscreteTaskAgnosticSetting, ContinuousTaskAgnosticSLSetting {}
        }
        package incremental as sl.incremental {
            class IncrementalSLSetting <<Setting>> implements IncrementalSetting, DiscreteTaskAgnosticSLSetting {}
        }
        package class_incremental as sl.class_incremental {
            class ClassIncrementalSLSetting <<Setting>> implements ClassIncrementalSetting, IncrementalSLSetting {}
        }
        package domain_incremental as sl.domain_incremental {
            class DomainIncrementalSLSetting <<Setting>> implements DomainIncrementalSetting, IncrementalSLSetting {}
        }
        package traditional as sl.traditional {
            class TraditionalSLSetting <<Setting>> implements TraditionalSetting, IncrementalSLSetting {}
        }
        package task_incremental as sl.task_incremental {
            class TaskIncrementalSLSetting <<Setting>> implements TaskIncrementalSetting, IncrementalSLSetting {}
        }
        package multi_task as sl.multi_task {
            class MultiTaskSLSetting <<Setting>> implements MultiTaskSetting, TaskIncrementalSLSetting, TraditionalSLSetting {}
        }
        remove sl.class_incremental
        remove sl.domain_incremental
    }
}


@enduml


================================================
FILE: sequoia/settings/sl/README.md
================================================
# SL Tree

This is the Tree of Setting on the RL side.


================================================
FILE: sequoia/settings/sl/__init__.py
================================================
from .. import Results
from .environment import PassiveEnvironment

# TODO: Replace all uses of 'PassiveEnvironment' with 'SLEnvironment'
SLEnvironment = PassiveEnvironment
from .continual import ContinualSLSetting
from .discrete import DiscreteTaskAgnosticSLSetting
from .incremental import IncrementalSLSetting
from .setting import SLSetting

# NOTE: Class-Incremental is now the same as IncrementalSLSetting.
# from .class_incremental import ClassIncrementalSetting
ClassIncrementalSetting = IncrementalSLSetting
from .domain_incremental import DomainIncrementalSLSetting
from .multi_task import MultiTaskSLSetting
from .task_incremental import TaskIncrementalSLSetting
from .traditional import TraditionalSLSetting

# TODO: Import variants without the 'SL' in it above, and then don't include then in the
# __all__ below, to improve backward compatibility a bit.
# __all__ = [
#     "PassiveEnvironment",
#     "SLSetting", ...
# ]


================================================
FILE: sequoia/settings/sl/continual/__init__.py
================================================
from .environment import ContinualSLEnvironment, ContinualSLTestEnvironment
from .objects import Actions, Observations, ObservationSpace, Rewards
from .setting import ContinualSLSetting

Environment = ContinualSLEnvironment
TestEnvironment = ContinualSLTestEnvironment


================================================
FILE: sequoia/settings/sl/continual/environment.py
================================================
""" Continual SL environment. (smooth task boundaries, etc)
"""
import warnings
from functools import partial
from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Type, Union

import gym
import numpy as np
from continuum.datasets import (
    CIFAR10,
    CIFAR100,
    EMNIST,
    KMNIST,
    MNIST,
    QMNIST,
    CIFARFellowship,
    Core50,
    Core50v2_79,
    Core50v2_196,
    Core50v2_391,
    FashionMNIST,
    ImageNet100,
    ImageNet1000,
    MNISTFellowship,
    Synbols,
)
from gym import Space, spaces
from torch import Tensor
from torch.nn import functional as F
from torch.utils.data import Dataset, IterableDataset

from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support as tensor_space
from sequoia.common.gym_wrappers.utils import tile_images
from sequoia.common.spaces import Image, TypedDictSpace
from sequoia.common.transforms import Transforms
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.utils.logging_utils import get_logger

from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType

logger = get_logger(__name__)


base_observation_spaces: Dict[str, Space] = {
    dataset_class.__name__.lower(): space
    for dataset_class, space in {
        MNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        FashionMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        KMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        EMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        QMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        MNISTFellowship: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        # TODO: Determine the true bounds on the image values in cifar10.
        # Appears to be  ~= [-2.5, 2.5]
        CIFAR10: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))),
        CIFAR100: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))),
        CIFARFellowship: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))),
        ImageNet100: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        ImageNet1000: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        Core50: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        Core50v2_79: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        Core50v2_196: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        Core50v2_391: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        Synbols: tensor_space(Image(0, 1, shape=(3, 32, 32))),
    }.items()
}


base_action_spaces: Dict[str, Space] = {
    dataset_class.__name__.lower(): space
    for dataset_class, space in {
        MNIST: spaces.Discrete(10),
        FashionMNIST: spaces.Discrete(10),
        KMNIST: spaces.Discrete(10),
        EMNIST: spaces.Discrete(10),
        QMNIST: spaces.Discrete(10),
        MNISTFellowship: spaces.Discrete(30),
        CIFAR10: spaces.Discrete(10),
        CIFAR100: spaces.Discrete(100),
        CIFARFellowship: spaces.Discrete(110),
        ImageNet100: spaces.Discrete(100),
        ImageNet1000: spaces.Discrete(1000),
        Core50: spaces.Discrete(50),
        Core50v2_79: spaces.Discrete(50),
        Core50v2_196: spaces.Discrete(50),
        Core50v2_391: spaces.Discrete(50),
        Synbols: spaces.Discrete(48),
    }.items()
}

# NOTE: Since the current SL datasets are image classification, the reward spaces are
# the same as the action space. But that won't be the case when we add other types of
# datasets!
base_reward_spaces: Dict[str, Space] = {
    dataset_name: action_space
    for dataset_name, action_space in base_action_spaces.items()
    if isinstance(action_space, spaces.Discrete)
}


def split_batch(
    batch: Tuple[Tensor, ...],
    hide_task_labels: bool,
    Observations=Observations,
    Rewards=Rewards,
) -> Tuple[Observations, Rewards]:
    """Splits the batch into a tuple of Observations and Rewards.

    Parameters
    ----------
    batch : Tuple[Tensor, ...]
        A batch of data coming from the dataset.

    Returns
    -------
    Tuple[Observations, Rewards]
        A tuple of Observations and Rewards.
    """
    # In this context (class_incremental), we will always have 3 items per
    # batch, because we use the ClassIncremental scenario from Continuum.
    if len(batch) == 2 and all(isinstance(item, Tensor) for item in batch):
        x, y = batch
        t = None
    else:
        assert len(batch) == 3
        x, y, t = batch

    if hide_task_labels:
        # Remove the task labels if we're not currently allowed to have
        # them.
        # TODO: Using None might cause some issues. Maybe set -1 instead?
        t = None

    observations = Observations(x=x, task_labels=t)
    rewards = Rewards(y=y)
    return observations, rewards


# IDEA: Have this env be the 'wrapper' / base env type for the continual SL envs, and
# register them in gym!
def default_split_batch_function(
    hide_task_labels: bool,
    Observations: Type[ObservationType] = Observations,
    Rewards: Type[RewardType] = Rewards,
) -> Callable[[Tuple[Tensor, ...]], Tuple[ObservationType, RewardType]]:
    """Returns a callable that is used to split a batch into observations and rewards."""
    return partial(
        split_batch,
        hide_task_labels=hide_task_labels,
        Observations=Observations,
        Rewards=Rewards,
    )


class ContinualSLEnvironment(PassiveEnvironment[ObservationType, ActionType, RewardType]):
    """Continual Supervised Learning Environment.

    TODO: Here we actually inform the environment of its observation / action / reward
    spaces, which isn't ideal, but is arguably better than giving the env the
    responsibility (and arguments needed) to create the datasets of each task for the
    right split, apply the transforms,
    of each task and to use
    the right train/val/test split
    """

    def __init__(
        self,
        dataset: Union[Dataset, IterableDataset],
        hide_task_labels: bool = True,
        observation_space: TypedDictSpace = None,
        action_space: gym.Space = None,
        reward_space: gym.Space = None,
        Observations: Type[ObservationType] = Observations,
        Actions: Type[ActionType] = Actions,
        Rewards: Type[RewardType] = Rewards,
        split_batch_fn: Callable[[Tuple[Any, ...]], Tuple[ObservationType, ActionType]] = None,
        pretend_to_be_active: bool = False,
        strict: bool = False,
        one_epoch_only: bool = True,
        drop_last: bool = False,
        **kwargs,
    ):
        assert isinstance(dataset, Dataset)
        self._hide_task_labels = hide_task_labels
        split_batch_fn = default_split_batch_function(
            hide_task_labels=hide_task_labels,
            Observations=Observations,
            Rewards=Rewards,  # TODO: Fix this 'Rewards' being of the 'wrong' type.
        )
        self._one_epoch_only = one_epoch_only
        super().__init__(
            dataset=dataset,
            split_batch_fn=split_batch_fn,
            observation_space=observation_space,
            action_space=action_space,
            reward_space=reward_space,
            pretend_to_be_active=pretend_to_be_active,
            strict=strict,
            drop_last=drop_last,
            **kwargs,
        )
        # TODO: Clean up the batching of a Sparse(Discrete) space so its less ugly.

    def step(
        self, action: ActionType
    ) -> Tuple[ObservationType, Optional[RewardType], bool, Sequence[Dict]]:
        obs, reward, done, info = super().step(action)
        if done and self._one_epoch_only:
            self.close()
        return obs, reward, done, info

    def __iter__(self):
        yield from super().__iter__()
        if self._one_epoch_only:
            self.close()

    # TODO: Remove / fix this 'split batch function'. The problem is that we need to
    # tell the environment how to take the three items from continuum and convert them
    # into


from pathlib import Path
from typing import Optional

import torch

from sequoia.common.config import Config
from sequoia.common.gym_wrappers import has_wrapper
from sequoia.common.metrics import ClassificationMetrics
from sequoia.settings.assumptions.continual import TestEnvironment
from sequoia.utils.logging_utils import get_logger

from .results import ContinualSLResults


class ContinualSLTestEnvironment(TestEnvironment[ContinualSLEnvironment]):
    def __init__(
        self,
        env: ContinualSLEnvironment,
        directory: Path,
        hide_task_labels: bool = True,
        step_limit: Optional[int] = None,
        no_rewards: bool = False,
        config: Config = None,
        **kwargs,
    ):
        from .wrappers import ShowLabelDistributionWrapper

        if not has_wrapper(env, ShowLabelDistributionWrapper):
            env = ShowLabelDistributionWrapper(env, env_name="test")
        super().__init__(
            env,
            directory=directory,
            step_limit=step_limit,
            no_rewards=no_rewards,
            config=config,
            **kwargs,
        )
        # IDEA: Make the env give us the task ids, and then hide them again after, just
        # so we can get propper 'per-task' metrics.
        # NOTE: This wouldn't be ideal however, as would assume that there is a 'discrete'
        # set of values for the task id, which is only true in Classification datasets.
        assert isinstance(self.env.unwrapped, ContinualSLEnvironment)
        self.env.unwrapped.hide_task_labels = False

        self._steps = 0
        self.results = ContinualSLResults()
        self._reset = False
        self.action_: Optional[ActionType] = None
        from collections import deque

        self.observation_queue = deque(maxlen=3)

    def get_results(self) -> ContinualSLResults:
        from .wrappers import ShowLabelDistributionWrapper

        if has_wrapper(self, ShowLabelDistributionWrapper):
            self.results.plots_dict["Label distribution"] = self.env.make_figure()
        return self.results

    def __iter__(self):
        """BUG: The iter/send type of test loop doesn't produce any results!"""
        assert self.unwrapped.pretend_to_be_active
        # obs = self.reset()
        # self.observations = obs
        # yield obs, None
        self._before_reset()
        for i, (obs, rewards) in enumerate(self.env.__iter__()):
            if i == 0:
                self._after_reset(obs)
            if len(self.observation_queue) == self.observation_queue.maxlen:
                raise RuntimeError(
                    f"Can't consume more than {self.observation_queue.maxlen} batches "
                    f"in a row without sending an action!"
                )
            self.observation_queue.append(obs)

            if self.no_rewards:
                rewards = None

            yield obs, rewards
        self.close()

    def send(self, actions: ActionType) -> Optional[RewardType]:
        self._before_step(actions)
        rewards = self.env.send(actions)
        obs = self.observation_queue.popleft()
        info = getattr(obs, "info", {})
        done = self.get_total_steps() >= self.step_limit
        self._after_step(obs, rewards, done, info)

        if self.no_rewards:
            rewards = None

        return rewards

    def reset(self):
        return super().reset()
        # if not self._reset:
        #     logger.debug("Initial reset.")
        #     self._reset = True
        #     return super().reset()
        # else:
        #     # TODO: Why is this a good thing again? Why not just let an 'EpisodeLimit'
        #     # wrapper handle this?
        #     logger.debug("Resetting the env closes it. (only one episode in SL)")
        #     self.close()
        #     return None

    def _before_step(self, action):
        self.action_ = action
        return super()._before_step(action)

    def _after_step(self, observation, reward, done, info):
        # TODO: Fix this once we actually use a ClassificationAction!
        if not isinstance(reward, Rewards):
            reward = Rewards(y=torch.as_tensor(reward))

        batch_size = reward.batch_size

        action = self.action_
        assert action is not None

        if isinstance(self.action_space, (spaces.MultiDiscrete, spaces.MultiBinary)):
            n_classes = self.action_space.nvec[0]
            from sequoia.settings.assumptions.task_type import ClassificationActions

            if not isinstance(action, ClassificationActions):
                if isinstance(action, Actions):
                    y_pred = action.y_pred
                    # 'upgrade', creating some fake logits.
                else:
                    y_pred = torch.as_tensor(action)
                fake_logits = F.one_hot(y_pred, n_classes)
                action = ClassificationActions(y_pred=y_pred, logits=fake_logits)
        else:
            raise NotImplementedError(
                f"TODO: Remove the assumption here that the env is a classification env "
                f"({self.action_space}, {self.reward_space})"
            )

        if action.batch_size != reward.batch_size:
            warnings.warn(
                RuntimeWarning(
                    f"Truncating the action since its batch size {action.batch_size} "
                    f"is larger than the rewards': ({reward.batch_size})"
                )
            )
            action = action[:, : reward.batch_size]

        # TODO: Use some kind of generic `get_metrics(actions: Actions, rewards: Rewards)`
        # function instead.
        y = reward.y
        logits = action.logits
        y_pred = action.y_pred
        metric = ClassificationMetrics(y=y, logits=logits, y_pred=y_pred)

        self.results.metrics.append(metric)
        self._steps += 1

        # Debugging issue with Monitor class:
        # return super()._after_step(observation, reward, done, info)
        if not self.enabled:
            return done

        if done and self.env_semantics_autoreset:
            # For envs with BlockingReset wrapping VNCEnv, this observation will be the
            # first one of the new episode
            if self.config.render:
                self.reset_video_recorder()
            self.episode_id += 1
            self._flush()

        # Record stats: (TODO: accuracy serves as the 'reward'!)
        reward_for_stats = metric.accuracy
        self.stats_recorder.after_step(observation, reward_for_stats, done, info)

        # Record video
        if self.config.render:
            self.video_recorder.capture_frame()
        return done
        ##

    def _after_reset(self, observation: ObservationType):
        image_batch = observation.numpy().x
        # Need to create a single image with the right dtype for the Monitor
        # from gym to create gifs / videos with it.
        if self.batch_size:
            # Need to tile the image batch so it can be seen as a single image
            # by the Monitor.
            image_batch = tile_images(image_batch)

        image_batch = Transforms.channels_last_if_needed(image_batch)
        if image_batch.dtype == np.float32:
            assert (0 <= image_batch).all() and (image_batch <= 1).all()
            image_batch = (256 * image_batch).astype(np.uint8)

        assert image_batch.dtype == np.uint8
        # Debugging this issue here:
        # super()._after_reset(image_batch)

        # -- Code from Monitor
        if not self.enabled:
            return
        # Reset the stat count
        self.stats_recorder.after_reset(observation)
        if self.config and self.config.render:
            self.reset_video_recorder()

        # Bump *after* all reset activity has finished
        self.episode_id += 1

        self._flush()
        # --

    def render(self, mode="human", **kwargs):
        # NOTE: This doesn't get called, because the video recorder uses
        # self.env.render(), rather than self.render()
        # TODO: Render when the 'render' argument in config is set to True.
        image_batch = super().render(mode=mode, **kwargs)
        if mode == "rgb_array" and self.batch_size:
            image_batch = tile_images(image_batch)
        return image_batch


================================================
FILE: sequoia/settings/sl/continual/environment_test.py
================================================
""" TODO: Tests for the TestEnvironment of the ContinualSLSetting. """

from pathlib import Path
from typing import ClassVar, Type

import gym
import numpy as np
import pytest
from torch.utils.data import Subset
from torchvision.datasets import MNIST

from sequoia.common.config import Config
from sequoia.common.metrics import ClassificationMetrics
from sequoia.common.spaces import Image
from sequoia.common.transforms import Compose, Transforms
from sequoia.settings.sl.environment import PassiveEnvironment

from .environment import ContinualSLEnvironment, ContinualSLTestEnvironment
from .results import ContinualSLResults


class TestContinualSLTestEnvironment:
    Environment: ClassVar[Type[Environment]] = ContinualSLEnvironment
    TestEnvironment: ClassVar[Type[TestEnvironment]] = ContinualSLTestEnvironment

    @pytest.fixture()
    def base_env(self):
        batch_size = 5
        transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
        dataset = MNIST(
            "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
        )
        max_samples = 100
        dataset = Subset(dataset, list(range(max_samples)))

        obs_space = Image(0, 255, (1, 28, 28), np.uint8)
        obs_space = transforms(obs_space)
        env = self.Environment(
            dataset,
            n_classes=10,
            batch_size=batch_size,
            observation_space=obs_space,
            pretend_to_be_active=True,
            drop_last=False,
        )
        assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28))
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space == env.action_space
        return env

    @pytest.mark.parametrize("no_rewards", [True, False])
    def test_iteration_produces_results(
        self,
        no_rewards: bool,
        base_env: ContinualSLEnvironment,
        tmp_path: Path,
        config: Config,
    ):
        """TODO: Test that when iterating through the env as a dataloader and sending
        actions produces results.
        """
        env = self.TestEnvironment(
            base_env,
            directory=tmp_path,
            step_limit=100 // base_env.batch_size,
            no_rewards=no_rewards,
        )
        env.config = config

        for obs, rewards in env:
            assert rewards is None
            action = env.action_space.sample()
            rewards = env.send(action)
            assert (rewards is None) == env.no_rewards

        assert env.is_closed()
        results = env.get_results()
        self.validate_results(results)

    def validate_results(self, results: ContinualSLResults):
        assert isinstance(results, ContinualSLResults)
        assert isinstance(results.average_metrics, ClassificationMetrics)
        assert results.objective > 0
        # TODO: Fix this problem:
        assert results.average_metrics.n_samples in [95, 100]

    @pytest.mark.parametrize("no_rewards", [True, False])
    def test_gym_interaction_produces_results(
        self, no_rewards: bool, base_env: PassiveEnvironment, tmp_path: Path, config: Config
    ):
        """TODO: Test that when iterating through the env as a dataloader and sending
        actions produces results.
        """
        env = self.TestEnvironment(
            base_env,
            directory=tmp_path,
            step_limit=100 // base_env.batch_size,
            no_rewards=no_rewards,
        )
        env.config = config
        done = False
        obs = env.reset()
        steps = 0
        while not done:
            action = env.action_space.sample()
            obs, rewards, done, info = env.step(action)
            steps += 1
            assert (rewards is None) == env.no_rewards

            if steps > 20:
                pytest.fail("Shouldn't have gone longer than 20 steps!")

        # BUG: There's currently a weird off-by-1 error with the total number of steps,
        # which makes these checks for `is_closed()` fail. However, in practice we don't
        # try to iterate twice on the env, so it's not a big deal.
        # FIXME: Fix this check:
        assert env.is_closed()
        # FIXME: Fix this check:
        with pytest.raises((gym.error.ClosedEnvironmentError, gym.error.Error)):
            env.reset()
        # FIXME: Fix this check:
        with pytest.raises(gym.error.ClosedEnvironmentError):
            _ = env.step(env.action_space.sample())

        results = env.get_results()
        self.validate_results(results)


================================================
FILE: sequoia/settings/sl/continual/envs.py
================================================
""" Utility functions for determining the observation space for a given SL dataset.
"""
from typing import Any, Dict, List, Optional, Sequence

import gym
import numpy as np
import torch
from continuum.datasets import (
    CIFAR10,
    CIFAR100,
    EMNIST,
    KMNIST,
    MNIST,
    QMNIST,
    CIFARFellowship,
    Core50,
    Core50v2_79,
    Core50v2_196,
    Core50v2_391,
    FashionMNIST,
    ImageNet100,
    ImageNet1000,
    MNISTFellowship,
    Synbols,
)
from continuum.tasks import TaskSet
from gym import Space, spaces
from torch.utils.data import Subset, TensorDataset

from sequoia.common.spaces import ImageTensorSpace, TensorBox, TensorDiscrete
from sequoia.common.spaces.image import could_become_image
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


base_observation_spaces: Dict[str, Space] = {
    dataset_class.__name__.lower(): space
    for dataset_class, space in {
        MNIST: ImageTensorSpace(0, 1, shape=(1, 28, 28)),
        FashionMNIST: ImageTensorSpace(0, 1, shape=(1, 28, 28)),
        KMNIST: ImageTensorSpace(0, 1, shape=(1, 28, 28)),
        EMNIST: ImageTensorSpace(0, 1, shape=(1, 28, 28)),
        QMNIST: ImageTensorSpace(0, 1, shape=(1, 28, 28)),
        MNISTFellowship: ImageTensorSpace(0, 1, shape=(1, 28, 28)),
        # TODO: Determine the true bounds on the image values in cifar10.
        # Appears to be  ~= [-2.5, 2.5]
        CIFAR10: ImageTensorSpace(-np.inf, np.inf, shape=(3, 32, 32)),
        CIFAR100: ImageTensorSpace(-np.inf, np.inf, shape=(3, 32, 32)),
        CIFARFellowship: ImageTensorSpace(-np.inf, np.inf, shape=(3, 32, 32)),
        ImageNet100: ImageTensorSpace(0, 1, shape=(224, 224, 3)),
        ImageNet1000: ImageTensorSpace(0, 1, shape=(224, 224, 3)),
        Core50: ImageTensorSpace(0, 1, shape=(224, 224, 3)),
        Core50v2_79: ImageTensorSpace(0, 1, shape=(224, 224, 3)),
        Core50v2_196: ImageTensorSpace(0, 1, shape=(224, 224, 3)),
        Core50v2_391: ImageTensorSpace(0, 1, shape=(224, 224, 3)),
        Synbols: ImageTensorSpace(0, 1, shape=(3, 32, 32)),
    }.items()
}


base_action_spaces: Dict[str, Space] = {
    dataset_class.__name__.lower(): space
    for dataset_class, space in {
        MNIST: spaces.Discrete(10),
        FashionMNIST: spaces.Discrete(10),
        KMNIST: spaces.Discrete(10),
        EMNIST: spaces.Discrete(10),
        QMNIST: spaces.Discrete(10),
        MNISTFellowship: spaces.Discrete(30),
        CIFAR10: spaces.Discrete(10),
        CIFAR100: spaces.Discrete(100),
        CIFARFellowship: spaces.Discrete(110),
        ImageNet100: spaces.Discrete(100),
        ImageNet1000: spaces.Discrete(1000),
        Core50: spaces.Discrete(50),
        Core50v2_79: spaces.Discrete(50),
        Core50v2_196: spaces.Discrete(50),
        Core50v2_391: spaces.Discrete(50),
        Synbols: spaces.Discrete(48),
    }.items()
}


# NOTE: Since the current SL datasets are image classification, the reward spaces are
# the same as the action space. But that won't be the case when we add other types of
# datasets!
base_reward_spaces: Dict[str, Space] = {
    dataset_name: action_space
    for dataset_name, action_space in base_action_spaces.items()
    if isinstance(action_space, spaces.Discrete)
}

CTRL_INSTALLED: bool = False
CTRL_STREAMS: List[str] = []
CTRL_NB_TASKS: Dict[str, Optional[int]] = {}
try:
    from ctrl.tasks.task import Task
    from ctrl.tasks.task_generator import TaskGenerator
except ImportError as exc:
    logger.debug(f"ctrl-bench isn't installed: {exc}")
    # Creating those just for type hinting.
    class Task:
        pass

    class TaskGenerator:
        pass

else:
    CTRL_INSTALLED = True
    CTRL_STREAMS = ["s_plus", "s_minus", "s_in", "s_out", "s_pl", "s_long"]
    n_tasks = [5, 5, 5, 5, 4, None]
    CTRL_NB_TASKS = dict(zip(CTRL_STREAMS, n_tasks))
    x_dims = [(3, 32, 32)] * len(CTRL_STREAMS)
    n_classes = [10, 10, 10, 10, 10, 5]

    for i, stream_name in enumerate(CTRL_STREAMS):
        # Create the 'base observation space' for this stream.
        obs_space = ImageTensorSpace(0, 1, shape=x_dims[i], dtype=torch.float32)

        # TODO: Not sure if the classes should be considered 'shared' or 'distinct'.
        # For now assume they are shared, so the setting's action space is always [0, 5]
        # but the action changes.
        # total_n_classes = n_tasks[i] * n_classes[i]
        # action_space = TensorDiscrete(n=total_n_classes)
        n_classes_per_task = n_classes[i]
        action_space = TensorDiscrete(n=n_classes_per_task)

        base_observation_spaces[stream_name] = obs_space
        base_action_spaces[stream_name] = action_space


from functools import singledispatch


@singledispatch
def get_observation_space(dataset: Any) -> gym.Space:
    raise NotImplementedError(
        f"Don't yet have a registered handler to get the observation space of dataset "
        f"{dataset}."
    )


@get_observation_space.register(Subset)
def _get_observation_space_for_subset(dataset: Subset) -> gym.Space:
    # The observations space of a Subset dataset is actually the same as the original
    # dataset.
    return get_observation_space(dataset.dataset)


@get_observation_space.register(str)
def _get_observation_space_for_dataset_name(dataset: str) -> gym.Space:
    if dataset not in base_observation_spaces:
        raise NotImplementedError(
            f"Can't yet tell what the 'base' observation space is for dataset "
            f"{dataset} because it doesn't have an entry in the "
            f"`base_observation_spaces` dict."
        )
    return base_observation_spaces[dataset]


@get_observation_space.register(TaskSet)
def _get_observation_space_for_taskset(dataset: TaskSet) -> gym.Space:
    assert False, dataset
    # return get_observation_space(type(dataset).__name__.lower())


@get_observation_space.register(TensorDataset)
def _get_observation_space_for_tensor_dataset(dataset: TensorDataset) -> gym.Space:
    x = dataset.tensors[0]
    if not (1 <= len(dataset.tensors) <= 2) or not (2 <= x.dim()):
        raise NotImplementedError(
            f"For now, can only handle TensorDatasets with 1 or 2 tensors. (x and y) "
            f"but dataset {dataset} has {len(dataset.tensors)}!"
        )

    low = x.min().cpu().item()
    high = x.max().cpu().item()
    obs_space = TensorBox(low=low, high=high, shape=x.shape[1:], dtype=x.dtype)
    if could_become_image(obs_space):
        obs_space = ImageTensorSpace.wrap(obs_space)
    return obs_space


@singledispatch
def get_action_space(dataset: Any) -> gym.Space:
    raise NotImplementedError(
        f"Don't yet have a registered handler to get the action space of dataset " f"{dataset}."
    )


@get_action_space.register(Subset)
def _get_action_space_for_subset(dataset: Subset) -> gym.Space:
    # The actions space of a Subset dataset is actually the same as the original
    # dataset.
    return get_action_space(dataset.dataset)


@get_action_space.register(str)
def _get_action_space_for_dataset_name(dataset: str) -> gym.Space:
    if dataset not in base_action_spaces:
        raise NotImplementedError(
            f"Can't yet tell what the 'base' action space is for dataset "
            f"{dataset} because it doesn't have an entry in the "
            f"`base_action_spaces` dict."
        )
    return base_action_spaces[dataset]


@singledispatch
def get_reward_space(dataset: Any) -> gym.Space:
    raise NotImplementedError(
        f"Don't yet have a registered handler to get the reward space of dataset " f"{dataset}."
    )


@get_reward_space.register(Subset)
def _get_reward_space_for_subset(dataset: Subset) -> gym.Space:
    # The rewards space of a Subset dataset is *usually* the same as the original
    # dataset.
    # TODO: Need to check this though? Maybe we're taking only the indices with a given class
    return get_reward_space(dataset.dataset)


@get_reward_space.register(str)
def _get_reward_space_for_dataset_name(dataset: str) -> gym.Space:
    if dataset not in base_reward_spaces:
        raise NotImplementedError(
            f"Can't yet tell what the 'base' reward space is for dataset "
            f"{dataset} because it doesn't have an entry in the "
            f"`base_reward_spaces` dict."
        )
    return base_reward_spaces[dataset]


@get_reward_space.register(TensorDataset)
@get_action_space.register(TensorDataset)
def get_y_space_for_tensor_dataset(dataset: TensorDataset) -> gym.Space:
    if len(dataset.tensors) != 2:
        raise NotImplementedError(
            f"Only able to detect the action space of TensorDatasets if they have two "
            f"tensors for now (x and y), but dataset {dataset} has {len(dataset.tensors)}!"
        )
    y = dataset.tensors[-1]
    low = y.min().item()
    high = y.max().item()
    y_sample_shape = y.shape[1:]

    if y.dtype.is_floating_point:
        return TensorBox(low, high, shape=y_sample_shape, dtype=y.dtype)

    # Integer y:
    if low == 0:
        n_classes = high + 1
        return TensorDiscrete(n_classes)

    # TODO: Add a space like DiscreteWithOffset ?
    return TensorBox(low, high, shape=y_sample_shape, dtype=y.dtype)


@get_action_space.register(list)
@get_action_space.register(tuple)
def _get_action_space_for_list_of_datasets(datasets: Sequence[TaskSet]) -> gym.Space:
    # TODO: IDEA: If given a list of datasets, try to find the 'union' of their spaces.
    # This is meant to be one potential solution to the case where custom datasets are
    # passed for each task, like [0, 2), [3, 4], etc.
    action_spaces = [get_action_space(dataset) for dataset in datasets]
    if isinstance(action_spaces[0], spaces.Discrete):
        lows = [0 if isinstance(space, spaces.Discrete) else space.low for space in action_spaces]
        highs = [
            space.n - 1 if isinstance(space, spaces.Discrete) else space.high
            for space in action_spaces
        ]

    if isinstance(reward_spaces[0], spaces.Discrete) and min(lows) == 0:
        return TensorDiscrete(max(highs) + 1)

    raise NotImplementedError(
        f"Don't yet know how to get the 'union' of the action spaces ({action_spaces}) "
        f" of datasets {datasets}"
    )


@get_reward_space.register(list)
@get_reward_space.register(tuple)
def _get_reward_space_for_list_of_datasets(datasets: Sequence[TaskSet]) -> gym.Space:
    # TODO: IDEA: If given a list of datasets, try to find the 'union' of their spaces.
    # This is meant to be one potential solution to the case where custom datasets are
    # passed for each task, like [0, 2), [3, 4], etc.
    reward_spaces = [get_reward_space(dataset) for dataset in datasets]
    if isinstance(reward_spaces[0], spaces.Discrete):
        lows = [0 if isinstance(space, spaces.Discrete) else space.low for space in reward_spaces]
        highs = [
            space.n - 1 if isinstance(space, spaces.Discrete) else space.high
            for space in reward_spaces
        ]

    if isinstance(reward_spaces[0], spaces.Discrete) and min(lows) == 0:
        return TensorDiscrete(max(highs) + 1)

    raise NotImplementedError(
        f"Don't yet know how to get the 'union' of the reward spaces ({reward_spaces}) "
        f" of datasets {datasets}"
    )


================================================
FILE: sequoia/settings/sl/continual/objects.py
================================================
from dataclasses import dataclass
from typing import Optional, TypeVar

from gym import spaces
from torch import Tensor

from sequoia.common.spaces import ImageTensorSpace, Sparse, TypedDictSpace
from sequoia.settings.assumptions.continual import ContinualAssumption
from sequoia.settings.sl.setting import SLSetting


@dataclass(frozen=True)
class Observations(SLSetting.Observations, ContinualAssumption.Observations):
    """Observations from a Continual Supervised Learning environment."""

    x: Tensor
    task_labels: Optional[Tensor] = None


ObservationType = TypeVar("ObservationType", bound=Observations)
import torch


class ObservationSpace(TypedDictSpace[ObservationType]):
    """Observation space of a Continual SL Setting."""

    # The sample space: this is a gym.spaces.Box subclass with added properties for
    # images, such as `channels`, `h`, `w`, `is_channels_first`, etc.
    # This space will return Tensors.
    x: ImageTensorSpace
    # The task label space: This is a gym.spaces.MultiDiscrete of Tensors.
    task_labels: Sparse[torch.LongTensor]


# TODO: Eventually also use some kind of structured action and reward space!
# TODO: Figure out how/where to switch the actions type to be specific to classification
# from sequoia.settings.assumptions.task_type import ClassificationActions


@dataclass(frozen=True)
class Actions(SLSetting.Actions):
    """Actions to be sent to a Continual Supervised Learning environment."""

    y_pred: Tensor


class ActionSpace(TypedDictSpace):
    """Action space of a Continual SL Setting."""

    y_pred: spaces.Space


@dataclass(frozen=True)
class Rewards(SLSetting.Rewards):
    """Rewards obtained from a Continual Supervised Learning environment."""

    y: Tensor


class RewardSpace(TypedDictSpace):
    """Reward space of a Continual SL Setting."""

    y: spaces.Space


ActionType = TypeVar("ActionType", bound=Actions)
RewardType = TypeVar("RewardType", bound=Rewards)


================================================
FILE: sequoia/settings/sl/continual/results.py
================================================
from sequoia.common.metrics import MetricsType
from sequoia.settings.assumptions.continual import ContinualResults


class ContinualSLResults(ContinualResults[MetricsType]):
    pass


================================================
FILE: sequoia/settings/sl/continual/setting.py
================================================
import itertools
from dataclasses import dataclass
from pathlib import Path
from typing import ClassVar, Dict, List, Optional, Type, TypeVar, Union

import gym
import numpy as np
import torch
from continuum.datasets import (
    CIFAR10,
    CIFAR100,
    EMNIST,
    KMNIST,
    MNIST,
    QMNIST,
    CIFARFellowship,
    FashionMNIST,
    ImageNet100,
    ImageNet1000,
    MNISTFellowship,
    Synbols,
    _ContinuumDataset,
)
from continuum.scenarios import ClassIncremental, _BaseScenario
from continuum.tasks import TaskSet, concat, split_train_val
from gym import spaces
from simple_parsing import choice, field, list_field
from torch import Tensor
from torch.utils.data import ConcatDataset, Dataset, Subset

import wandb
from sequoia.common.config import Config
from sequoia.common.gym_wrappers import RenderEnvWrapper, TransformObservation
from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support
from sequoia.common.spaces import Sparse
from sequoia.common.transforms import Compose, Transforms
from sequoia.settings.assumptions.continual import ContinualAssumption
from sequoia.settings.base import Method
from sequoia.settings.sl.setting import SLSetting
from sequoia.settings.sl.wrappers import MeasureSLPerformanceWrapper
from sequoia.utils.generic_functions import concatenate
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import flag

from .environment import ContinualSLEnvironment, ContinualSLTestEnvironment
from .envs import (
    CTRL_INSTALLED,
    CTRL_STREAMS,
    base_action_spaces,
    base_observation_spaces,
    base_reward_spaces,
    get_action_space,
    get_observation_space,
    get_reward_space,
)
from .objects import Actions, ActionSpace, Observations, ObservationSpace, Rewards, RewardSpace
from .results import ContinualSLResults
from .wrappers import relabel

logger = get_logger(__name__)

EnvironmentType = TypeVar("EnvironmentType", bound=ContinualSLEnvironment)

available_datasets = {
    c.__name__.lower(): c
    for c in [
        CIFARFellowship,
        MNISTFellowship,
        ImageNet100,
        ImageNet1000,
        CIFAR10,
        CIFAR100,
        EMNIST,
        KMNIST,
        MNIST,
        QMNIST,
        FashionMNIST,
        Synbols,
    ]
    # "synbols": Synbols,
    # "synbols_font": partial(Synbols, task="fonts"),
}
if CTRL_INSTALLED:
    available_datasets.update(dict(zip(CTRL_STREAMS, CTRL_STREAMS)))


@dataclass
class ContinualSLSetting(SLSetting, ContinualAssumption):
    """Continuous, Task-Agnostic, Continual Supervised Learning.

    This is *currently* the most "general" Supervised Continual Learning setting in
    Sequoia.

    - Data distribution changes smoothly over time.
    - Smooth transitions between "tasks"
    - No information about task boundaries or task identity (no task IDs)
    - Maximum of one 'epoch' through the environment.
    """

    # Class variables that hold the 'base' observation/action/reward spaces for the
    # available datasets.
    base_observation_spaces: ClassVar[Dict[str, gym.Space]] = base_observation_spaces
    base_action_spaces: ClassVar[Dict[str, gym.Space]] = base_action_spaces
    base_reward_spaces: ClassVar[Dict[str, gym.Space]] = base_reward_spaces

    # (NOTE: commenting out SLSetting.Observations as it is the same class
    # as Setting.Observations, and we want a consistent method resolution order.
    Observations: ClassVar[Type[Observations]] = Observations
    Actions: ClassVar[Type[Actions]] = Actions
    Rewards: ClassVar[Type[Rewards]] = Rewards
    ObservationSpace: ClassVar[Type[ObservationSpace]] = ObservationSpace

    Environment: ClassVar[Type[SLSetting.Environment]] = ContinualSLEnvironment[
        Observations, Actions, Rewards
    ]

    Results: ClassVar[Type[ContinualSLResults]] = ContinualSLResults

    # Class variable holding a dict of the names and types of all available
    # datasets.
    # TODO: Issue #43: Support other datasets than just classification
    available_datasets: ClassVar[Dict[str, Type[_ContinuumDataset]]] = available_datasets
    # A continual dataset to use. (Should be taken from the continuum package).
    dataset: str = choice(available_datasets.keys(), default="mnist")

    # Transformations to use. See the Transforms enum for the available values.
    transforms: List[Transforms] = list_field(
        Transforms.to_tensor,
        # BUG: The input_shape given to the Model doesn't have the right number
        # of channels, even if we 'fixed' them here. However the images are fine
        # after.
        Transforms.three_channels,
        Transforms.channels_first_if_needed,
    )

    # Either number of classes per task, or a list specifying for
    # every task the amount of new classes.
    increment: Union[int, List[int]] = list_field(
        2, type=int, nargs="*", alias="n_classes_per_task"
    )
    # The scenario number of tasks.
    # If zero, defaults to the number of classes divied by the increment.
    nb_tasks: int = 0
    # A different task size applied only for the first task.
    # Desactivated if `increment` is a list.
    initial_increment: int = 0
    # An optional custom class order, used for NC.
    class_order: Optional[List[int]] = None
    # Either number of classes per task, or a list specifying for
    # every task the amount of new classes (defaults to the value of
    # `increment`).
    test_increment: Optional[Union[List[int], int]] = None
    # A different task size applied only for the first test task.
    # Desactivated if `test_increment` is a list. Defaults to the
    # value of `initial_increment`.
    test_initial_increment: Optional[int] = None
    # An optional custom class order for testing, used for NC.
    # Defaults to the value of `class_order`.
    test_class_order: Optional[List[int]] = None

    # Wether task boundaries are smooth or not.
    smooth_task_boundaries: bool = flag(True)
    # Wether the context (task) variable is stationary or not.
    stationary_context: bool = flag(False)
    # Wether tasks share the same action space or not.
    # TODO: This will probably be moved into a different assumption.
    shared_action_space: Optional[bool] = None

    # TODO: Need to put num_workers in only one place.
    batch_size: int = field(default=32, cmd=False)
    num_workers: int = field(default=4, cmd=False)

    # When True, a Monitor-like wrapper will be applied to the training environment
    # and monitor the 'online' performance during training. Note that in SL, this will
    # also cause the Rewards (y) to be withheld until actions are passed to the `send`
    # method of the Environment.
    monitor_training_performance: bool = flag(False)

    train_datasets: List[Dataset] = field(
        default_factory=list, cmd=False, repr=False, to_dict=False
    )
    val_datasets: List[Dataset] = field(default_factory=list, cmd=False, repr=False, to_dict=False)
    test_datasets: List[Dataset] = field(default_factory=list, cmd=False, repr=False, to_dict=False)

    def __post_init__(self):
        super().__post_init__()
        # assert not self.has_setup_fit
        # Test values default to the same as train.
        self.test_increment = self.test_increment or self.increment
        self.test_initial_increment = self.test_initial_increment or self.initial_increment
        self.test_class_order = self.test_class_order or self.class_order

        # TODO: For now we assume a fixed, equal number of classes per task, for
        # sake of simplicity. We could take out this assumption, but it might
        # make things a bit more complicated.
        if isinstance(self.increment, list) and len(self.increment) == 1:
            self.increment = self.increment[0]
        if isinstance(self.test_increment, list) and len(self.test_increment) == 1:
            self.test_increment = self.test_increment[0]
        assert isinstance(self.increment, int)
        assert isinstance(self.test_increment, int)

        # The 'scenarios' for train and test from continuum. (ClassIncremental for now).
        self.train_cl_loader: Optional[_BaseScenario] = None
        self.test_cl_loader: Optional[_BaseScenario] = None
        self.train_cl_dataset: Optional[_ContinuumDataset] = None
        self.test_cl_dataset: Optional[_ContinuumDataset] = None

        # This will be set by the Experiment, or passed to the `apply` method.
        # TODO: This could be a bit cleaner.
        self.config: Config
        # Default path to which the datasets will be downloaded.
        self.data_dir: Optional[Path] = None

        self.train_env: ContinualSLEnvironment = None  # type: ignore
        self.val_env: ContinualSLEnvironment = None  # type: ignore
        self.test_env: ContinualSLEnvironment = None  # type: ignore

        # BUG: These `has_setup_fit`, `has_setup_test`, `has_prepared_data` properties
        # aren't working correctly: they get set before the call to the function has
        # been executed, making it impossible to check those values from inside those
        # functions.
        self._has_prepared_data = False
        self._has_setup_fit = False
        self._has_setup_test = False

        if CTRL_INSTALLED and self.dataset in CTRL_STREAMS:
            import ctrl
            from ctrl.tasks.task_generator import TaskGenerator

            from .envs import CTRL_NB_TASKS

            self.nb_tasks = self.nb_tasks or CTRL_NB_TASKS[self.dataset]
            if self.dataset == "s_long" and not self.nb_tasks:
                warnings.warn(
                    RuntimeWarning(
                        f"Limiting the scenario to 100 tasks for now when using 's_long' stream."
                    )
                )
                self.nb_tasks = 100
            task_generator: TaskGenerator = ctrl.get_stream(self.dataset, seed=42)
            # Get the train/val/test splits from the tasks.
            for task_dataset in itertools.islice(task_generator, self.nb_tasks):
                train_dataset = task_dataset.datasets[task_dataset.split_names.index("Train")]
                val_dataset = task_dataset.datasets[task_dataset.split_names.index("Val")]
                test_dataset = task_dataset.datasets[task_dataset.split_names.index("Test")]
                self.train_datasets.append(train_dataset)
                self.val_datasets.append(val_dataset)
                self.test_datasets.append(test_dataset)

        ## NOTE: Not sure this is a good idea, because we might easily mix the train/val
        ## and test splits between different runs! Actually, now that I think about it,
        ## I need to make sure that this isn't happening already with Avalanche!
        # if self.datasets:
        #     if any(self.train_datasets, self.val_datasets, self.test_datasets):
        #         raise RuntimeError(
        #             f"When passing your own datasets to the setting, you have to pass "
        #             f"either `datasets` or all three of `train_datasets`, "
        #             f"`val_datasets` and `test_datasets`."
        #         )
        #     self.train_datasets = []
        #     self.val_datasets = []
        #     self.test_datasets = []

        #     rng = np.random.default_rng(self.config.seed if self.config else 123)
        #     for dataset in datasets:
        #         n = len(dataset)
        #         n_train_val = int(n * 0.8)
        #         n_test = n - n_train_val
        #         n_train = int(n_train_val * 0.8)
        #         n_valid = n_train_val - n_train
        #         train_val_dataset, test_dataset = random_split(
        #             dataset, [n_train_val, n_test], generator=rng,
        #         )
        #         train_dataset, val_dataset = random_split(
        #             train_val_dataset, [n_train, n_valid], generator=rng,
        #         )

        #         self.train_datasets.append(train_dataset)
        #         self.val_datasets.append(val_dataset)
        #         self.test_datasets.append(test_dataset)

        if any([self.train_datasets, self.val_datasets, self.test_datasets]):
            if not all([self.train_datasets, self.val_datasets, self.test_datasets]):
                raise RuntimeError(
                    f"When passing your own datasets to the setting, you have to pass "
                    f"`train_datasets`, `val_datasets` and `test_datasets`."
                )
            self.nb_tasks = len(self.train_datasets)
            if not (len(self.val_datasets) == len(self.test_datasets) == self.nb_tasks):
                raise RuntimeError(
                    f"When passing your own datasets to the setting, you need to pass "
                    f"The same number of train/valid and test datasets for now."
                )
            # FIXME: For now, setting `self.dataset` to None, because it has a default
            # of 'mnist'. Should probably make it a required argument instead.
            self.dataset = None

            # x_shape = self.train_datasets[0][0][0].shape
            # self.observation_space.x.shape = x_shape
            # assert False, (x_shape, self.observation_space)

        # Note: Using the same name as in the RL Setting for now, since that's where
        # this feature of passing the "envs" for each task was first added.
        self._using_custom_envs_foreach_task: bool = bool(self.train_datasets)

        # TODO: Remove this
        if self.dataset in self.base_action_spaces:
            if isinstance(self.action_space, spaces.Discrete):
                base_action_space = self.base_action_spaces[self.dataset]
                n_classes = base_action_space.n
                self.class_order = self.class_order or list(range(n_classes))
                if self.nb_tasks:
                    self.increment = n_classes // self.nb_tasks

            if not self.nb_tasks:
                base_action_space = self.base_action_spaces[self.dataset]
                if isinstance(base_action_space, spaces.Discrete):
                    self.nb_tasks = base_action_space.n // self.increment

        assert self.nb_tasks != 0, self.nb_tasks

    def apply(
        self, method: Method["ContinualSLSetting"], config: Config = None
    ) -> ContinualSLResults:
        """Apply the given method on this setting to producing some results."""
        # TODO: It still isn't super clear what should be in charge of creating
        # the config, and how to create it, when it isn't passed explicitly.
        self.config = config or self._setup_config(method)
        assert self.config is not None

        method.configure(setting=self)

        # Run the main loop (defined in ContinualAssumption).
        # Basically does the following:
        # 1. Call method.fit(train_env, valid_env)
        # 2. Test the method on test_env.
        # Return the results, as reported by the test environment.
        results: ContinualSLResults = super().main_loop(method)
        method.receive_results(self, results=results)
        return results

    def train_dataloader(
        self, batch_size: int = 32, num_workers: Optional[int] = 4
    ) -> EnvironmentType:
        if not self.has_prepared_data:
            self.prepare_data()
        if not self.has_setup_fit:
            self.setup("fit")

        if self.train_env:
            self.train_env.close()

        batch_size = batch_size if batch_size is not None else self.batch_size
        num_workers = num_workers if num_workers is not None else self.num_workers

        # NOTE: ATM the dataset here doesn't have any transforms. We add the transforms after the
        # dataloader below using the TransformObservations wrapper. This isn't ideal.
        dataset = self._make_train_dataset()

        # TODO: Add some kind of Wrapper around the dataset to make it
        # semi-supervised?
        env = self.Environment(
            dataset,
            hide_task_labels=(not self.task_labels_at_train_time),
            observation_space=self.observation_space,
            action_space=self.action_space,
            reward_space=self.reward_space,
            Observations=self.Observations,
            Actions=self.Actions,
            Rewards=self.Rewards,
            pin_memory=True,
            batch_size=batch_size,
            num_workers=num_workers,
            drop_last=self.drop_last,
            shuffle=False,
            one_epoch_only=(not self.known_task_boundaries_at_train_time),
        )

        if self.config.render:
            # Add a wrapper that calls 'env.render' at each step?
            env = RenderEnvWrapper(env)

        train_transforms = Compose(self.transforms + self.train_transforms)
        if train_transforms:
            env = TransformObservation(env, f=train_transforms)

        if self.config.device:
            # TODO: Put this before or after the image transforms?
            from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors

            env = ConvertToFromTensors(env, device=self.config.device)
            # env = TransformObservation(env, f=partial(move, device=self.config.device))
            # env = TransformReward(env, f=partial(move, device=self.config.device))

        if self.monitor_training_performance:
            env = MeasureSLPerformanceWrapper(
                env,
                first_epoch_only=True,
                wandb_prefix=f"Train/",
            )

        # NOTE: Quickfix for the 'dtype' of the TypedDictSpace perhaps getting lost
        # when transforms don't propagate the 'dtype' field.
        env.observation_space.dtype = self.Observations
        self.train_env = env
        return self.train_env

    def val_dataloader(
        self, batch_size: int = 32, num_workers: Optional[int] = 4
    ) -> EnvironmentType:
        if not self.has_prepared_data:
            self.prepare_data()
        if not self.has_setup_validate:
            self.setup("validate")

        if self.val_env:
            self.val_env.close()

        batch_size = batch_size if batch_size is not None else self.batch_size
        num_workers = num_workers if num_workers is not None else self.num_workers

        dataset = self._make_val_dataset()
        # TODO: Add some kind of Wrapper around the dataset to make it
        # semi-supervised?
        # TODO: Change the reward and action spaces to also use objects.
        env = self.Environment(
            dataset,
            hide_task_labels=(not self.task_labels_at_train_time),
            observation_space=self.observation_space,
            action_space=self.action_space,
            reward_space=self.reward_space,
            Observations=self.Observations,
            Actions=self.Actions,
            Rewards=self.Rewards,
            pin_memory=True,
            drop_last=self.drop_last,
            batch_size=batch_size,
            num_workers=num_workers,
            one_epoch_only=(not self.known_task_boundaries_at_train_time),
        )

        # TODO: If wandb is enabled, then add customized Monitor wrapper (with
        # IterableWrapper as an additional subclass). There would then be a lot of
        # overlap between such a Monitor and the current TestEnvironment.
        if self.config.render:
            # Add a wrapper that calls 'env.render' at each step?
            env = RenderEnvWrapper(env)

        # NOTE: The transforms from `self.transforms` (the 'base' transforms) were
        # already added when creating the datasets and the CL scenario.
        val_transforms = self.transforms + self.val_transforms
        if val_transforms:
            env = TransformObservation(env, f=val_transforms)

        if self.config.device:
            # TODO: Put this before or after the image transforms?
            from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors

            env = ConvertToFromTensors(env, device=self.config.device)
            # env = TransformObservation(env, f=partial(move, device=self.config.device))
            # env = TransformReward(env, f=partial(move, device=self.config.device))

        # NOTE: We don't measure online performance on the validation set.
        # if self.monitor_training_performance:
        #     env = MeasureSLPerformanceWrapper(
        #         env,
        #         first_epoch_only=True,
        #         wandb_prefix=f"Train/Task {self.current_task_id}",
        #     )

        # NOTE: Quickfix for the 'dtype' of the TypedDictSpace perhaps getting lost
        # when transforms don't propagate the 'dtype' field.
        env.observation_space.dtype = self.Observations
        self.val_env = env
        return self.val_env

    def test_dataloader(
        self, batch_size: int = None, num_workers: int = None
    ) -> ContinualSLEnvironment[Observations, Actions, Rewards]:
        """Returns a Continual SL Test environment."""
        if not self.has_prepared_data:
            self.prepare_data()
        if not self.has_setup_test:
            self.setup("test")

        batch_size = batch_size if batch_size is not None else self.batch_size
        num_workers = num_workers if num_workers is not None else self.num_workers

        dataset = self._make_test_dataset()
        env = self.Environment(
            dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            hide_task_labels=(not self.task_labels_at_test_time),
            observation_space=self.observation_space,
            action_space=self.action_space,
            reward_space=self.reward_space,
            Observations=self.Observations,
            Actions=self.Actions,
            Rewards=self.Rewards,
            pretend_to_be_active=True,
            drop_last=self.drop_last,
            shuffle=False,
            one_epoch_only=True,
        )

        # NOTE: The transforms from `self.transforms` (the 'base' transforms) were
        # already added when creating the datasets and the CL scenario.
        test_transforms = self.transforms + self.test_transforms
        if test_transforms:
            env = TransformObservation(env, f=test_transforms)

        if self.config.device:
            # TODO: Put this before or after the image transforms?
            from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors

            env = ConvertToFromTensors(env, device=self.config.device)
            # env = TransformObservation(env, f=partial(move, device=self.config.device))
            # env = TransformReward(env, f=partial(move, device=self.config.device))

        # FIXME: Instead of trying to create a 'fake' task schedule for the test
        # environment, instead let the test environment see the task ids, (and then hide
        # them if necessary) so that it can compile the stats for each task based on the
        # task IDs of the observations.

        # TODO: Configure the 'monitoring' dir properly.
        if wandb.run:
            test_dir = wandb.run.dir
        else:
            test_dir = self.config.log_dir

        test_loop_max_steps = len(dataset) // (env.batch_size or 1)
        test_env = ContinualSLTestEnvironment(
            env,
            directory=test_dir,
            step_limit=test_loop_max_steps,
            force=True,
            config=self.config,
            video_callable=None if (wandb.run or self.config.render) else False,
        )

        # NOTE: Quickfix for the 'dtype' of the TypedDictSpace perhaps getting lost
        # when transforms don't propagate the 'dtype' field.
        env.observation_space.dtype = self.Observations
        if self.test_env:
            self.test_env.close()
        self.test_env = test_env
        return self.test_env

    def prepare_data(self, data_dir: Path = None) -> None:
        # TODO: Pass the transformations to the CL scenario, or to the dataset?
        if data_dir is None:
            if self.config:
                data_dir = self.config.data_dir
            else:
                data_dir = Path("data")

        logger.info(f"Downloading datasets to directory {data_dir}")
        self._using_custom_envs_foreach_task = bool(self.train_datasets)
        if not self._using_custom_envs_foreach_task:
            self.train_cl_dataset = self.make_dataset(data_dir, download=True, train=True)
            self.test_cl_dataset = self.make_dataset(data_dir, download=True, train=False)
        return super().prepare_data()

    def setup(self, stage: str = None):
        if not self.has_prepared_data:
            self.prepare_data()
        super().setup(stage=stage)

        if stage not in (None, "fit", "test", "validate"):
            raise RuntimeError(f"`stage` should be 'fit', 'test', 'validate' or None.")

        if stage in (None, "fit", "validate"):
            if not self._using_custom_envs_foreach_task:
                self.train_cl_dataset = self.train_cl_dataset or self.make_dataset(
                    self.config.data_dir, download=False, train=True
                )
            nb_tasks_kwarg = {}
            if self.nb_tasks is not None:
                nb_tasks_kwarg.update(nb_tasks=self.nb_tasks)
            else:
                nb_tasks_kwarg.update(increment=self.increment)
            if not self._using_custom_envs_foreach_task:
                self.train_cl_loader = self.train_cl_loader or ClassIncremental(
                    cl_dataset=self.train_cl_dataset,
                    **nb_tasks_kwarg,
                    initial_increment=self.initial_increment,
                    transformations=[],  # NOTE: Changing this: The transforms will get added after.
                    class_order=self.class_order,
                )
            if not self.train_datasets and not self.val_datasets:
                for task_id, train_taskset in enumerate(self.train_cl_loader):
                    train_taskset, valid_taskset = split_train_val(train_taskset, val_split=0.1)
                    self.train_datasets.append(train_taskset)
                    self.val_datasets.append(valid_taskset)
                # IDEA: We could do the remapping here instead of adding a wrapper later.
                if self.shared_action_space and isinstance(self.action_space, spaces.Discrete):
                    # If we have a shared output space, then they are all mapped to [0, n_per_task]
                    self.train_datasets = list(map(relabel, self.train_datasets))
                    self.val_datasets = list(map(relabel, self.val_datasets))

        if stage in (None, "test"):
            if not self._using_custom_envs_foreach_task:
                self.test_cl_dataset = self.test_cl_dataset or self.make_dataset(
                    self.config.data_dir, download=False, train=False
                )
                self.test_class_order = self.test_class_order or self.class_order
                self.test_cl_loader = self.test_cl_loader or ClassIncremental(
                    cl_dataset=self.test_cl_dataset,
                    nb_tasks=self.nb_tasks,
                    increment=self.test_increment,
                    initial_increment=self.test_initial_increment,
                    transformations=[],  # note: not passing transforms here, they get added later
                    class_order=self.test_class_order,
                )
            if not self.test_datasets:
                # TODO: If we decide to 'shuffle' the test tasks, then store the sequence of
                # task ids in a new property, probably here.
                # self.test_task_order = list(range(len(self.test_datasets)))
                self.test_datasets = list(self.test_cl_loader)
                # IDEA: We could do the remapping here instead of adding a wrapper later.
                if self.shared_action_space and isinstance(self.action_space, spaces.Discrete):
                    # If we have a shared output space, then they are all mapped to [0, n_per_task]
                    self.test_datasets = list(map(relabel, self.test_datasets))

    def _make_train_dataset(self) -> Union[TaskSet, Dataset]:
        # NOTE: Passing the same seed to `train`/`valid`/`test` is fine, because it's
        # only used for the shuffling used to make the task boundaries smooth.
        if self.smooth_task_boundaries:
            return smooth_task_boundaries_concat(
                self.train_datasets, seed=self.config.seed if self.config else None
            )
        if self.stationary_context:
            joined_dataset = concat(self.train_datasets)
            return shuffle(joined_dataset, seed=self.config.seed)
        if self.known_task_boundaries_at_train_time:
            return self.train_datasets[self.current_task_id]
        else:
            return concatenate(self.train_datasets)

    def _make_val_dataset(self) -> Dataset:
        if self.smooth_task_boundaries:
            return smooth_task_boundaries_concat(self.val_datasets, seed=self.config.seed)
        if self.stationary_context:
            joined_dataset = concat(self.val_datasets)
            return shuffle(joined_dataset, seed=self.config.seed)
        if self.known_task_boundaries_at_train_time:
            return self.val_datasets[self.current_task_id]
        return concatenate(self.val_datasets)

    def _make_test_dataset(self) -> Dataset:
        if self.smooth_task_boundaries:
            return smooth_task_boundaries_concat(self.test_datasets, seed=self.config.seed)
        else:
            return concatenate(self.test_datasets)

    def make_dataset(
        self, data_dir: Path, download: bool = True, train: bool = True, **kwargs
    ) -> _ContinuumDataset:
        # TODO: #7 Use this method here to fix the errors that happen when
        # trying to create every single dataset from continuum.
        data_dir = Path(data_dir)

        if not data_dir.exists():
            data_dir.mkdir(parents=True, exist_ok=True)

        if self.dataset in self.available_datasets:
            dataset_class = self.available_datasets[self.dataset]
            return dataset_class(data_path=data_dir, download=download, train=train, **kwargs)

        elif self.dataset in self.available_datasets.values():
            dataset_class = self.dataset
            return dataset_class(data_path=data_dir, download=download, train=train, **kwargs)

        elif isinstance(self.dataset, Dataset):
            logger.info(f"Using a custom dataset {self.dataset}")
            return self.dataset

        else:
            raise NotImplementedError(self.dataset)

    @property
    def observation_space(self) -> ObservationSpace[Observations]:
        """The un-batched observation space, based on the choice of dataset and
        the transforms at `self.transforms` (which apply to the train/valid/test
        environments).

        The returned space is a TypedDictSpace, with the following properties:
        - `x`: observation space (e.g. `Image` space)
        - `task_labels`: Union[Discrete, Sparse[Discrete]]
           The task labels for each sample. When task labels are not available,
           the task labels space is Sparse, and entries will be `None`.

        """
        # TODO: Need to clean this up a bit:
        if self._using_custom_envs_foreach_task:
            x_space = get_observation_space(self.train_datasets[0])
        else:
            x_space = get_observation_space(self.dataset)

        if not self.transforms:
            # NOTE: When we don't pass any transforms, continuum scenarios still
            # at least use 'to_tensor'.
            x_space = Transforms.to_tensor(x_space)
        # apply the transforms to the observation space.
        for transform in self.transforms:
            x_space = transform(x_space)
        x_space = add_tensor_support(x_space)

        task_label_space = spaces.Discrete(self.nb_tasks)
        if not self.task_labels_at_train_time:
            task_label_space = Sparse(task_label_space, 1.0)
        task_label_space = add_tensor_support(task_label_space)

        self._observation_space = self.ObservationSpace(
            x=x_space,
            task_labels=task_label_space,
            dtype=self.Observations,
        )
        return self._observation_space

    # TODO: Add a `train_observation_space`, `train_action_space`, `train_reward_space`?

    @property
    def action_space(self) -> spaces.Discrete:
        """Action space for this setting."""
        if self._action_space:
            return self._action_space
        # Determine the action space using the right dataset.
        # (NOTE: same across train/val/test for now.)
        dataset = self.dataset
        if self._using_custom_envs_foreach_task:
            dataset = self.train_datasets[0]
        action_space = get_action_space(dataset)

        # TODO: Remove this
        if isinstance(action_space, spaces.Discrete) and self.dataset in self.base_action_spaces:
            if self.shared_action_space:
                assert isinstance(self.increment, int), (
                    "Need to have same number of classes in each task when "
                    "`shared_action_space` is true."
                )
                action_space = spaces.Discrete(self.increment)

        self._action_space = action_space
        return self._action_space
        # TODO: IDEA: Have the action space only reflect the number of 'current' classes
        # in order to create a "true" class-incremental learning setting.
        # n_classes_seen_so_far = 0
        # for task_id in range(self.current_task_id):
        #     n_classes_seen_so_far += self.num_classes_in_task(task_id)
        # return spaces.Discrete(n_classes_seen_so_far)

    @property
    def reward_space(self) -> spaces.Discrete:
        if self._reward_space:
            return self._reward_space
        # Determine the reward space using the right dataset.
        # (NOTE: same across train/val/test for now.)
        dataset = self.dataset
        if self._using_custom_envs_foreach_task:
            dataset = self.train_datasets
        reward_space = get_reward_space(dataset)

        # TODO: Remove this
        if isinstance(reward_space, spaces.Discrete) and self.dataset in self.base_reward_spaces:
            if self.shared_action_space:
                assert isinstance(self.increment, int), (
                    "Need to have same number of classes in each task when "
                    "`shared_action_space` is true."
                )
                reward_space = spaces.Discrete(self.increment)

        self._reward_space = reward_space
        return self._reward_space


def smooth_task_boundaries_concat(
    datasets: List[Dataset], seed: int = None, window_length: float = 0.03
) -> ConcatDataset:
    """TODO: Use a smarter way of mixing from one to the other?"""
    lengths = [len(dataset) for dataset in datasets]
    total_length = sum(lengths)
    n_tasks = len(datasets)

    if not isinstance(window_length, int):
        window_length = int(total_length * window_length)
    assert (
        window_length > 1
    ), f"Window length should be positive or a fraction of the dataset length. ({window_length})"

    rng = np.random.default_rng(seed)

    def option1():
        shuffled_indices = np.arange(total_length)
        for start_index in range(0, total_length - window_length + 1, window_length // 2):
            rng.shuffle(shuffled_indices[start_index : start_index + window_length])
        return shuffled_indices

    # Maybe do the same but backwards?

    # IDEA #2: Sample based on how close to the 'center' of the task we are.
    def option2():
        boundaries = np.array(list(itertools.accumulate(lengths, initial=0)))
        middles = [(start + end) / 2 for start, end in zip(boundaries[0:], boundaries[1:])]
        samples_left: Dict[int, int] = {i: length for i, length in enumerate(lengths)}
        indices_left: Dict[int, List[int]] = {
            i: list(range(boundaries[i], boundaries[i] + length))
            for i, length in enumerate(lengths)
        }

        out_indices: List[int] = []
        last_dataset_index = n_tasks - 1
        for step in range(total_length):
            if step < middles[0] and samples_left[0]:
                # Prevent sampling things from task 1 at the beginning of task 0, and
                eligible_dataset_ids = [0]
            elif step > middles[-1] and samples_left[last_dataset_index]:
                # Prevent sampling things from task N-1 at the emd of task N
                eligible_dataset_ids = [last_dataset_index]
            else:
                # 'smooth', but at the boundaries there are actually two or three datasets,
                # from future tasks even!
                eligible_dataset_ids = list(k for k, v in samples_left.items() if v > 0)
                # if len(eligible_dataset_ids) > 2:
                #     # Prevent sampling from future tasks (past the next task) when at a
                #     # boundary.
                #     left_dataset_index = min(eligible_dataset_ids)
                #     right_dataset_index = min(
                #         v for v in eligible_dataset_ids if v > left_dataset_index
                #     )
                #     eligible_dataset_ids = [left_dataset_index, right_dataset_index]

            options = np.array(eligible_dataset_ids, dtype=int)

            # Calculate the 'distance' to the center of the task's dataset.
            distances = np.abs([step - middles[dataset_index] for dataset_index in options])

            # NOTE: THis exponent is kindof arbitrary, setting it to this value because it
            # sortof works for MNIST so far.
            probs = 1 / (1 + np.abs(distances) ** 2)
            probs /= sum(probs)

            chosen_dataset = rng.choice(options, p=probs)
            chosen_index = indices_left[chosen_dataset].pop()
            samples_left[chosen_dataset] -= 1
            out_indices.append(chosen_index)

        shuffled_indices = np.array(out_indices)
        return shuffled_indices

    def option3():
        shuffled_indices = np.arange(total_length)
        for start_index in range(0, total_length - window_length + 1, window_length // 2):
            rng.shuffle(shuffled_indices[start_index : start_index + window_length])
        for start_index in reversed(range(0, total_length - window_length + 1, window_length // 2)):
            rng.shuffle(shuffled_indices[start_index : start_index + window_length])
        return shuffled_indices

    shuffled_indices = option3()

    if all(isinstance(dataset, TaskSet) for dataset in datasets):
        # Use the 'concat' from continuum, just to preserve the field/methods of a
        # TaskSet.
        joined_taskset = concat(datasets)
        return subset(joined_taskset, shuffled_indices)
    else:
        joined_dataset = ConcatDataset(datasets)
        return Subset(joined_dataset, shuffled_indices)

    return shuffled_indices


from functools import singledispatch
from typing import Sequence, overload

from .wrappers import replace_taskset_attributes

DatasetType = TypeVar("DatasetType", bound=Dataset)


@overload
def subset(dataset: TaskSet, indices: Sequence[int]) -> TaskSet:
    ...


@singledispatch
def subset(dataset: DatasetType, indices: Sequence[int]) -> Union[Subset, DatasetType]:
    raise NotImplementedError(f"Don't know how to take a subset of dataset {dataset}")
    return Subset(dataset, indices)


@subset.register
def taskset_subset(taskset: TaskSet, indices: np.ndarray) -> TaskSet:
    # x, y, t = taskset.get_raw_samples(indices)
    x, y, t = taskset.get_raw_samples(indices)
    # TODO: Not sure if/how to handle the `bounding_boxes` attribute here.
    bounding_boxes = taskset.bounding_boxes
    if bounding_boxes is not None:
        bounding_boxes = bounding_boxes[indices]
    return replace_taskset_attributes(taskset, x=x, y=y, t=t, bounding_boxes=bounding_boxes)


def random_subset(
    taskset: TaskSet, n_samples: int, seed: int = None, ordered: bool = True
) -> TaskSet:
    """Returns a random (ordered) subset of the given TaskSet."""
    rng = np.random.default_rng(seed)
    dataset_length = len(taskset)
    if n_samples > dataset_length:
        raise RuntimeError(f"Dataset has {dataset_length}, asked for {n_samples} samples.")
    indices = rng.permutation(range(dataset_length))[:n_samples]
    # indices = rng.choice(len(taskset), size=n_samples, replace=False)
    if ordered:
        indices = sorted(indices)
    assert len(indices) == n_samples
    return subset(taskset, indices)


DatasetType = TypeVar("DatasetType", bound=Dataset)


def shuffle(dataset: DatasetType, seed: int = None) -> DatasetType:
    length = len(dataset)
    rng = np.random.default_rng(seed)
    perm = rng.permutation(range(length))
    return subset(dataset, perm)


import torch
from torch import Tensor


def smart_class_prediction(
    logits: Tensor, task_labels: Tensor, setting: SLSetting, train: bool
) -> Tensor:
    """Predicts classes which are available, given the task labels."""
    unique_task_ids = set(task_labels.unique().cpu().tolist())
    classes_in_each_task = {
        task_id: setting.task_classes(task_id, train=train) for task_id in unique_task_ids
    }
    y_pred = limit_to_available_classes(logits, task_labels, classes_in_each_task)
    return y_pred


def limit_to_available_classes(
    logits: Tensor, task_labels: Tensor, classes_in_each_present_task: Dict[int, List[int]]
) -> Tensor:
    B = logits.shape[0]
    C = logits.shape[-1]

    assert logits.shape[0] == task_labels.shape[0] == B
    y_preds = []
    indices = torch.arange(C, dtype=torch.long, device=logits.device)

    elligible_masks = {
        task_id: sum(
            [indices == label for label in labels],
            start=torch.zeros([C], dtype=bool, device=logits.device),
        )
        for task_id, labels in classes_in_each_present_task.items()
    }

    y_preds = []
    # TODO: Also return the logits, so we can get a loss for the selected indices?
    # logits = []
    for logit, task_label in zip(logits, task_labels):
        t = task_label.item()
        eligible_classes_list = classes_in_each_present_task[t]
        eligible_classes = torch.as_tensor(eligible_classes_list, dtype=int, device=logits.device)

        is_eligible = elligible_masks[t]

        if not is_eligible.any():
            # Return a random prediction from the set of possible classes, since
            # the network has fewer outputs than there are classes.
            # NOTE: This can occur for instance when testing on future tasks
            # when using a MultiTask module.
            y_pred = eligible_classes[torch.randint(len(eligible_classes), (1,))]
        else:
            masked_logit = logit[is_eligible]
            y_pred_without_offset = masked_logit.argmax(-1)
            y_pred = eligible_classes[y_pred_without_offset]

        assert y_pred.item() in eligible_classes_list
        y_preds.append(y_pred.reshape(()))  # Just to make sure they all have the same shape.

    return torch.stack(y_preds)


from sequoia.common.transforms.channels import has_channels_last, has_channels_first


@has_channels_last.register(ContinualSLSetting.Observations)
def _has_channels_last(obs: ContinualSLSetting.Observations) -> bool:
    return has_channels_last(obs.x)


================================================
FILE: sequoia/settings/sl/continual/setting_test.py
================================================
import functools
from collections import Counter
from pathlib import Path
from typing import Any, ClassVar, Dict, Tuple, Type

import gym
import pytest
import torch
from sklearn.datasets import make_classification
from torch.utils.data import TensorDataset, random_split

from sequoia.common.config import Config
from sequoia.methods import RandomBaselineMethod
from sequoia.settings.base.setting_test import SettingTests
from sequoia.settings.sl.continual.setting import shuffle

from .setting import ContinualSLSetting, random_subset, smooth_task_boundaries_concat
from .wrappers import ShowLabelDistributionWrapper


def test_continuum_shuffle(config: Config):
    from continuum.datasets import MNIST
    from continuum.scenarios import ClassIncremental
    from continuum.tasks import concat

    dataset = MNIST(data_path=config.data_dir, train=True)
    cl_dataset = concat(ClassIncremental(dataset, increment=2))
    shuffled_dataset = shuffle(cl_dataset)
    assert (shuffled_dataset._y != cl_dataset._y).sum() > len(cl_dataset) / 2
    assert (shuffled_dataset._t != cl_dataset._t).sum() > len(cl_dataset) / 2


class TestContinualSLSetting(SettingTests):
    Setting: ClassVar[Type[Setting]] = ContinualSLSetting

    # The kwargs to be passed to the Setting when we want to create a 'short' setting.
    # TODO: Transform this into a fixture instead.
    fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict(
        dataset="mnist",
        batch_size=64,
    )

    @pytest.fixture(scope="session")
    def short_setting(self, session_config):
        kwargs = self.fast_dev_run_kwargs.copy()
        kwargs["config"] = session_config

        setting = self.Setting(**kwargs)
        setting.config = session_config
        setting.prepare_data()
        setting.setup()

        # Testing this out: Shortening the train datasets:
        setting.train_datasets = [
            random_subset(task_dataset, 100) for task_dataset in setting.train_datasets
        ]
        setting.val_datasets = [
            random_subset(task_dataset, 100) for task_dataset in setting.val_datasets
        ]
        setting.test_datasets = [
            random_subset(task_dataset, 100) for task_dataset in setting.test_datasets
        ]
        assert len(setting.train_datasets) == 5
        assert len(setting.val_datasets) == 5
        assert len(setting.test_datasets) == 5
        assert all(len(dataset) == 100 for dataset in setting.train_datasets)
        assert all(len(dataset) == 100 for dataset in setting.val_datasets)
        assert all(len(dataset) == 100 for dataset in setting.test_datasets)

        # Assert that calling setup doesn't overwrite the datasets.
        setting.setup()
        assert len(setting.train_datasets) == 5
        assert len(setting.val_datasets) == 5
        assert len(setting.test_datasets) == 5
        assert all(len(dataset) == 100 for dataset in setting.train_datasets)
        assert all(len(dataset) == 100 for dataset in setting.val_datasets)
        assert all(len(dataset) == 100 for dataset in setting.test_datasets)
        return setting

    def test_shared_action_space(self, config: Config):
        kwargs = self.fast_dev_run_kwargs.copy()
        kwargs["config"] = config
        if (
            isinstance(self.Setting, functools.partial)
            and not self.Setting.args[0].shared_action_space
        ):
            # NOTE: This `self.Setting` being a partial instead of a Setting class only
            # happens in the tests for the SettingProxy.
            kwargs.update(shared_action_space=True)
        elif not self.Setting.shared_action_space:
            kwargs.update(shared_action_space=True)

        setting = self.Setting(**kwargs)
        y_counter = Counter()
        t_counter = Counter()
        test_env = setting.test_dataloader()
        for obs, rewards in test_env:
            if rewards is None:
                action = test_env.action_space.sample()
                # NOTE: On the last batch, the rewards might have a smaller batch size
                # than the action space.
                # TODO: Add tests to check that the envs can explicitly handle this, so
                # that we don't give the burden to the Method.
                rewards = test_env.send(action)

            y = rewards.y.tolist()
            t = (
                obs.task_labels.tolist()
                if obs.task_labels is not None
                else [None for _ in range(obs.x.shape[0])]
            )
            y_counter.update(y)
            t_counter.update(t)

        # This is what you get with mnist, with the default class ordering:
        # if setting.known_task_boundaries_at_train_time:
        #     # Only the first task of mnist, in this case.
        #     assert y_counter == {1: 6065, 0: 5534}

        assert y_counter == {0: 4926, 1: 5074}
        if setting.task_labels_at_test_time:
            assert t_counter == {0: 2115, 1: 2042, 3: 1986, 4: 1983, 2: 1874}
        else:
            assert t_counter == {None: 10_000}
        # assert t_counter

        # Full Train envs:
        # assert y_counter == {1: 27456, 0: 26546}
        # assert False, c

    def test_only_one_epoch(self, short_setting):
        setting = short_setting
        train_env = setting.train_dataloader()

        for _ in train_env:
            pass
        if not setting.known_task_boundaries_at_train_time:
            assert train_env.is_closed()
            with pytest.raises(gym.error.ClosedEnvironmentError):
                for _ in train_env:
                    pass
        else:
            assert not train_env.is_closed()

    @pytest.mark.no_xvfb
    @pytest.mark.timeout(20)
    @pytest.mark.skipif(
        not Path("temp").exists(),
        reason="Need temp dir for saving the figure this test creates.",
    )
    def test_show_distributions(self, config: Config):
        setting = self.Setting(dataset="mnist", config=config)
        figures_dir = Path("temp")

        # fig, axes = plt.subplots(2, 3)
        name_to_env_fn = {
            "train": setting.train_dataloader,
            "valid": setting.val_dataloader,
            "test": setting.test_dataloader,
        }
        # TODO: Maybe add these plots as part of the results for ContinualSL? How much
        # memory would actually be needed to store these here?
        for i, (name, env_fn) in enumerate(name_to_env_fn.items()):
            env = env_fn(batch_size=100, num_workers=4)
            env = ShowLabelDistributionWrapper(env, env_name=name)
            # Iterate through the env.
            for obs, rewards in env:
                if rewards is None:
                    rewards = env.send(env.action_space.sample())

            fig = env.make_figure()
            fig.set_size_inches((6, 4), forward=False)
            save_path = Path(f"{figures_dir}/{setting.get_name()}_{name}.png")
            save_path.parent.mkdir(exist_ok=True)
            fig.savefig(save_path)

        # plt.waitforbuttonpress(10)
        # plt.show()

    def test_passing_datasets_to_setting(self, config: Config):
        image_shape = (16, 16, 3)
        n_classes = 10
        datasets = [
            create_image_classification_dataset(
                image_shape=image_shape, n_classes=2, y_offset=i * 2
            )
            for i in range(5)
        ]
        train_datasets = []
        val_datasets = []
        test_datasets = []
        for dataset in datasets:
            n = len(dataset)
            n_train_val = int(n * 0.8)
            n_test = n - n_train_val
            n_train = int(n_train_val * 0.8)
            n_valid = n_train_val - n_train
            train_val_dataset, test_dataset = random_split(dataset, [n_train_val, n_test])
            train_dataset, val_dataset = random_split(train_val_dataset, [n_train, n_valid])

            train_datasets.append(train_dataset)
            val_datasets.append(val_dataset)
            test_datasets.append(test_dataset)

        setting = self.Setting(
            train_datasets=train_datasets,
            val_datasets=val_datasets,
            test_datasets=test_datasets,
            transforms=[],
            # train_transforms=[],
            # val_transforms=[],
            # test_transforms=[]
        )
        assert setting.train_datasets is train_datasets
        assert setting.val_datasets is val_datasets
        assert setting.test_datasets is test_datasets
        assert setting.nb_tasks == len(setting.train_datasets)
        assert setting.observation_space.x.shape == image_shape
        assert setting.reward_space.n == n_classes

    from sequoia.conftest import skip_param

    from .envs import CTRL_INSTALLED, CTRL_STREAMS

    @pytest.mark.skipif(not CTRL_INSTALLED, reason="Need ctrl-benchmark for this test.")
    @pytest.mark.parametrize(
        "stream",
        [
            "s_plus",
            "s_minus",
            "s_in",
            "s_out",
            "s_pl",
            skip_param("s_long", reason="Very long"),
        ],
    )
    def test_ctrl_stream_support(self, stream: str, config: Config):
        setting_kwargs = self.fast_dev_run_kwargs.copy()
        setting_kwargs["dataset"] = stream
        setting = self.Setting(**setting_kwargs)
        method = RandomBaselineMethod()
        results = setting.apply(method, config=config)
        self.assert_chance_level(setting, results=results)


def create_image_classification_dataset(
    image_shape: Tuple[int, ...],
    n_classes: int,
    n_samples_per_class: int = 100,
    y_offset: int = 0,
):
    """Copied and Adapted from
    https://github.com/ContinualAI/avalanche/blob/master/tests/unit_tests_utils.py
    """
    # n_classes = 10
    # image_shape = (16, 16, 3)
    # n_samples_per_class = 100
    n_features = np.prod(image_shape)
    dataset = make_classification(
        n_samples=n_classes * n_samples_per_class,
        n_classes=n_classes,
        n_features=n_features,
        n_informative=n_features,
        n_redundant=0,
    )
    x = torch.from_numpy(dataset[0]).reshape([-1, *image_shape]).float()
    y = torch.from_numpy(dataset[1]).long()
    # y_offset can be used to get [2,3] rather than [0,1] for instance.
    if y_offset:
        y += y_offset
    return TensorDataset(x, y)

    # train_X, test_X, train_y, test_y = train_test_split(
    #     X, y, train_size=0.6, shuffle=True, stratify=y)

    # train_dataset = TensorDataset(train_X, train_y)
    # test_dataset = TensorDataset(test_X, test_y)
    # return my_nc_benchmark


from typing import List, Tuple

import numpy as np
import pytest
from torch.utils.data import DataLoader


@pytest.mark.timeout(30)
@pytest.mark.no_xvfb
def test_concat_smooth_boundaries(config: Config):
    from continuum.datasets import MNIST
    from continuum.scenarios import ClassIncremental
    from continuum.tasks import split_train_val

    dataset = MNIST(config.data_dir, download=True, train=True)
    scenario = ClassIncremental(
        dataset,
        increment=2,
    )

    print(f"Number of classes: {scenario.nb_classes}.")
    print(f"Number of tasks: {scenario.nb_tasks}.")

    train_datasets = []
    valid_datasets = []
    for task_id, train_taskset in enumerate(scenario):
        train_taskset, val_taskset = split_train_val(train_taskset, val_split=0.1)
        train_datasets.append(train_taskset)
        valid_datasets.append(val_taskset)

    # train_datasets = [Subset(task_dataset, np.arange(20)) for task_dataset in train_datasets]
    train_dataset = smooth_task_boundaries_concat(train_datasets, seed=123)

    xs = np.arange(len(train_dataset))
    y_counters: List[Counter] = []
    t_counters: List[Counter] = []
    dataloader = DataLoader(train_dataset, batch_size=100, shuffle=False)

    for x, y, t in dataloader:
        y_count = Counter(y.tolist())
        t_count = Counter(t.tolist())

        y_counters.append(y_count)
        t_counters.append(t_count)

    classes = list(set().union(*y_counters))
    nb_classes = len(classes)
    x = np.arange(len(dataloader))

    import matplotlib.pyplot as plt

    fig, axes = plt.subplots(2)
    for label in range(nb_classes):
        y = [y_counter.get(label) for y_counter in y_counters]
        axes[0].plot(x, y, label=f"class {label}")
    axes[0].legend()
    axes[0].set_title("y")
    axes[0].set_xlabel("Batch index")
    axes[0].set_ylabel("Count in batch")

    for task_id in range(scenario.nb_tasks):
        y = [t_counter.get(task_id) for t_counter in t_counters]
        axes[1].plot(x, y, label=f"Task id {task_id}")
    axes[1].legend()
    axes[1].set_title("task_id")
    axes[1].set_xlabel("Batch index")
    axes[1].set_ylabel("Count in batch")

    plt.legend()
    # plt.waitforbuttonpress(10)
    # plt.show()


================================================
FILE: sequoia/settings/sl/continual/wrappers.py
================================================
from functools import partial, singledispatch
from itertools import accumulate
from typing import Any, Dict, List

import gym
import matplotlib.pyplot as plt
import numpy as np
import torch
from continuum import TaskSet
from torch import Tensor

from sequoia.common.gym_wrappers import IterableWrapper


@singledispatch
def relabel(data: Any, mapping: Dict[int, int] = None) -> Any:
    """Relabels the given data (from a task) so they all share the same action space."""
    raise NotImplementedError(f"Don't know how to relabel {data} of type {type(data)}")


@relabel.register
def relabel_ndarray(y: np.ndarray, mapping: Dict[int, int] = None) -> np.ndarray:
    new_y = y.copy()
    mapping = mapping or {c: i for i, c in enumerate(np.unique(y))}
    for old_label, new_label in mapping.items():
        new_y[y == old_label] = new_label
    return new_y


@relabel.register
def relabel_tensor(y: Tensor, mapping: Dict[int, int] = None) -> Tensor:
    new_y = y.copy()
    mapping = mapping or {c: i for i, c in enumerate(torch.unique(y))}
    for old_label, new_label in mapping.items():
        new_y[y == old_label] = new_label
    return new_y


@relabel.register
def relabel_taskset(task_set: TaskSet, mapping: Dict[int, int] = None) -> TaskSet:
    mapping = mapping or {c: i for i, c in enumerate(task_set.get_classes())}
    old_y = task_set._y
    new_y = relabel(old_y, mapping=mapping)
    assert not task_set.target_trsf
    # TODO: Two options here: Either create a new 'y' array, OR add a target_trsf that
    # does the remapping. Not sure if there's a benefit in doing one vs the other atm.
    # NOTE: Choosing to replace the `y` to make sure that the concatenated datasets keep
    # the transformed y.
    new_taskset = replace_taskset_attributes(task_set, y=new_y)
    return new_taskset


from sequoia.utils.generic_functions.replace import replace


@replace.register
def replace_taskset_attributes(task_set: TaskSet, **kwargs) -> TaskSet:
    new_kwargs = dict(
        x=task_set._x,
        y=task_set._y,
        t=task_set._t,
        trsf=task_set.trsf,
        target_trsf=task_set.target_trsf,
        data_type=task_set.data_type,
        bounding_boxes=task_set.bounding_boxes,
    )
    new_kwargs.update(kwargs)
    return type(task_set)(**new_kwargs)


class SharedActionSpaceWrapper(IterableWrapper):
    # """ Wrapper that gets applied to a ContinualSLEnvironment
    def __init__(self, env: gym.Env, task_classes: List[int]):
        self.task_classes = task_classes
        super().__init__(env=env, f=partial(relabel, task_classes=self.task_classes))


from collections import Counter

from .environment import ContinualSLEnvironment
from .objects import ObservationType, RewardType


class ShowLabelDistributionWrapper(IterableWrapper[ContinualSLEnvironment]):
    """Wrapper around a SL environment that shows the distribution of the labels.

    Shows the distributions of the task labels, if applicable.
    """

    def __init__(self, env: ContinualSLEnvironment, env_name: str):
        super().__init__(env=env)
        self.env_name = env_name
        # IDEA: Could use bins for continuous values ?
        # IDEA: Also use a counter for the actions?
        self.counters: Dict[str, List[Counter]] = {
            "y": [],
            "t": [],
        }

    def observation(self, observation: ObservationType) -> ObservationType:
        t = observation.task_labels
        if t is None:
            t = [None] * observation.batch_size
        if isinstance(t, Tensor):
            t = t.cpu().numpy()
        t_count = Counter(t)
        self.counters["t"].append(t_count)
        return observation

    def reward(self, reward: RewardType) -> RewardType:
        y = reward.y.cpu().numpy()
        y_count = Counter(y)
        self.counters["y"].append(y_count)
        return reward

    def make_figure(self) -> plt.Figure:
        fig: plt.Figure
        axes: List[plt.Axes]
        fig, axes = plt.subplots(len(self.counters))
        # total_length: int = sum(sum(counter.values()) for counter in self.y_counters)

        for i, (name, counters) in enumerate(self.counters.items()):
            # Values for the x axis are the number of samples seen so far for each
            # batch.
            x = list(accumulate(sum(counter.values()) for counter in counters))
            unique_values = list(sorted(set().union(*counters)))
            for label in unique_values:
                y = [counter.get(label) for counter in counters]
                axes[i].plot(x, y, label=f"{name}={label}")
            axes[i].legend()
            axes[i].set_title(f"{self.env_name} {name}")
            axes[i].set_xlabel("Batch index")
            axes[i].set_ylabel("Count in batch")

        fig.set_size_inches((6, 4), forward=False)
        fig.legend()
        return fig


================================================
FILE: sequoia/settings/sl/discrete/__init__.py
================================================
from .setting import DiscreteTaskAgnosticSLSetting


================================================
FILE: sequoia/settings/sl/discrete/setting.py
================================================
from dataclasses import dataclass

from sequoia.settings.assumptions.context_discreteness import DiscreteContextAssumption
from sequoia.settings.sl.continual import ContinualSLSetting


@dataclass
class DiscreteTaskAgnosticSLSetting(DiscreteContextAssumption, ContinualSLSetting):
    """Continual Supervised Learning Setting where there are clear task boundaries, but
    where the task information isn't available.
    """


================================================
FILE: sequoia/settings/sl/discrete/setting_test.py
================================================
from typing import Any, ClassVar, Dict, Type

from sequoia.settings.sl.continual.setting_test import (
    TestContinualSLSetting as ContinualSLSettingTests,
)

from .setting import DiscreteTaskAgnosticSLSetting


class TestDiscreteTaskAgnosticSLSetting(ContinualSLSettingTests):
    Setting: ClassVar[Type[Setting]] = DiscreteTaskAgnosticSLSetting

    # The kwargs to be passed to the Setting when we want to create a 'short' setting.
    fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict(
        dataset="mnist",
        batch_size=64,
    )


================================================
FILE: sequoia/settings/sl/domain_incremental/__init__.py
================================================
from .setting import DomainIncrementalSLSetting


================================================
FILE: sequoia/settings/sl/domain_incremental/setting.py
================================================
from dataclasses import dataclass

from sequoia.settings.sl.incremental.setting import IncrementalSLSetting
from sequoia.utils.utils import constant


@dataclass
class DomainIncrementalSLSetting(IncrementalSLSetting):
    """Supervised CL Setting where the input domain shifts incrementally.

    Task labels and task boundaries are given at training time, but not at test-time.
    The crucial difference between the Domain-Incremental and Class-Incremental settings
    is that the action space is smaller in domain-incremental learning, as it is a
    `Discrete(n_classes_per_task)`, rather than the `Discrete(total_classes)` in
    Class-Incremental setting.

    For example: Create a classifier for odd vs even hand-written digits. It first be
    trained on digits 0 and 1, then digits 2 and 3, then digits 4 and 5, etc.
    At evaluation time, it will be evaluated on all digits
    """

    shared_action_space: bool = constant(True)


================================================
FILE: sequoia/settings/sl/domain_incremental/setting_test.py
================================================
import itertools
from typing import Any, ClassVar, Dict, Type

import numpy as np
from gym import spaces
from gym.spaces import Discrete

from sequoia.common.metrics import ClassificationMetrics
from sequoia.common.spaces import Image, TypedDictSpace
from sequoia.settings.sl.incremental.setting_test import (
    TestIncrementalSLSetting as IncrementalSLSettingTests,
)

from .setting import DomainIncrementalSLSetting


class TestDiscreteTaskAgnosticSLSetting(IncrementalSLSettingTests):
    Setting: ClassVar[Type[Setting]] = DomainIncrementalSLSetting

    # The kwargs to be passed to the Setting when we want to create a 'short' setting.
    fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict(
        dataset="mnist",
        batch_size=64,
    )

    # Override how we measure 'chance' accuracy for DomainIncrementalSetting.
    def assert_chance_level(
        self,
        setting: DomainIncrementalSLSetting,
        results: DomainIncrementalSLSetting.Results,
    ):
        assert isinstance(setting, DomainIncrementalSLSetting), setting
        assert isinstance(results, DomainIncrementalSLSetting.Results), results
        # TODO: Remove this assertion:
        assert isinstance(setting.action_space, spaces.Discrete)
        # TODO: This test so far needs the 'N' to be the number of classes in total,
        # not the number of classes per task.
        num_classes = setting.action_space.n  # <-- Should be using this instead.

        average_accuracy = results.objective
        # Calculate the expected 'average' chance accuracy.
        # We assume that there is an equal number of classes in each task.
        chance_accuracy = 1 / num_classes
        assert 0.5 * chance_accuracy <= average_accuracy <= 1.5 * chance_accuracy

        for i, metric in enumerate(results.final_performance_metrics):
            assert isinstance(metric, ClassificationMetrics)
            # TODO: Same as above: Should be using `n_classes_per_task` or something
            # like it instead.
            chance_accuracy = 1 / num_classes

            task_accuracy = metric.accuracy
            # FIXME: Look into this, we're often getting results substantially
            # worse than chance, and to 'make the tests pass' (which is bad)
            # we're setting the lower bound super low, which makes no sense.
            assert 0.25 * chance_accuracy <= task_accuracy <= 2.1 * chance_accuracy


def test_domain_incremental_mnist_setup():
    setting = DomainIncrementalSLSetting(
        dataset="mnist",
        increment=2,
    )
    setting.prepare_data(data_dir="data")
    setting.setup()
    assert setting.observation_space == TypedDictSpace(
        x=Image(0.0, 1.0, (3, 28, 28), np.float32),
        task_labels=Discrete(5),
        dtype=setting.Observations,
    )
    assert setting.observation_space.dtype == setting.Observations
    assert setting.action_space == spaces.Discrete(2)
    assert setting.reward_space == spaces.Discrete(2)

    for i in range(setting.nb_tasks):
        setting.current_task_id = i
        batch_size = 5
        train_loader = setting.train_dataloader(batch_size=batch_size)

        for j, (observations, rewards) in enumerate(itertools.islice(train_loader, 100)):
            x = observations.x
            t = observations.task_labels
            y = rewards.y
            print(i, j, y, t)
            assert x.shape == (batch_size, 3, 28, 28)
            assert ((0 <= y) & (y < setting.n_classes_per_task)).all()
            assert all(t == i)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)

            rewards_ = train_loader.send([4 for _ in range(batch_size)])
            assert (rewards.y == rewards_.y).all()

        train_loader.close()

        test_loader = setting.test_dataloader(batch_size=batch_size)
        for j, (observations, rewards) in enumerate(itertools.islice(test_loader, 100)):
            assert rewards is None

            x = observations.x
            t = observations.task_labels
            assert t is None
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)

            rewards = test_loader.send([0 for _ in range(batch_size)])
            assert rewards is not None
            y = rewards.y
            assert ((0 <= y) & (y < setting.n_classes_per_task)).all()


================================================
FILE: sequoia/settings/sl/environment.py
================================================
"""TODO: Creates a Gym Environment (and DataLoader) from a traditional
Supervised dataset. 
"""

from collections import deque
from typing import *

import gym
import numpy as np
from gym import spaces
from gym.vector.utils import batch_space
from torch import Tensor
from torch.utils.data import DataLoader, Dataset, IterableDataset
from torch.utils.data.dataloader import _BaseDataLoaderIter

from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support
from sequoia.common.gym_wrappers.utils import tile_images
from sequoia.common.spaces import Image
from sequoia.common.transforms import Transforms
from sequoia.settings.base.environment import Environment
from sequoia.settings.base.objects import (
    Actions,
    ActionType,
    Observations,
    ObservationType,
    Rewards,
    RewardType,
)
from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


class PassiveEnvironment(
    DataLoader,
    Environment[Tuple[ObservationType, Optional[ActionType]], ActionType, RewardType],
):
    """Environment in which actions have no influence on future observations.

    Can either be iterated on like a normal DataLoader, in which case it gives
    back the observation and the reward at the same time, or as a gym
    Environment, in which case it gives the rewards and the next batch of
    observations once an action is given.

    Normal supervised datasets such as Mnist, ImageNet, etc. fit under this
    category. Similarly to Environment, this just adds some methods on top of
    the usual PyTorch DataLoader.
    """

    passive: ClassVar[bool] = True

    metadata = {"render.modes": ["rgb_array", "human"]}

    def __init__(
        self,
        dataset: Union[IterableDataset, Dataset],
        split_batch_fn: Callable[[Tuple[Any, ...]], Tuple[ObservationType, ActionType]] = None,
        observation_space: gym.Space = None,
        action_space: gym.Space = None,
        reward_space: gym.Space = None,
        n_classes: int = None,
        pretend_to_be_active: bool = False,
        strict: bool = False,
        drop_last: bool = False,
        **kwargs,
    ):
        """Creates the DataLoader/Environment for the given dataset.

        Parameters
        ----------
        dataset : Union[IterableDataset, Dataset]
            The dataset to iterate on. Should ideally be indexable (a Map-style
            dataset).

        split_batch_fn : Callable[ [Tuple[Any, ...]], Tuple[ObservationType, ActionType] ], optional
            A function to call on each item in the dataset in order to split it into
            Observations and Rewards, by default None, in which case we assume that the
            dataset items are tuples of length 2.

        observation_space : gym.Space, optional
            The single (non-batched) observation space. Default to `None`, in which case
            this will try to infer the shape of the space using the first item in the
            dataset.

        action_space : gym.Space, optional
            The non-batched action space. Defaults to None, in which case the
            `n_classes` argument must be passed, and the action space is assumed to be
            discrete (i.e. that the loader is for a classification dataset).

        reward_space : gym.Space, optional
            The non-batched reward (label) space. Defaults to `None`, in which case it
            will be the same as the action space (as is the case in classification).

        n_classes : int, optional
            Number of classes in the dataset. Used in case `action_space` isn't passed.
            Defaults to `None`.

        pretend_to_be_active : bool, optional
            Wether to withhold the rewards (labels) from the batches when being
            iterated on like the usual dataloader, and to only give them back
            after an action is received through the 'send' method. False by
            default, in which case this behaves exactly as a normal dataloader
            when being iterated on.

            When False, the batches yielded by this dataloader will be of the form
            `Tuple[Observations, Rewards]` (as usual in SL).
            However, when set to True, the batches will be `Tuple[Observations, None]`!
            Rewards will then be returned by the environment when an action is passed to
            the Send method.

        strict : bool, optional
            [description], by default False

        # Examples:
        ```python
        train_env = PassiveEnvironment(MNIST("data"), batch_size=32, num_classes=10)

        # The usual Dataloader-style:
        for x, y in train_env:
            # train as usual
            (...)

        # OpenAI Gym style:
        for episode in range(5):
            # NOTE: "episode" in RL is an "epoch" in SL:
            obs = train_env.reset()
            done = False
            while not done:
                actions = train_env.action_space.sample()
                obs, rewards, done, info = train_env.step(actions)
        ```
        """
        super().__init__(dataset=dataset, drop_last=drop_last, **kwargs)
        self.split_batch_fn = split_batch_fn

        # TODO: When the spaces aren't passed explicitly, assumes a classification dataset.
        if not observation_space:
            # NOTE: Assuming min/max of 0 and 1 respectively, but could actually use
            # min_max of the dataset samples too.
            first_item = self.dataset[0]
            if isinstance(first_item, tuple):
                x, *_ = first_item
            else:
                assert isinstance(first_item, (np.ndarray, Tensor))
                x = first_item
            observation_space = Image(0.0, 1.0, x.shape)
        if not action_space:
            assert n_classes, "must pass either `action_space`, or `n_classes` for now"
            action_space = spaces.Discrete(n_classes)
        elif isinstance(action_space, spaces.Discrete):
            n_classes = action_space.n

        if not reward_space:
            # Assuming a classification dataset by default:
            # (action space = reward space = Discrete(n_classes))
            reward_space = action_space

        assert observation_space
        assert action_space
        assert reward_space

        self.single_observation_space: gym.Space = observation_space
        self.single_action_space: gym.Space = action_space
        self.single_reward_space: gym.Space = reward_space

        if self.batch_size:
            observation_space = batch_space(observation_space, self.batch_size)
            action_space = batch_space(action_space, self.batch_size)
            reward_space = batch_space(reward_space, self.batch_size)

        self.observation_space: gym.Space = add_tensor_support(observation_space)
        self.action_space: gym.Space = add_tensor_support(action_space)
        self.reward_space: gym.Space = add_tensor_support(reward_space)

        self.pretend_to_be_active = pretend_to_be_active
        self._strict = strict
        self._reward_queue = deque(maxlen=10)

        self.n_classes: Optional[int] = n_classes
        self._iterator: Optional[_BaseDataLoaderIter] = None
        # NOTE: These here are never processed with self.observation or self.reward.
        self._previous_batch: Optional[Tuple[ObservationType, RewardType]] = None
        self._current_batch: Optional[Tuple[ObservationType, RewardType]] = None
        self._next_batch: Optional[Tuple[ObservationType, RewardType]] = None
        self._done: Optional[bool] = None
        self._is_closed: bool = False

        self._action: Optional[ActionType] = None
        # from gym.envs.classic_control.rendering import SimpleImageViewer
        self.viewer = None

    def is_closed(self) -> bool:
        return self._is_closed

    def reset(self) -> ObservationType:
        """Resets the env by deleting and re-creating the dataloader iterator.

        TODO: This might be pretty expensive, since it's maybe re-creating all the
        worker processes. There might be an easier way of going about this.

        Returns the first batch of observations.
        """
        if self._is_closed:
            raise gym.error.ClosedEnvironmentError("Can't reset: Env is closed.")
        self._iterator = super().__iter__()
        self._previous_batch = None
        self._current_batch = self.get_next_batch()
        self._done = False
        obs = self._current_batch[0]
        return self.observation(obs)

    def close(self) -> None:
        if not self._is_closed:
            if self.viewer:
                self.viewer.close()
            if self.num_workers > 0 and self._iterator:
                self._iterator._shutdown_workers()
            self._is_closed = True

    def __del__(self):
        if not self._is_closed:
            self.close()

    def render(self, mode: str = "rgb_array") -> np.ndarray:
        observations = self._current_batch[0]
        if isinstance(observations, Observations):
            image_batch = observations.x
        else:
            assert isinstance(observations, Tensor)
            image_batch = observations
        if isinstance(image_batch, Tensor):
            image_batch = image_batch.cpu().numpy()

        if self.batch_size:
            image_batch = tile_images(image_batch)

        image_batch = Transforms.channels_last_if_needed(image_batch)
        image_batch = Transforms.three_channels(image_batch)
        assert image_batch.shape[-1] in {3, 4}, image_batch.shape
        if image_batch.dtype == np.float32:
            assert (0 <= image_batch).all() and (image_batch <= 1).all()
            image_batch = (256 * image_batch).astype(np.uint8)
        assert image_batch.dtype == np.uint8

        if mode == "rgb_array":
            # NOTE: Need to create a single image, channels_last format, and
            # possibly even of dtype uint8, in order for things like Monitor to
            # work.
            return image_batch

        if mode == "human":
            # return plt.imshow(image_batch)
            if self.viewer is None:
                display = None
                # TODO: There seems to be a bit of a bug, tests sometime fail because
                # "Can't connect to display: None" etc.
                from gym.utils import pyglet_rendering
                # from pyvirtualdisplay import Display
                # display = Display(visible=0, size=(1366, 768))
                # display.start()
                self.viewer = pyglet_rendering.SimpleImageViewer()

            self.viewer.imshow(image_batch)
            return self.viewer.isopen

        raise NotImplementedError(f"Unsuported mode {mode}")

    def get_next_batch(self) -> Tuple[ObservationType, RewardType]:
        """Gets the next batch from the underlying dataset.

        Uses the `split_batch_fn`, if needed. Does NOT apply the self.observation
        and self.reward methods.

        Returns
        -------
        Tuple[ObservationType, RewardType]
            [description]
        """
        if self._is_closed:
            raise gym.error.ClosedEnvironmentError("Can't get the next batch: Env is closed.")
        if self._iterator is None:
            self._iterator = super().__iter__()
        try:
            batch = next(self._iterator)
        except StopIteration:
            batch = None

        if self.split_batch_fn and batch is not None:
            batch = self.split_batch_fn(batch)
        return batch
        # obs, reward = batch
        # return self.observation(obs), self.reward(reward)

    def step(self, action: ActionType) -> Tuple[ObservationType, RewardType, bool, Dict]:
        if self._is_closed:
            raise gym.error.ClosedEnvironmentError("Can't step on a closed env.")
        if self._done is None:
            raise gym.error.ResetNeeded("Need to reset the env before calling step.")
        if self._done:
            raise gym.error.ResetNeeded("Need to reset the env since it is done.")

        # Transform the Action, if needed:
        action = self.action(action)

        # NOTE: This prev/current/next setup is so we can give the right 'done'
        # signal.
        self._previous_batch = self._current_batch
        if self._next_batch is None:
            # This should only ever happen right after resetting.
            self._next_batch = self.get_next_batch()
        self._current_batch = self._next_batch
        self._next_batch = self.get_next_batch()
        # self._next_batch = self._observations, self._rewards

        assert self._previous_batch is not None

        # TODO: Return done=True when the iterator is exhausted?
        self._done = self._next_batch is None
        obs = self._current_batch[0]
        reward = self._previous_batch[1]
        # Empty for now I guess?
        info = {}
        return obs, reward, self._done, info

    def action(self, action: ActionType) -> ActionType:
        """Transform the action, if needed.

        Parameters
        ----------
        action : ActionType
            [description]

        Returns
        -------
        ActionType
            [description]
        """
        return action

    def observation(self, observation: ObservationType) -> ObservationType:
        """Transform the observation, if needed.

        Parameters
        ----------
        observation : ObservationType
            [description]

        Returns
        -------
        ObservationType
            [description]
        """
        return observation

    def reward(self, reward: RewardType) -> RewardType:
        """Transform the reward, if needed.

        Parameters
        ----------
        reward : RewardType
            [description]

        Returns
        -------
        RewardType
            [description]
        """
        return reward

    def get_info(self) -> Dict:
        """Returns the dict to be returned as the 'info' in step().

        IDEA: We could subclass this to change whats in the 'info' dict, maybe
        add some task information?

        Returns
        -------
        Dict
            [description]
        """
        return {}

    def __iter__(self) -> Iterable[Tuple[ObservationType, Optional[RewardType]]]:
        """Iterate over the dataset, yielding batches of Observations and
        Rewards, just like a regular DataLoader.
        """
        # if self.split_batch_fn:
        #     return map(self.split_batch_fn, super().__iter__())
        # else:
        #     return super().__iter__()
        if self._is_closed:
            raise gym.error.ClosedEnvironmentError("Can't iterate over closed env.")

        for batch in super().__iter__():

            if self.split_batch_fn:
                observations, rewards = self.split_batch_fn(batch)
            else:
                if len(batch) != 2:
                    raise RuntimeError(
                        f"You need to pass a `split_batch_fn` to create "
                        f"observations and rewards, since batch doesn't have "
                        f"2 items: {batch}"
                    )
                observations, rewards = batch

            # Apply any transformations (in case this is wrapped with
            # TransformObservation or something similar)
            self._observations = self.observation(observations)
            self._rewards = self.reward(rewards)

            self._previous_batch = self._current_batch
            self._current_batch = (self._observations, self._rewards)

            if self.pretend_to_be_active:
                self._action = None
                self._reward_queue.append(self._rewards)
                yield self._observations, None
                if self._action is None:
                    if self._strict:
                        # IDEA: yield the same observation, as long as we dont receive an action.
                        raise RuntimeError("Need to send an action between each observations.")
                    logger.warning("Didn't receive an action, rewards will be delayed!.")
            else:
                yield self._observations, self._rewards

    def send(self, action: Actions) -> Rewards:
        """Return the last latch of rewards from the dataset (which were
        withheld if in 'active' mode)
        """
        if self.pretend_to_be_active:
            self._action = action
            return self._reward_queue.popleft()
        else:
            # NOTE: What about sending the reward as well this way?
            return self._rewards


================================================
FILE: sequoia/settings/sl/environment_test.py
================================================
from typing import ClassVar, Iterable, Tuple, Type

import gym
import numpy as np
import pytest
import torch
from gym import spaces
from torch import Tensor
from torch.utils.data import Subset, TensorDataset
from torchvision.datasets import MNIST

from sequoia.common.gym_wrappers import TransformObservation
from sequoia.common.spaces import Image
from sequoia.common.transforms import Compose, Transforms

from .environment import PassiveEnvironment


def check_env(env: PassiveEnvironment):
    """Perform a step gym-style and dataloader-style and check that items
    fit their respective spaces.
    """
    reset_obs = env.reset()
    # Test out the reset & step methods (gym style)
    assert reset_obs in env.observation_space, reset_obs.shape
    assert env.observation_space.sample() in env.observation_space
    assert env.action_space.sample() in env.action_space
    assert env.reward_space == env.action_space
    step_obs, step_rewards, done, info = env.step(env.action_space.sample())
    assert step_obs in env.observation_space
    assert step_rewards in env.reward_space
    # TODO: Should passive environments return a single 'done' value? or a list
    # like vectorized environments in RL?
    assert not done  # shouldn't be `done`.

    for iter_obs, iter_rewards in env:
        assert iter_obs in env.observation_space, iter_obs.shape
        assert iter_rewards in env.reward_space
        break
    else:
        assert False, "should have iterated"


class TestPassiveEnvironment:
    # NOTE: Defining tests in a class like this so we can reuse them while changing some
    # component, for example in the case of `env_proxy_test.py`.
    PassiveEnvironment: ClassVar[Type[PassiveEnvironment]] = PassiveEnvironment

    @pytest.fixture(scope="session")
    def mnist_dataset(self):
        transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
        dataset = MNIST("data", transform=transforms)
        return dataset

    def test_passive_environment_as_dataloader(self, mnist_dataset):
        batch_size = 1
        transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
        dataset = mnist_dataset
        obs_space = Image(0, 255, (1, 28, 28), np.uint8)
        obs_space = transforms(obs_space)

        env: Iterable[Tuple[Tensor, Tensor]] = self.PassiveEnvironment(
            dataset,
            batch_size=batch_size,
            n_classes=10,
            observation_space=obs_space,
        )

        for x, y in env:
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)
            assert y.tolist() == [5]
            break

            # reward = env.send(4)
            # assert reward is None, reward
            # plt.imshow(x[0])
            # plt.title(f"y: {y[0]}")
            # plt.waitforbuttonpress(10)

    def test_mnist_as_gym_env(self, mnist_dataset):
        # from continuum.datasets import MNIST
        dataset = mnist_dataset

        batch_size = 4
        env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size)

        assert env.observation_space.shape == (batch_size, 3, 28, 28)
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space.shape == (batch_size,)

        env.seed(123)
        obs = env.reset()
        assert obs.shape == (batch_size, 3, 28, 28)

        for i in range(10):
            obs, reward, done, info = env.step(env.action_space.sample())
            assert obs.shape == (batch_size, 3, 28, 28)
            assert reward.shape == (batch_size,)
            assert not done
        env.close()

    def test_env_gives_done_on_last_item(self):
        # from continuum.datasets import MNIST
        max_samples = 100
        batch_size = 1
        dataset = MNIST(
            "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
        )
        dataset = Subset(dataset, list(range(max_samples)))

        env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size)

        assert env.observation_space.shape == (batch_size, 3, 28, 28)
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space.shape == (batch_size,)

        env.seed(123)
        obs = env.reset()
        assert obs.shape == (batch_size, 3, 28, 28)
        # Starting at 1 since reset() gives one observation already.
        for i in range(1, max_samples):
            obs, reward, done, info = env.step(env.action_space.sample())
            assert obs.shape == (batch_size, 3, 28, 28)
            assert reward.shape == (batch_size,)
            assert done == (i == max_samples - 1), i
            if done:
                break
        else:
            assert False, "Should have reached done=True!"
        assert i == max_samples - 1
        env.close()

    def test_env_done_works_with_batch_size(self):
        # from continuum.datasets import MNIST
        max_samples = 100
        batch_size = 5
        max_batches = max_samples // batch_size
        dataset = MNIST(
            "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
        )
        dataset = Subset(dataset, list(range(max_samples)))

        env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size)

        assert env.observation_space.shape == (batch_size, 3, 28, 28)
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space.shape == (batch_size,)

        env.seed(123)
        obs = env.reset()
        assert obs.shape == (batch_size, 3, 28, 28)
        # Starting at 1 since reset() gives one observation already.
        for i in range(1, max_batches):

            obs, reward, done, info = env.step(env.action_space.sample())
            assert obs.shape == (batch_size, 3, 28, 28)
            assert reward.shape == (batch_size,)
            assert done == (i == max_batches - 1), i
            if done:
                break
        else:
            assert False, "Should have reached done=True!"
        assert i == max_batches - 1
        env.close()

    def test_multiple_epochs_env(self):
        max_epochs = 3
        max_samples = 100
        batch_size = 5
        max_batches = max_samples // batch_size
        dataset = MNIST(
            "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
        )
        dataset = Subset(dataset, list(range(max_samples)))

        env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size)

        assert env.observation_space.shape == (batch_size, 3, 28, 28)
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space.shape == (batch_size,)

        env.seed(123)
        total_steps = 0
        for epoch in range(max_epochs):
            obs = env.reset()
            total_steps += 1

            assert obs.shape == (batch_size, 3, 28, 28)
            # Starting at 1 since reset() gives one observation already.
            for i in range(1, max_batches):
                obs, reward, done, info = env.step(env.action_space.sample())
                assert obs.shape == (batch_size, 3, 28, 28)
                assert reward.shape == (batch_size,)
                assert done == (i == max_batches - 1), i
                total_steps += 1
                if done:
                    break
            else:
                assert False, "Should have reached done=True!"
            assert i == max_batches - 1
        assert total_steps == max_batches * max_epochs

        env.close()

    def test_cant_iterate_after_closing_passive_env(self):
        max_epochs = 3
        max_samples = 200
        batch_size = 5
        max_batches = max_samples // batch_size
        dataset = MNIST(
            "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
        )
        dataset = Subset(dataset, list(range(max_samples)))

        env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size, num_workers=4)

        assert env.observation_space.shape == (batch_size, 3, 28, 28)
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space.shape == (batch_size,)
        total_steps = 0
        for epoch in range(max_epochs):
            for obs, reward in env:
                assert obs.shape == (batch_size, 3, 28, 28)
                assert reward.shape == (batch_size,)
                total_steps += 1
        assert total_steps == max_batches * max_epochs

        env.close()

        with pytest.raises(gym.error.ClosedEnvironmentError):
            for _ in zip(range(3), env):
                pass

        with pytest.raises(gym.error.ClosedEnvironmentError):
            env.reset()

        with pytest.raises(gym.error.ClosedEnvironmentError):
            env.get_next_batch()

        with pytest.raises(gym.error.ClosedEnvironmentError):
            env.step(env.action_space.sample())

    def test_multiple_epochs_dataloader(self):
        """Test that we can iterate on the dataloader more than once."""
        max_epochs = 3
        max_samples = 200
        batch_size = 5
        max_batches = max_samples // batch_size
        dataset = MNIST(
            "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
        )
        dataset = Subset(dataset, list(range(max_samples)))

        env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size)

        assert env.observation_space.shape == (batch_size, 3, 28, 28)
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space.shape == (batch_size,)
        total_steps = 0
        for epoch in range(max_epochs):
            for obs, reward in env:
                assert obs.shape == (batch_size, 3, 28, 28)
                assert reward.shape == (batch_size,)
                total_steps += 1

        assert total_steps == max_batches * max_epochs

    def test_multiple_epochs_dataloader_with_split_batch_fn(self):
        """Test that we can iterate on the dataloader more than once."""
        max_epochs = 3
        max_samples = 200
        batch_size = 5

        def split_batch_fn(batch):
            (
                x,
                y,
            ) = batch
            # some dummy function.
            return torch.zeros_like(x), y

        max_batches = max_samples // batch_size
        dataset = MNIST(
            "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
        )
        dataset = Subset(dataset, list(range(max_samples)))

        env = self.PassiveEnvironment(
            dataset, n_classes=10, batch_size=batch_size, split_batch_fn=split_batch_fn
        )

        assert env.observation_space.shape == (batch_size, 3, 28, 28)
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space.shape == (batch_size,)
        total_steps = 0
        for epoch in range(max_epochs):
            for obs, reward in env:
                assert obs.shape == (batch_size, 3, 28, 28)
                assert torch.all(obs == 0)
                assert reward.shape == (batch_size,)
                total_steps += 1

        assert total_steps == max_batches * max_epochs

    def test_env_requires_reset_before_step(self):
        # from continuum.datasets import MNIST
        max_samples = 100
        batch_size = 5
        max_batches = max_samples // batch_size
        dataset = MNIST(
            "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
        )
        dataset = Subset(dataset, list(range(max_samples)))

        env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size)

        with pytest.raises(gym.error.ResetNeeded):
            env.step(env.action_space.sample())

    def test_split_batch_fn(self):
        # from continuum.datasets import MNIST
        batch_size = 5
        max_batches = 10

        def split_batch_fn(
            batch: Tuple[Tensor, Tensor, Tensor]
        ) -> Tuple[Tuple[Tensor, Tensor], Tensor]:
            x, y, t = batch
            return (x, t), y

        # dataset = MNIST("data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]))
        from continuum import ClassIncremental
        from continuum.datasets import MNIST

        scenario = ClassIncremental(
            MNIST("data", download=True, train=True),
            increment=2,
            transformations=Compose([Transforms.to_tensor, Transforms.three_channels]),
        )

        classes_per_task = scenario.nb_classes // scenario.nb_tasks
        print(f"Number of classes per task {classes_per_task}.")
        for i, task_dataset in enumerate(scenario):
            env = self.PassiveEnvironment(
                task_dataset,
                n_classes=classes_per_task,
                batch_size=batch_size,
                split_batch_fn=split_batch_fn,
                # Need to pass the observation space, in this case.
                observation_space=spaces.Dict(
                    x=spaces.Box(low=0, high=1, shape=(3, 28, 28)),
                    t=spaces.Discrete(scenario.nb_tasks),  # task label
                ),
                action_space=spaces.Box(
                    low=np.array([i * classes_per_task]),
                    high=np.array([(i + 1) * classes_per_task]),
                    dtype=int,
                ),
            )
            assert spaces.Box(
                low=np.array([i * classes_per_task]),
                high=np.array([(i + 1) * classes_per_task]),
                dtype=int,
            ).shape == (1,)
            assert isinstance(env.observation_space["x"], spaces.Box)
            assert env.observation_space["x"].shape == (batch_size, 3, 28, 28)
            assert env.observation_space["t"].shape == (batch_size,)
            assert env.action_space.shape == (batch_size, 1)
            assert env.reward_space.shape == (batch_size, 1)

            env.seed(123)

            obs = env.reset()
            assert len(obs) == 2
            x, t = obs
            assert x.shape == (batch_size, 3, 28, 28)
            assert t.shape == (batch_size,)

            obs, reward, done, info = env.step(env.action_space.sample())
            assert x.shape == (batch_size, 3, 28, 28)
            assert t.shape == (batch_size,)
            assert reward.shape == (batch_size,)
            assert not done

            env.close()

    def test_observation_wrapper_applied_to_passive_environment(self):
        """Test that when we apply a gym wrapper to a PassiveEnvironment, it also
        affects the observations / actions / rewards produced when iterating on the
        env.
        """
        batch_size = 5

        transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
        dataset = MNIST("data", transform=transforms)
        obs_space = Image(0, 255, (1, 28, 28), np.uint8)
        obs_space = transforms(obs_space)
        dataset.classes
        env = self.PassiveEnvironment(
            dataset,
            n_classes=10,
            batch_size=batch_size,
            observation_space=obs_space,
        )

        assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28))
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space == env.action_space

        env.seed(123)

        check_env(env)

        # Apply a transformation that changes the observation space.
        env = TransformObservation(env=env, f=Compose([Transforms.resize_64x64]))
        assert env.observation_space == Image(0, 1, (batch_size, 3, 64, 64))
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space.shape == (batch_size,)

        env.seed(123)
        check_env(env)

        env.close()

        # from continuum import ClassIncremental
        # from continuum.datasets import MNIST
        # from continuum.tasks import split_train_val

    def test_passive_environment_interaction(self):
        """Test the gym.Env-style interaction with a PassiveEnvironment."""
        batch_size = 5
        transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
        dataset = MNIST(
            "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
        )
        max_samples = 100
        dataset = Subset(dataset, list(range(max_samples)))

        obs_space = Image(0, 255, (1, 28, 28), np.uint8)
        obs_space = transforms(obs_space)
        env = self.PassiveEnvironment(
            dataset,
            n_classes=10,
            batch_size=batch_size,
            observation_space=obs_space,
            pretend_to_be_active=True,
        )

        assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28))
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space == env.action_space
        env.seed(123)
        obs = env.reset()
        assert obs in env.observation_space

        obs, reward, done, info = env.step(env.action_space.sample())
        assert reward is not None
        assert obs in env.observation_space

        for i, (obs, reward) in enumerate(env):
            assert obs in env.observation_space
            assert reward is None
            other_reward = env.send(env.action_space.sample())
            assert other_reward is not None
        assert i == max_samples // batch_size - 1

    def test_passive_environment_without_pretend_to_be_active(self):
        """Test the gym.Env-style interaction with a PassiveEnvironment."""
        batch_size = 5
        transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
        dataset = MNIST(
            "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
        )
        max_samples = 100
        dataset = Subset(dataset, list(range(max_samples)))

        obs_space = Image(0, 255, (1, 28, 28), np.uint8)
        obs_space = transforms(obs_space)
        env = self.PassiveEnvironment(
            dataset,
            n_classes=10,
            batch_size=batch_size,
            observation_space=obs_space,
            pretend_to_be_active=False,
        )
        assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28))
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space == env.action_space
        env.seed(123)
        obs = env.reset()
        assert obs in env.observation_space

        obs, reward, done, info = env.step(env.action_space.sample())
        assert reward is not None

        for i, (obs, reward) in enumerate(env):
            assert reward is not None
            other_reward = env.send(env.action_space.sample())
            assert (other_reward == reward).all()
        assert i == max_samples // batch_size - 1

    def test_passive_environment_needs_actions_to_be_sent(self):
        """Test the 'active dataloader' style interaction."""
        batch_size = 10
        transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
        dataset = MNIST(
            "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
        )
        max_samples = 105
        dataset = Subset(dataset, list(range(max_samples)))

        obs_space = Image(0, 255, (1, 28, 28), np.uint8)
        obs_space = transforms(obs_space)
        env = PassiveEnvironment(
            dataset,
            n_classes=10,
            batch_size=batch_size,
            observation_space=obs_space,
            pretend_to_be_active=True,
            strict=True,
        )

        with pytest.raises(RuntimeError):
            for i, (obs, _) in enumerate(env):
                pass

        env = self.PassiveEnvironment(
            dataset,
            n_classes=10,
            batch_size=batch_size,
            observation_space=obs_space,
            pretend_to_be_active=True,
        )
        for i, (obs, _) in enumerate(env):
            assert isinstance(obs, Tensor)
            action = env.action_space.sample()[: obs.shape[0]]
            rewards = env.send(action)
            assert rewards is not None
            assert rewards.shape[0] == action.shape[0]

    def test_passive_environment_active_mode_action_reward_match(self):
        """Test the 'active dataloader' style interaction."""
        batch_size = 10
        max_samples = 105
        dataset = TensorDataset(
            torch.arange(max_samples).reshape([max_samples, 1, 1, 1])
            * torch.ones([max_samples, 3, 32, 32]),
            torch.arange(max_samples),
        )
        dataset = Subset(dataset, list(range(max_samples)))
        env = self.PassiveEnvironment(
            dataset,
            n_classes=max_samples,
            batch_size=batch_size,
            pretend_to_be_active=True,
        )

        for i, (obs, _) in enumerate(env):
            print(i)
            expected_obs = torch.arange(i * batch_size, (i + 1) * batch_size)
            expected_obs = expected_obs[: obs.shape[0]]
            assert (obs == expected_obs.reshape([obs.shape[0], 1, 1, 1])).all()
            action = torch.arange(i * batch_size, (i + 1) * batch_size, dtype=int)
            action = action[: obs.shape[0]]
            rewards = env.send(action)
            assert (rewards == action).all()


================================================
FILE: sequoia/settings/sl/incremental/__init__.py
================================================
from .environment import IncrementalSLEnvironment
from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType
from .results import IncrementalSLResults
from .setting import IncrementalSLSetting

Environment = IncrementalSLEnvironment
ClassIncrementalSetting = IncrementalSLSetting


================================================
FILE: sequoia/settings/sl/incremental/environment.py
================================================
from typing import Any, Callable, Tuple, Union

import gym
from gym import spaces
from torch.utils.data import Dataset, IterableDataset

from sequoia.common.spaces import TypedDictSpace
from sequoia.settings.base.objects import Rewards as BaseRewards
from sequoia.settings.sl.continual.environment import ContinualSLEnvironment
from sequoia.utils.logging_utils import get_logger

from ..continual.environment import ContinualSLTestEnvironment
from .objects import Actions, ActionType, Observations, ObservationType, RewardType

logger = get_logger(__name__)


class IncrementalSLEnvironment(ContinualSLEnvironment[ObservationType, ActionType, RewardType]):
    def __init__(
        self,
        dataset: Union[Dataset, IterableDataset],
        hide_task_labels: bool = True,
        observation_space: TypedDictSpace[ObservationType] = None,
        action_space: gym.Space = None,
        reward_space: gym.Space = None,
        split_batch_fn: Callable[[Tuple[Any, ...]], Tuple[ObservationType, ActionType]] = None,
        pretend_to_be_active: bool = False,
        strict: bool = False,
        one_epoch_only: bool = False,
        **kwargs,
    ):
        super().__init__(
            dataset,
            hide_task_labels=hide_task_labels,
            observation_space=observation_space,
            action_space=action_space,
            reward_space=reward_space,
            split_batch_fn=split_batch_fn,
            pretend_to_be_active=pretend_to_be_active,
            strict=strict,
            one_epoch_only=one_epoch_only,
            **kwargs,
        )


import bisect
import warnings
from typing import Any, Dict

import numpy as np
import torch
from torch.nn import functional as F

from sequoia.common.gym_wrappers.utils import tile_images
from sequoia.common.metrics import ClassificationMetrics
from sequoia.common.transforms import Transforms
from sequoia.settings.assumptions.iid_results import TaskResults
from sequoia.settings.assumptions.incremental import TaskSequenceResults

from .results import IncrementalSLResults


class IncrementalSLTestEnvironment(ContinualSLTestEnvironment):
    def __init__(self, env: gym.Env, *args, task_schedule: Dict[int, Any] = None, **kwargs):
        super().__init__(env, *args, **kwargs)
        self._steps = 0
        # TODO: Maybe rework this so we don't depend on the test phase being one task at
        # a time, instead store the test metrics in the task corresponding to the
        # task_label in the observations.
        # BUG: The problem is, right now we're depending on being passed the
        # 'task schedule', which we then use to get the task ids. This
        # is actually pretty bad, because if the class ordering was changed between
        # training and testing, then, this wouldn't actually report the correct results!
        self.task_schedule = task_schedule or {}
        self.task_steps = sorted(self.task_schedule.keys())
        self.results: TaskSequenceResults[ClassificationMetrics] = TaskSequenceResults(
            task_results=[TaskResults() for step in self.task_steps]
        )
        # self._reset = False
        # NOTE: The task schedule is already in terms of the number of batches.
        self.boundary_steps = [step for step in self.task_schedule.keys()]

    def get_results(self) -> IncrementalSLResults:
        return self.results

    def reset(self):
        return super().reset()
        # if not self._reset:
        #     logger.debug("Initial reset.")
        #     self._reset = True
        #     return super().reset()
        # else:
        #     logger.debug("Resetting the env closes it.")
        #     self.close()
        #     return None

    def _before_step(self, action):
        self._action = action
        return super()._before_step(action)

    def _after_step(self, observation, reward, done, info):
        if not isinstance(reward, BaseRewards):
            reward = BaseRewards(y=torch.as_tensor(reward))

        batch_size = reward.batch_size

        action = self._action
        assert action is not None

        if isinstance(self.action_space, (spaces.MultiDiscrete, spaces.MultiBinary)):
            n_classes = self.action_space.nvec[0]
            from sequoia.settings.assumptions.task_type import ClassificationActions

            if not isinstance(action, ClassificationActions):
                if isinstance(action, Actions):
                    y_pred = action.y_pred
                    # 'upgrade', creating some fake logits.
                else:
                    y_pred = torch.as_tensor(action)
                fake_logits = F.one_hot(y_pred, n_classes)
                action = ClassificationActions(y_pred=y_pred, logits=fake_logits)
        else:
            raise NotImplementedError(
                f"TODO: Remove the assumption here that the env is a classification env "
                f"({self.action_space}, {self.reward_space})"
            )

        if action.batch_size != reward.batch_size:
            warnings.warn(
                RuntimeWarning(
                    f"Truncating the action since its batch size {action.batch_size} "
                    f"is larger than the rewards': ({reward.batch_size})"
                )
            )
            action = action[:, : reward.batch_size]

        # TODO: Use some kind of generic `get_metrics(actions: Actions, rewards: Rewards)`
        # function instead.
        y = reward.y
        logits = action.logits
        y_pred = action.y_pred
        metric = ClassificationMetrics(y=y, logits=logits, y_pred=y_pred)
        reward = metric.accuracy

        task_steps = sorted(self.task_schedule.keys())
        assert 0 in task_steps, task_steps

        nb_tasks = len(task_steps)
        assert nb_tasks >= 1

        # Given the step, find the task id.
        task_id = bisect.bisect_right(task_steps, self._steps) - 1
        self.results.task_results[task_id].metrics.append(metric)

        self._steps += 1

        # FIXME: Temporary fix: TODO: Make sure this doesn't truncate the number of labels
        if self._steps == self.step_limit - 1:
            self.close()
            done = True

        # Debugging issue with Monitor class:
        # return super()._after_step(observation, reward, done, info)
        if not self.enabled:
            return done

        if done and self.env_semantics_autoreset:
            # For envs with BlockingReset wrapping VNCEnv, this observation will be the
            # first one of the new episode
            if self.config.render:
                self.reset_video_recorder()
            self.episode_id += 1
            self._flush()

        # Record stats: (TODO: accuracy serves as the 'reward'!)
        reward_for_stats = metric.accuracy
        self.stats_recorder.after_step(observation, reward_for_stats, done, info)

        # Record video
        if self.config and self.config.render:
            self.video_recorder.capture_frame()
        return done

    def _after_reset(self, observation: Observations):
        image_batch = observation.numpy().x
        # Need to create a single image with the right dtype for the Monitor
        # from gym to create gifs / videos with it.
        if self.batch_size:
            # Need to tile the image batch so it can be seen as a single image
            # by the Monitor.
            image_batch = tile_images(image_batch)

        image_batch = Transforms.channels_last_if_needed(image_batch)
        if image_batch.dtype == np.float32:
            assert (0 <= image_batch).all() and (image_batch <= 1).all()
            image_batch = (256 * image_batch).astype(np.uint8)

        assert image_batch.dtype == np.uint8
        # Debugging this issue here:
        # super()._after_reset(image_batch)

        # -- Code from Monitor
        if not self.enabled:
            return
        # Reset the stat count
        self.stats_recorder.after_reset(observation)
        if self.config.render:
            self.reset_video_recorder()

        # Bump *after* all reset activity has finished
        self.episode_id += 1

        self._flush()
        # --

    def render(self, mode="human", **kwargs):
        # NOTE: This doesn't get called, because the video recorder uses
        # self.env.render(), rather than self.render()
        # TODO: Render when the 'render' argument in config is set to True.
        image_batch = super().render(mode=mode, **kwargs)
        if mode == "rgb_array" and self.batch_size:
            image_batch = tile_images(image_batch)
        return image_batch


================================================
FILE: sequoia/settings/sl/incremental/environment_test.py
================================================
from functools import partial
from typing import ClassVar, Type

from sequoia.common.metrics import ClassificationMetrics
from sequoia.settings.assumptions.discrete_results import TaskSequenceResults

from ..continual.environment_test import (
    TestContinualSLTestEnvironment as ContinualSLTestEnvironmentTests,
)
from .environment import IncrementalSLEnvironment, IncrementalSLTestEnvironment


class TestIncrementalSLTestEnvironment(ContinualSLTestEnvironmentTests):
    Environment: ClassVar[Type[Environment]] = IncrementalSLEnvironment
    TestEnvironment: ClassVar[Type[TestEnvironment]] = partial(
        IncrementalSLTestEnvironment, task_schedule={i * 20: {} for i in range(5)}
    )

    def validate_results(self, results: TaskSequenceResults):
        # NOTE: We're not checking that the results here represent the entire transfer
        # matrix, because the test env is only used for one test loop.
        # The Setting creates the transfer matrix using multiple of these
        # `TaskSequenceResults` objects, each of which is obtained after training on
        # a task in the training loop.
        assert isinstance(results, TaskSequenceResults)
        assert isinstance(results.average_metrics, ClassificationMetrics)
        assert results.objective > 0
        # TODO: Fix this check:
        assert results.average_metrics.n_samples in [95, 100]


================================================
FILE: sequoia/settings/sl/incremental/objects.py
================================================
""" Observations/Actions/Rewards particular to an IncrementalSLSetting. 

This is just meant as a cleaner way to import the Observations/Actions/Rewards.
"""
from dataclasses import dataclass
from typing import Optional, TypeVar

from torch import Tensor

from sequoia.settings.sl.discrete.setting import DiscreteTaskAgnosticSLSetting

# from sequoia.settings.sl.continual.objects import Observations, Actions, Rewards
# from sequoia.settings.assumptions.context_visibility


@dataclass(frozen=True)
class IncrementalSLObservations(DiscreteTaskAgnosticSLSetting.Observations):
    """Incremental Observations, in a supervised context."""

    x: Tensor
    task_labels: Optional[Tensor] = None


@dataclass(frozen=True)
class IncrementalSLActions(DiscreteTaskAgnosticSLSetting.Actions):
    """Incremental Actions, in a supervised (passive) context."""


@dataclass(frozen=True)
class IncrementalSLRewards(DiscreteTaskAgnosticSLSetting.Rewards):
    """Incremental Rewards, in a supervised context."""


Observations = IncrementalSLObservations
Actions = IncrementalSLActions
Rewards = IncrementalSLRewards
# Environment = C
# Results = IncrementalSLResults

# ObservationType = TypeVar("ObservationType", bound=Observations)
# ActionType = TypeVar("ActionType", bound=Actions)
# RewardType = TypeVar("RewardType", bound=Rewards)

ObservationType = TypeVar("ObservationType", bound=IncrementalSLObservations)
ActionType = TypeVar("ActionType", bound=IncrementalSLActions)
RewardType = TypeVar("RewardType", bound=IncrementalSLRewards)

# from .environment import IncrementalSLEnvironment
# Environment = IncrementalSLEnvironment


================================================
FILE: sequoia/settings/sl/incremental/results.py
================================================
""" Object representing the "Results" of applying a Method on a Class-Incremental Setting.

This object basically calculates the 'objective' specific to this setting as
well as provide a set of methods for making useful plots and utilities for
logging results to wandb.
"""
from typing import ClassVar

import matplotlib.pyplot as plt

import wandb
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.plotting import autolabel

logger = get_logger(__name__)


class IncrementalSLResults(IncrementalAssumption.Results):
    """Results for a ClassIncrementalSetting.

    The main objective in this setting is the average test accuracy over all
    tasks.

    The plots to generate are:
    - Accuracy per task
    - Average Test Accuray over the course of testing
    - Confusion matrix at the end of testing

    All of these will be created from the list of test metrics (Classification
    metrics for now).

    TODO: Add back Wandb logging somehow, even though we might be doing the
    evaluation loop ourselves.
    TODO: Fix this for the 'incremental regression' case.
    """

    # Higher accuracy => better
    lower_is_better: ClassVar[bool] = False
    objective_name: ClassVar[str] = "Average Accuracy"

    # Minimum runtime considered (in hours).
    # (No extra points are obtained when going faster than this.)
    min_runtime_hours: ClassVar[float] = 5.0 / 60.0  # 5 minutes
    # Maximum runtime allowed (in hours).
    max_runtime_hours: ClassVar[float] = 1.0  # one hour.

    def make_plots(self):
        plots_dict = {}
        if wandb.run:
            # TODO: Add a Histogram plot from wandb?
            pass
        else:
            # TODO: Add back the plots.
            plots_dict["task_metrics"] = self.task_accuracies_plot()
        return plots_dict

    def task_accuracies_plot(self):
        figure: plt.Figure
        axes: plt.Axes
        figure, axes = plt.subplots()
        x = list(range(self.num_tasks))
        y = [metrics.accuracy for metrics in self.final_performance_metrics]
        rects = axes.bar(x, y)
        axes.set_title("Task Accuracy")
        axes.set_xlabel("Task")
        axes.set_ylabel("Accuracy")
        axes.set_ylim(0, 1.0)
        autolabel(axes, rects)
        return figure

    def cumul_metrics_plot(self):
        """TODO: Create a plot that shows the evolution of the test performance over
        all test tasks seen so far.

        (during training or during testing?)
        """
        figure: plt.Figure
        axes: plt.Axes
        figure, axes = plt.subplots()
        x = list(range(self.num_tasks))
        y = []
        metric_name: str = ""
        for i in range(self.num_tasks):
            previous_metrics = self.metrics_matrix[i][: i + 1]
            cumul_metrics = sum(previous_metrics)
            y.append(cumul_metrics.objective)
            if not metric_name:
                metric_name = cumul_metrics.objective_name

        # x = [metrics.n_samples for metrics in cumulative_metrics]
        # y = [metrics.accuracy for metrics in cumulative_metrics]
        axes.plot(x, y)
        axes.set_xlabel("# of learned tasks")
        axes.set_ylabel(f"Average {metric_name} on tasks seen so far")
        return figure

    # def summary(self) -> str:
    #     s = StringIO()
    #     with redirect_stdout(s):
    #         for i, average_task_metrics in enumerate(self[-1].average_metrics_per_task):
    #             print(f"Test Results on task {i}: {average_task_metrics}")
    #         print(f"Average test metrics accross all the test tasks: {self[-1].average_metrics}")
    #     s.seek(0)
    #     return s.read()

    # def to_log_dict(self) -> Dict[str, float]:
    #     results = {}
    #     results[self.objective_name] = self.objective
    #     average_metrics = self[-1].average_metrics

    #     if isinstance(average_metrics, ClassificationMetrics):
    #         results["accuracy/average"] = average_metrics.accuracy
    #     elif isinstance(average_metrics, RegressionMetrics):
    #         results["mse/average"] = average_metrics.mse
    #     else:
    #         results["average metrics"] = average_metrics

    #     for i, average_task_metrics in enumerate(self[-1].average_metrics_per_task):
    #         if isinstance(average_task_metrics, ClassificationMetrics):
    #             results[f"accuracy/task_{i}"] = average_task_metrics.accuracy
    #         elif isinstance(average_task_metrics, RegressionMetrics):
    #             results[f"mse/task_{i}"] = average_task_metrics.mse
    #         else:
    #             results[f"task_{i}"] = average_task_metrics
    #     return results


================================================
FILE: sequoia/settings/sl/incremental/setting.py
================================================
""" Defines a `Setting` subclass for "Class-Incremental" Continual Learning.

Example command to run a method on this setting (in debug mode):
```
python main.py --setting class_incremental --method baseline --debug  \
    --batch_size 128 --max_epochs 1
```

Class-Incremental definition from [iCaRL](https://arxiv.org/abs/1611.07725):

    "Formally, we demand the following three properties of an algorithm to qualify
    as class-incremental:
    i)  it should be trainable from a stream of data in which examples of
        different classes occur at different times
    ii) it should at any time provide a competitive multi-class classifier for
        the classes observed so far,
    iii) its computational requirements and memory footprint should remain
        bounded, or at least grow very slowly, with respect to the number of classes
        seen so far."
"""
import itertools
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, ClassVar, Dict, List, Optional, Tuple, Type, Union

from continuum import ClassIncremental
from continuum.datasets import _ContinuumDataset
from continuum.scenarios.base import _BaseScenario
from simple_parsing import choice, field
from torch import Tensor
from torch.utils.data import Dataset

import wandb
from sequoia.common.config import Config
from sequoia.common.gym_wrappers import TransformObservation
from sequoia.settings.assumptions.incremental import IncrementalAssumption, IncrementalResults
from sequoia.settings.base import Method
from sequoia.settings.rl.wrappers import HideTaskLabelsWrapper
from sequoia.settings.sl.continual.wrappers import relabel
from sequoia.settings.sl.environment import Actions, PassiveEnvironment, Rewards
from sequoia.settings.sl.setting import SLSetting
from sequoia.settings.sl.wrappers import MeasureSLPerformanceWrapper
from sequoia.utils import get_logger

from ..discrete.setting import DiscreteTaskAgnosticSLSetting
from .environment import IncrementalSLEnvironment, IncrementalSLTestEnvironment
from .objects import Actions, Observations, Rewards
from .results import IncrementalSLResults

logger = get_logger(__name__)
# # NOTE: This dict reflects the observation space of the different datasets
# # *BEFORE* any transforms are applied. The resulting property on the Setting is
# # based on this 'base' observation space, passed through the transforms.
# # TODO: Make it possible to automatically add tensor support if the dtype passed to a
# # gym space is a `torch.dtype`.
# tensor_space = add_tensor_support


@dataclass
class IncrementalSLSetting(IncrementalAssumption, DiscreteTaskAgnosticSLSetting):
    """Supervised Setting where the data is a sequence of 'tasks'.

    This class is basically is the supervised version of an Incremental Setting


    The current task can be set at the `current_task_id` attribute.
    """

    Results: ClassVar[Type[IncrementalResults]] = IncrementalSLResults

    Observations: ClassVar[Type[Observations]] = Observations
    Actions: ClassVar[Type[Actions]] = Actions
    Rewards: ClassVar[Type[Rewards]] = Rewards

    Environment: ClassVar[Type[SLSetting.Environment]] = IncrementalSLEnvironment[
        Observations, Actions, Rewards
    ]

    Results: ClassVar[Type[IncrementalSLResults]] = IncrementalSLResults

    # Class variable holding a dict of the names and types of all available
    # datasets.
    available_datasets: ClassVar[
        Dict[str, Type[_ContinuumDataset]]
    ] = DiscreteTaskAgnosticSLSetting.available_datasets.copy()

    # A continual dataset to use. (Should be taken from the continuum package).
    dataset: str = choice(available_datasets.keys(), default="mnist")

    # TODO: IDEA: Adding these fields/constructor arguments so that people can pass a
    # custom ready-made `Scenario` from continuum to use (not sure this is a good idea
    # though)
    train_cl_scenario: Optional[_BaseScenario] = field(default=None, cmd=False, to_dict=False)
    test_cl_scenario: Optional[_BaseScenario] = field(default=None, cmd=False, to_dict=False)

    def __post_init__(self):
        """Initializes the fields of the Setting (and LightningDataModule),
        including the transforms, shapes, etc.
        """
        super().__post_init__()

        # TODO: For now we assume a fixed, equal number of classes per task, for
        # sake of simplicity. We could take out this assumption, but it might
        # make things a bit more complicated.
        assert isinstance(self.increment, int)
        assert isinstance(self.test_increment, int)

        self.n_classes_per_task: int = self.increment
        self.test_increment = self.increment

    def apply(self, method: Method, config: Config = None) -> IncrementalSLResults:
        """Apply the given method on this setting to producing some results."""
        # TODO: It still isn't super clear what should be in charge of creating
        # the config, and how to create it, when it isn't passed explicitly.
        self.config = config or self._setup_config(method)
        assert self.config

        method.configure(setting=self)

        # Run the main loop (which is defined in IncrementalAssumption).
        results: IncrementalSLResults = super().main_loop(method)
        logger.info(results.summary())

        method.receive_results(self, results=results)
        return results

    def prepare_data(self, data_dir: Path = None, **kwargs):
        self.config = self.config or Config.from_args(self._argv, strict=False)
        # if self.batch_size is None:
        #     logger.warning(UserWarning(
        #         f"Using the default batch size of 32. (You can set the "
        #         f"batch size by passing a value to the Setting constructor, or "
        #         f"by setting the attribute inside your 'configure' method) "
        #     ))
        #     self.batch_size = 32

        # data_dir = data_dir or self.data_dir or self.config.data_dir
        # self.make_dataset(data_dir, download=True)
        # self.data_dir = data_dir
        return super().prepare_data(data_dir=data_dir, **kwargs)

    def setup(self, stage: str = None):
        super().setup(stage=stage)
        # TODO: Adding this temporarily just for the competition: The TestEnvironment
        # needs access to this information in order to split the metrics for each task.
        self.test_boundary_steps = [0] + list(itertools.accumulate(map(len, self.test_datasets)))[
            :-1
        ]
        self.test_steps = sum(map(len, self.test_datasets))
        # self.test_steps = [0] + list(
        #     itertools.accumulate(map(len, self.test_datasets))
        # )[:-1]

    # def _make_train_dataset(self) -> Dataset:
    #     return self.train_datasets[self.current_task_id]

    # def _make_val_dataset(self) -> Dataset:
    #     return self.val_datasets[self.current_task_id]

    # def _make_test_dataset(self) -> Dataset:
    #     return concat(self.test_datasets)

    def train_dataloader(
        self, batch_size: int = None, num_workers: int = None
    ) -> IncrementalSLEnvironment:
        """Returns a DataLoader for the train dataset of the current task."""
        # NOTE: The implementation for this is in `DiscreteTaskAgnosticSLSetting`:
        # TODO: Fix the inheritance order so that clicking on this super().train_dataloader gets us
        # to the right point in code.
        # train_env = DiscreteTaskAgnosticSLSetting.train_dataloader(
        #     self, batch_size=batch_size, num_workers=num_workers
        # )
        train_env = super().train_dataloader(batch_size=batch_size, num_workers=num_workers)
        # Overwrite the wandb prefix for the `MeasureSLPerformanceWrapper` to include
        # the task id.
        if self.monitor_training_performance:
            # Overwrite the 'wandb prefix'
            assert isinstance(train_env, MeasureSLPerformanceWrapper)
            train_env.wandb_prefix = f"Train/Task {self.current_task_id}"
        self.train_env = train_env
        return self.train_env

    def val_dataloader(self, batch_size: int = None, num_workers: int = None) -> PassiveEnvironment:
        """Returns a DataLoader for the validation dataset of the current task."""
        val_env = super().val_dataloader(batch_size=batch_size, num_workers=num_workers)
        return self.val_env

    def test_dataloader(
        self, batch_size: int = None, num_workers: int = None
    ) -> PassiveEnvironment["ClassIncrementalSetting.Observations", Actions, Rewards]:
        """Returns a DataLoader for the test dataset of the current task."""
        if not self.has_prepared_data:
            self.prepare_data()
        if not self.has_setup_test:
            self.setup("test")

        # Join all the test datasets.
        dataset = self._make_test_dataset()

        batch_size = batch_size if batch_size is not None else self.batch_size
        num_workers = num_workers if num_workers is not None else self.num_workers

        env = self.Environment(
            dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            hide_task_labels=(not self.task_labels_at_test_time),
            observation_space=self.observation_space,
            action_space=self.action_space,
            reward_space=self.reward_space,
            Observations=self.Observations,
            Actions=self.Actions,
            Rewards=self.Rewards,
            pretend_to_be_active=True,
            shuffle=False,
            drop_last=self.drop_last,
        )

        # NOTE: The transforms from `self.transforms` (the 'base' transforms) were
        # already added when creating the datasets and the CL scenario.
        test_transforms = self.transforms + self.test_transforms
        if test_transforms:
            env = TransformObservation(env, f=test_transforms)

        if self.config.device:
            # TODO: Put this before or after the image transforms?
            from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors

            env = ConvertToFromTensors(env, device=self.config.device)

        # TODO: Remove this, I don't think it's used anymore, since `hide_task_labels`
        # is an argument to self.Environment now.
        if not self.task_labels_at_test_time:
            env = HideTaskLabelsWrapper(env)

        # TODO: Remove this once that stuff with the 'fake' task schedule is fixed below,
        # base it on the equivalent in ContinualSLSetting instead (which should actually
        # be moved into DiscreteTaskAgnosticSL, now that I think about it!)

        # Testing this out, we're gonna have a "test schedule" like this to try
        # to imitate the MultiTaskEnvironment in RL.
        transition_steps = [0] + list(itertools.accumulate(map(len, self.test_datasets)))[:-1]
        # FIXME: Creating a 'task schedule' for the TestEnvironment, mimicing what's in
        # the RL settings.
        test_task_schedule = dict.fromkeys(
            [step // (env.batch_size or 1) for step in transition_steps],
            range(len(transition_steps)),
        )
        # TODO: Configure the 'monitoring' dir properly.
        if wandb.run:
            test_dir = wandb.run.dir
        else:
            test_dir = self.config.log_dir

        test_loop_max_steps = len(dataset) // (env.batch_size or 1)
        # TODO: Fix this: iteration doesn't ever end for some reason.

        test_env = IncrementalSLTestEnvironment(
            env,
            directory=test_dir,
            step_limit=test_loop_max_steps,
            task_schedule=test_task_schedule,
            force=True,
            config=self.config,
            video_callable=None if (wandb.run or self.config.render) else False,
        )

        if self.test_env:
            self.test_env.close()
        self.test_env = test_env
        return self.test_env

    def split_batch_function(
        self, training: bool
    ) -> Callable[[Tuple[Tensor, ...]], Tuple[Observations, Rewards]]:
        """Returns a callable that is used to split a batch into observations and rewards."""
        assert False, "TODO: Removing this."
        task_classes = {i: self.task_classes(i, train=training) for i in range(self.nb_tasks)}

        def split_batch(batch: Tuple[Tensor, ...]) -> Tuple[Observations, Rewards]:
            """Splits the batch into a tuple of Observations and Rewards.

            Parameters
            ----------
            batch : Tuple[Tensor, ...]
                A batch of data coming from the dataset.

            Returns
            -------
            Tuple[Observations, Rewards]
                A tuple of Observations and Rewards.
            """
            # In this context (class_incremental), we will always have 3 items per
            # batch, because we use the ClassIncremental scenario from Continuum.
            assert len(batch) == 3
            x, y, t = batch

            # Relabel y so it is always in [0, n_classes_per_task) for each task.
            if self.shared_action_space:
                y = relabel(y, task_classes)

            if (training and not self.task_labels_at_train_time) or (
                not training and not self.task_labels_at_test_time
            ):
                # Remove the task labels if we're not currently allowed to have
                # them.
                # TODO: Using None might cause some issues. Maybe set -1 instead?
                t = None

            observations = self.Observations(x=x, task_labels=t)
            rewards = self.Rewards(y=y)

            return observations, rewards

        return split_batch

    def make_train_cl_scenario(self, train_dataset: _ContinuumDataset) -> _BaseScenario:
        """Creates a train ClassIncremental object from continuum."""
        return ClassIncremental(
            train_dataset,
            nb_tasks=self.nb_tasks,
            increment=self.increment,
            initial_increment=self.initial_increment,
            class_order=self.class_order,
            transformations=self.transforms,
        )

    def make_test_cl_scenario(self, test_dataset: _ContinuumDataset) -> _BaseScenario:
        """Creates a test ClassIncremental object from continuum."""
        return ClassIncremental(
            test_dataset,
            nb_tasks=self.nb_tasks,
            increment=self.test_increment,
            initial_increment=self.test_initial_increment,
            class_order=self.test_class_order,
            transformations=self.transforms,
        )

    def make_dataset(
        self, data_dir: Path, download: bool = True, train: bool = True, **kwargs
    ) -> _ContinuumDataset:
        # TODO: #7 Use this method here to fix the errors that happen when
        # trying to create every single dataset from continuum.
        data_dir = Path(data_dir)

        if not data_dir.exists():
            data_dir.mkdir(parents=True, exist_ok=True)

        if self.dataset in self.available_datasets:
            dataset_class = self.available_datasets[self.dataset]
            return dataset_class(data_path=data_dir, download=download, train=train, **kwargs)

        elif self.dataset in self.available_datasets.values():
            dataset_class = self.dataset
            return dataset_class(data_path=data_dir, download=download, train=train, **kwargs)

        elif isinstance(self.dataset, Dataset):
            logger.info(f"Using a custom dataset {self.dataset}")
            return self.dataset

        else:
            raise NotImplementedError(self.dataset)

    # These methods below are used by the MultiHeadModel, mostly when
    # using a multihead model, to figure out how to relabel the batches, or how
    # many classes there are in the current task (since we support a different
    # number of classes per task).
    # TODO: Remove this? Since I'm simplifying to a fixed number of classes per
    # task for now...

    def num_classes_in_task(self, task_id: int, train: bool) -> Union[int, List[int]]:
        """Returns the number of classes in the given task."""
        increment = self.increment if train else self.test_increment
        if isinstance(increment, list):
            return increment[task_id]
        return increment

    def num_classes_in_current_task(self, train: bool = None) -> int:
        """Returns the number of classes in the current task."""
        # TODO: Its ugly to have the 'method' tell us if we're currently in
        # train/eval/test, no? Maybe just make a method for each?
        return self.num_classes_in_task(self._current_task_id, train=train)

    def task_classes(self, task_id: int, train: bool) -> List[int]:
        """Gives back the 'true' labels present in the given task."""
        start_index = sum(self.num_classes_in_task(i, train) for i in range(task_id))
        end_index = start_index + self.num_classes_in_task(task_id, train)
        if train:
            return self.class_order[start_index:end_index]
        # Set the same ordering as during training, by default.
        self.test_class_order = self.test_class_order or self.class_order
        return self.test_class_order[start_index:end_index]

    def current_task_classes(self, train: bool) -> List[int]:
        """Gives back the labels present in the current task."""
        return self.task_classes(self._current_task_id, train)

    def _check_environments(self):
        """Do a quick check to make sure that the dataloaders give back the
        right observations / reward types.
        """
        for loader_method in [
            self.train_dataloader,
            self.val_dataloader,
            self.test_dataloader,
        ]:
            logger.debug(f"Checking loader method {loader_method.__name__}")
            env = loader_method(batch_size=5)
            obs = env.reset()
            assert isinstance(obs, self.Observations)
            # Convert the observation to numpy arrays, to make it easier to
            # check if the elements are in the spaces.
            obs = obs.numpy()
            # take a slice of the first batch, to get sample tensors.
            first_obs = obs[:, 0]
            # TODO: Here we'd like to be able to check that the first observation
            # is inside the observation space, but we can't do that because the
            # task label might be None, and so that would make it fail.
            x, task_label = first_obs
            if task_label is None:
                assert x in self.observation_space["x"]

            for i in range(5):
                actions = env.action_space.sample()
                observations, rewards, done, info = env.step(actions)
                assert isinstance(observations, self.Observations), type(observations)
                assert isinstance(rewards, self.Rewards), type(rewards)
                actions = env.action_space.sample()
                if done:
                    observations = env.reset()
            env.close()


# def relabel(y: Tensor, task_classes: Dict[int, List[int]]) -> Tensor:
#     """ Relabel the elements of 'y' to their  index in the list of classes for
#     their task.

#     Example:

#     >>> import torch
#     >>> y = torch.as_tensor([2, 3, 2, 3, 2, 2])
#     >>> task_classes = {0: [0, 1], 1: [2, 3]}
#     >>> relabel(y, task_classes)
#     tensor([0, 1, 0, 1, 0, 0])
#     """
#     # TODO: Double-check that this never leaves any zeros where it shouldn't.
#     new_y = torch.zeros_like(y)
#     # assert unique_y <= set(task_classes), (unique_y, task_classes)
#     for task_id, task_true_classes in task_classes.items():
#         for i, label in enumerate(task_true_classes):
#             new_y[y == label] = i
#     return new_y


# This is just meant as a cleaner way to import the Observations/Actions/Rewards
# than particular setting.
Observations = IncrementalSLSetting.Observations
Actions = IncrementalSLSetting.Actions
Rewards = IncrementalSLSetting.Rewards

# TODO: I wouldn't want these above to overwrite / interfere with the import of
# the "base" versions of these objects from sequoia.settings.bases.objects, which are
# imported in settings/__init__.py. Will have to check that doing
# `from .passive import *` over there doesn't actually import these here.


if __name__ == "__main__":
    import doctest

    doctest.testmod()


================================================
FILE: sequoia/settings/sl/incremental/setting_test.py
================================================
from typing import Any, ClassVar, Dict, Type

import pytest
from continuum import ClassIncremental
from gym import spaces
from gym.spaces import Discrete, Space

from sequoia.common.config import Config
from sequoia.common.metrics import ClassificationMetrics
from sequoia.common.spaces import Sparse
from sequoia.common.spaces.typed_dict import TypedDictSpace
from sequoia.conftest import skip_param, xfail_param, requires_pyglet
from sequoia.settings.sl.continual.envs import get_action_space

from ..discrete.setting_test import (
    TestDiscreteTaskAgnosticSLSetting as DiscreteTaskAgnosticSLSettingTests,
)
from .setting import IncrementalSLSetting
from .setting import IncrementalSLSetting as ClassIncrementalSetting


class TestIncrementalSLSetting(DiscreteTaskAgnosticSLSettingTests):
    Setting: ClassVar[Type[IncrementalSLSetting]] = IncrementalSLSetting
    fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict(
        dataset="mnist",
        batch_size=64,
    )

    def assert_chance_level(
        self, setting: IncrementalSLSetting, results: IncrementalSLSetting.Results
    ):
        assert isinstance(setting, ClassIncrementalSetting), setting
        assert isinstance(results, ClassIncrementalSetting.Results), results
        # TODO: Remove this assertion:
        assert isinstance(setting.action_space, spaces.Discrete)
        # TODO: This test so far needs the 'N' to be the number of classes in total,
        # not the number of classes per task.
        # num_classes = setting.action_space.n  # <-- Should be using this instead.
        if setting._using_custom_envs_foreach_task:
            num_classes = get_action_space(setting.train_datasets[0]).n
        else:
            num_classes = get_action_space(setting.dataset).n

        average_accuracy = results.objective
        # Calculate the expected 'average' chance accuracy.
        # We assume that there is an equal number of classes in each task.
        # chance_accuracy = 1 / setting.n_classes_per_task
        chance_accuracy = 1 / num_classes

        assert 0.5 * chance_accuracy <= average_accuracy <= 1.5 * chance_accuracy

        for i, metric in enumerate(results.final_performance_metrics):
            assert isinstance(metric, ClassificationMetrics)
            # TODO: Same as above: Should be using `n_classes_per_task` or something
            # like it instead.
            chance_accuracy = 1 / setting.n_classes_per_task
            chance_accuracy = 1 / num_classes

            task_accuracy = metric.accuracy
            # FIXME: Look into this, we're often getting results substantially
            # worse than chance, and to 'make the tests pass' (which is bad)
            # we're setting the lower bound super low, which makes no sense.
            assert 0.25 * chance_accuracy <= task_accuracy <= 2.1 * chance_accuracy

    # TODO: Add a fixture that specifies a data folder common to all tests.
    @pytest.mark.parametrize(
        "dataset_name",
        [
            "mnist",
            # "synbols",
            skip_param("synbols", reason="Causes tests to hang for some reason?"),
            "cifar10",
            "cifar100",
            "fashionmnist",
            "kmnist",
            xfail_param("emnist", reason="Bug in emnist, requires split positional arg?"),
            xfail_param("qmnist", reason="Bug in qmnist, 229421 not in list"),
            "mnistfellowship",
            "cifar10",
            "cifarfellowship",
        ],
    )
    @pytest.mark.timeout(60)
    def test_observation_spaces_match_dataset(self, dataset_name: str):
        """Test to check that the `observation_spaces` and `reward_spaces` dict
        really correspond to the entries of the corresponding datasets, before we do
        anything with them.
        """
        # CIFARFellowship, MNISTFellowship, ImageNet100,
        # ImageNet1000, CIFAR10, CIFAR100, EMNIST, KMNIST, MNIST,
        # QMNIST, FashionMNIST,
        dataset_class = self.Setting.available_datasets[dataset_name]
        dataset = dataset_class("data")

        observation_space = self.Setting.base_observation_spaces[dataset_name]
        reward_space = self.Setting.base_reward_spaces[dataset_name]
        for task_dataset in ClassIncremental(dataset, nb_tasks=1):
            first_item = task_dataset[0]
            x, t, y = first_item
            assert x.shape == observation_space.shape
            assert x in observation_space, (x.min(), x.max(), observation_space)
            assert y in reward_space

    @pytest.mark.parametrize("dataset_name", ["mnist"])
    @pytest.mark.parametrize("nb_tasks", [2, 5])
    def test_task_label_space(self, dataset_name: str, nb_tasks: int):
        nb_tasks = 2
        setting = ClassIncrementalSetting(
            dataset=dataset_name,
            nb_tasks=nb_tasks,
        )
        task_label_space: Space = setting.observation_space.task_labels
        # TODO: Should the task label space be Sparse[Discrete]? or Discrete?
        assert task_label_space == Discrete(nb_tasks)

    @pytest.mark.parametrize("dataset_name", ["mnist"])
    def test_setting_obs_space_changes_when_transforms_change(self, dataset_name: str):
        """TODO: Test that the `observation_space` property on the
        ClassIncrementalSetting reflects the data produced by the dataloaders, and
        that changing a transform on a Setting also changes the value of that
        property on both the Setting itself, as well as on the corresponding
        dataloaders/environments.
        """
        import torch

        # dataset = ClassIncrementalSetting.available_datasets[dataset_name]
        setting = self.Setting(
            dataset=dataset_name,
            nb_tasks=1,
            transforms=[],
            train_transforms=[],
            val_transforms=[],
            test_transforms=[],
            batch_size=None,
            num_workers=0,
            config=Config(device=torch.device("cpu")),
        )
        base_x_space = type(setting).base_observation_spaces[dataset_name]
        assert setting.observation_space.x == base_x_space
        # TODO: Should the 'transforms' apply to ALL the environments, and the
        # train/valid/test transforms apply only to those envs?
        from sequoia.common.transforms import Transforms

        from sequoia.common.transforms import Compose

        transforms = Compose(
            [
                Transforms.to_tensor,
                Transforms.three_channels,
                Transforms.channels_first_if_needed,
                Transforms.resize_32x32,
            ]
        )
        setting.transforms = transforms
        expected_x_space = transforms(base_x_space)
        # Check the the `x` property of the setting's observation space has also been transformed:
        assert setting.observation_space.x == expected_x_space

        # When there are no transforms in setting.train_tansforms, the observation
        # space of the Setting and of the train dataloader are the same:
        train_env = setting.train_dataloader(batch_size=None, num_workers=None)
        assert not setting.train_transforms
        assert train_env.observation_space == setting.observation_space

        reset_obs = train_env.reset()
        assert reset_obs["x"] in train_env.observation_space["x"], reset_obs[0].shape
        assert reset_obs["task_labels"] in train_env.observation_space["task_labels"]
        assert reset_obs in train_env.observation_space
        assert reset_obs in setting.observation_space
        assert isinstance(reset_obs, ClassIncrementalSetting.Observations)

        # When we add a transform to `setting.train_transforms` the observation
        # space of the Setting and of the train dataloader are different:
        # NOTE: Transforms should act as the 'base', and train_transforms gets added to it.
        setting.train_transforms = [Transforms.resize_64x64]

        train_env = setting.train_dataloader(batch_size=None)
        assert train_env.f == setting.transforms + setting.train_transforms

        assert train_env.observation_space.x.shape == (3, 64, 64)
        assert train_env.reset() in train_env.observation_space

        # The Setting's property didn't change:
        assert setting.observation_space.x.shape == (3, 32, 32)
        #
        #  ---------- Same tests for the val_environment --------------
        #
        val_env = setting.val_dataloader(batch_size=None)
        assert val_env.observation_space == setting.observation_space
        assert val_env.reset() in val_env.observation_space

        # When we add a transform to `setting.val_transforms` the observation
        # space of the Setting and of the val dataloader are different:
        setting.val_transforms = [Transforms.resize_64x64]
        val_env = setting.val_dataloader(batch_size=None)
        assert val_env.observation_space != setting.observation_space
        assert val_env.observation_space.x.shape == (3, 64, 64)
        assert val_env.reset() in val_env.observation_space
        #
        #  ---------- Same tests for the test_environment --------------
        #

        with setting.test_dataloader(batch_size=None) as test_env:
            if setting.task_labels_at_test_time:
                assert test_env.observation_space == setting.observation_space
            else:
                assert isinstance(test_env.observation_space["task_labels"], Sparse)
            obs = test_env.reset()
            assert obs in test_env.observation_space

        setting.test_transforms = [Transforms.resize_64x64]
        with setting.test_dataloader(batch_size=None) as test_env:
            # When we add a transform to `setting.test_transforms` the observation
            # space of the Setting and of the test dataloader are different:
            assert test_env.observation_space != setting.observation_space
            assert test_env.observation_space.x.shape == (3, 64, 64)
            assert test_env.reset() in test_env.observation_space


# TODO: This renders, even when we're using the pytest-xvfb plugin, which might
# mean that it's actually creating a Display somewhere?
@pytest.mark.timeout(30)
@requires_pyglet
def test_render(config: Config):
    setting = ClassIncrementalSetting(dataset="mnist", config=config)
    import matplotlib.pyplot as plt

    plt.ion()
    for task_id in range(setting.nb_tasks):
        setting.current_task_id = task_id
        env = setting.train_dataloader(batch_size=16, num_workers=0)
        obs = env.reset()
        done = False
        while not done:
            obs, rewards, done, info = env.step(env.action_space.sample())
            env.render("human")
            # break
        env.close()


def test_class_incremental_random_baseline():
    pass


================================================
FILE: sequoia/settings/sl/incremental/unused_batch_transforms.py
================================================
from dataclasses import dataclass, replace
from functools import partial
from typing import Callable, List, Tuple, Union

import gym
import torch
from gym.wrappers import TransformReward
from simple_parsing import list_field
from torch import Tensor

from sequoia.settings import Observations, Rewards


def relabel(y: Tensor, task_classes: List[int]) -> Tensor:
    new_y = torch.zeros_like(y)
    for i, label in enumerate(task_classes):
        new_y[y == label] = i
    return new_y


class RelabelWrapper(TransformReward):
    def __init__(self, env: gym.Env, task_classes: List[int]):
        self.task_classes = task_classes
        super().__init__(env=env, f=partial(relabel, task_classes=self.task_classes))


@dataclass
class RelabelTransform(Callable[[Tuple[Tensor, ...]], Tuple[Tensor, ...]]):
    """Transform that puts labels back into the [0, n_classes_per_task] range.

    For instance, if it's given a bunch of images that have labels [2, 3, 2]
    and the `task_classes = [2, 3]`, then the new labels will be
    `[0, 1, 0]`.

    Note that the order in `task_classes` is perserved. For instance, in the
    above example, if `task_classes = [3, 2]`, then the new labels would be
    `[1, 0, 1]`.

    IMPORTANT: This transform needs to be applied BEFORE ReorderTensor or
    SplitBatch, because it expects the batch to be (x, y, t) order
    """

    task_classes: List[int] = list_field()

    def __call__(self, batch: Tuple[Tensor, ...]):
        assert isinstance(batch, (list, tuple)), batch
        if len(batch) == 2:
            observations, rewards = batch
        if len(batch) == 1:
            return batch
        x, y, *task_labels = batch

        # if y.max() == len(self.task_classes):
        #     # No need to relabel this batch.
        #     # @lebrice: Can we really skip relabeling in this case?
        #     return batch

        new_y = relabel(y, task_classes=self.task_classes)
        return (x, new_y, *task_labels)


@dataclass
class ReorderTensors(Callable[[Tuple[Tensor, ...]], Tuple[Tensor, ...]]):
    # reorder tensors in the batch so the task labels go into the observations:
    # (x, y, t) -> (x, t, y)
    # TODO: Change this to:
    # (x, y, t) -> ((x, t), y) maybe?
    def __call__(self, batch: Tuple[Tensor, ...]):
        assert isinstance(batch, (list, tuple))
        if len(batch) == 2:
            observations, rewards = batch
            if isinstance(observations, Observations) and isinstance(rewards, Rewards):
                return batch
        elif len(batch) == 3:
            x, y, *extra_labels = batch
            if len(extra_labels) == 1:
                task_labels = extra_labels[0]
                return (x, task_labels, y)
        assert False, batch


@dataclass
class DropTaskLabels(Callable[[Tuple[Tensor, ...]], Tuple[Tensor, ...]]):
    def __call__(self, batch: Union[Tuple[Tensor, ...], Observations]):
        assert isinstance(batch, (tuple, list))
        if len(batch) == 2:
            observations, rewards = batch
            if isinstance(observations, Observations) and isinstance(rewards, Rewards):
                return replace(observations, task_labels=None), rewards
        elif len(batch) == 3:
            # This is tricky. If we're placed BEFORE the 'ReorderTensors',
            # then the ordering is `x, y, t`, while if we're AFTER, the
            # ordering would then be 'x, t, y'..
            x, v1, v2 = batch
            # IDEA: For now, we assume that the 'y' is a lot more erratic than
            # the task label. Therefore, the number of unique consecutive should
            # be greater for `y` than for `t`.
            u1 = len(v1.unique_consecutive())
            u2 = len(v2.unique_consecutive())
            if u1 > u2:
                y, t = v1, v2
            elif u1 == u2:
                # hmmm wtf?
                assert False, (v1, v2, u1, u2)
            else:
                y, t = v2, v1
            return x, y, t
        assert False, f"There are no task labels to drop: {batch}"


================================================
FILE: sequoia/settings/sl/multi_task/__init__.py
================================================
from .setting import MultiTaskSLSetting

Observations = MultiTaskSLSetting.Observations
Actions = MultiTaskSLSetting.Actions
Rewards = MultiTaskSLSetting.Rewards
# TODO?
# Environment = MultiTaskSetting.Environment


================================================
FILE: sequoia/settings/sl/multi_task/setting.py
================================================
from dataclasses import dataclass
from typing import ClassVar, Type

from sequoia.settings.sl.task_incremental import TaskIncrementalSLSetting
from sequoia.utils import get_logger

# TODO: Playing around with this 'constant_property' idea as an alternative to the
# init=False of `constant` field.
from sequoia.utils.utils import constant_property

from ..task_incremental.setting import TaskIncrementalSLSetting
from ..traditional.setting import TraditionalSLSetting

logger = get_logger(__name__)


@dataclass
class MultiTaskSLSetting(TaskIncrementalSLSetting, TraditionalSLSetting):
    """IID version of the Task-Incremental Setting, where the data is shuffled.

    Can be used to estimate the upper bound performance of Task-Incremental CL Methods.
    """

    Results: ClassVar[Type[Results]] = TraditionalSLSetting.Results

    stationary_context: bool = constant_property(True)

    def __post_init__(self):
        super().__post_init__()
        # We reuse the training loop from Incremental, by modifying it so it
        # discriminates between "phases" and "tasks".

    @property
    def phases(self) -> int:
        return 1

    # def _make_train_dataset(self) -> Dataset:
    #     """ Returns the training dataset, which in this case will be shuffled.

    #     IDEA: We could probably do it the same way in both RL and SL:
    #     1. Create the 'datasets' for all the tasks;
    #     2. "concatenate"+"Shuffle" the "datasets":
    #         - in SL: ConcatDataset / shuffle the datasets
    #         - in RL: Create a true `MultiTaskEnvironment` that accepts a list of envs as
    #           an input and alternates between environments at each episode.
    #           (either round-robin style, or randomly)

    #     Returns
    #     -------
    #     Dataset
    #     """
    #     joined_dataset = concat(self.train_datasets)
    #     return shuffle(joined_dataset, seed=self.config.seed)

    # def _make_val_dataset(self) -> Dataset:
    #     joined_dataset = concat(self.val_datasets)
    #     return shuffle(joined_dataset, seed=self.config.seed)

    # def _make_test_dataset(self) -> Dataset:
    #     return concat(self.test_datasets)

    # def train_dataloader(
    #     self, batch_size: int = None, num_workers: int = None
    # ) -> PassiveEnvironment:
    #     """Returns a DataLoader for the training dataset.

    #     This dataloader will yield batches which will very likely contain data from
    #     multiple different tasks, and will contain task labels.

    #     Parameters
    #     ----------
    #     batch_size : int, optional
    #         Batch size to use. Defaults to None, in which case the value of
    #         `self.batch_size` is used.
    #     num_workers : int, optional
    #         Number of workers to use. Defaults to None, in which case the value of
    #         `self.num_workers` is used.

    #     Returns
    #     -------
    #     PassiveEnvironment
    #         A "Passive" Dataloader/gym.Env.
    #     """
    #     return super().train_dataloader(batch_size=batch_size, num_workers=num_workers)

    # def val_dataloader(
    #     self, batch_size: int = None, num_workers: int = None
    # ) -> PassiveEnvironment:
    #     """Returns a DataLoader for the validation dataset.

    #     This dataloader will yield batches which will very likely contain data from
    #     multiple different tasks, and will contain task labels.

    #     Parameters
    #     ----------
    #     batch_size : int, optional
    #         Batch size to use. Defaults to None, in which case the value of
    #         `self.batch_size` is used.
    #     num_workers : int, optional
    #         Number of workers to use. Defaults to None, in which case the value of
    #         `self.num_workers` is used.

    #     Returns
    #     -------
    #     PassiveEnvironment
    #         A "Passive" Dataloader/gym.Env.
    #     """
    #     return super().val_dataloader(batch_size=batch_size, num_workers=num_workers)

    # def test_dataloader(
    #     self, batch_size: int = None, num_workers: int = None
    # ) -> PassiveEnvironment:
    #     """Returns a DataLoader for the test dataset.

    #     This dataloader will yield batches which will very likely contain data from
    #     multiple different tasks, and will contain task labels.

    #     Unlike the train and validation environments, the test environment will not
    #     yield rewards until the action has been sent to it using either `send` (when
    #     iterating in the DataLoader-style) or `step` (when interacting with the
    #     environment in the gym.Env style). For more info, take a look at the
    #     `PassiveEnvironment` class.

    #     Parameters
    #     ----------
    #     batch_size : int, optional
    #         Batch size to use. Defaults to None, in which case the value of
    #         `self.batch_size` is used.
    #     num_workers : int, optional
    #         Number of workers to use. Defaults to None, in which case the value of
    #         `self.num_workers` is used.

    #     Returns
    #     -------
    #     PassiveEnvironment
    #         A "Passive" Dataloader/gym.Env.
    #     """
    #     return super().test_dataloader(batch_size=batch_size, num_workers=num_workers)

    # def test_loop(self, method: Method) -> "IncrementalAssumption.Results":
    #     """ Runs a multi-task test loop and returns the Results.
    #     """
    #     return super().test_loop(method)
    # # TODO:
    # test_env = self.test_dataloader()
    # try:
    #     # If the Method has `test` defined, use it.
    #     method.test(test_env)
    #     test_env.close()
    #     # Get the metrics from the test environment
    #     test_results: Results = test_env.get_results()
    #     print(f"Test results: {test_results}")
    #     return test_results

    # except NotImplementedError:
    #     logger.info(
    #         f"Will query the method for actions at each step, "
    #         f"since it doesn't implement a `test` method."
    #     )

    # obs = test_env.reset()

    # # TODO: Do we always have a maximum number of steps? or of episodes?
    # # Will it work the same for Supervised and Reinforcement learning?
    # max_steps: int = getattr(test_env, "step_limit", None)

    # # Reset on the last step is causing trouble, since the env is closed.
    # pbar = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test")
    # episode = 0
    # for step in pbar:
    #     if test_env.is_closed():
    #         logger.debug(f"Env is closed")
    #         break
    #     # logger.debug(f"At step {step}")
    #     action = method.get_actions(obs, test_env.action_space)

    #     # logger.debug(f"action: {action}")
    #     # TODO: Remove this:
    #     if isinstance(action, Actions):
    #         action = action.y_pred
    #     if isinstance(action, Tensor):
    #         action = action.cpu().numpy()

    #     obs, reward, done, info = test_env.step(action)

    #     if done and not test_env.is_closed():
    #         # logger.debug(f"end of test episode {episode}")
    #         obs = test_env.reset()
    #         episode += 1

    # test_env.close()
    # test_results = test_env.get_results()

    # return test_results


================================================
FILE: sequoia/settings/sl/multi_task/setting_test.py
================================================
"""
TODO: Tests for the multi-task SL setting.

- Has only one train/test 'phase'
    - The nb_tasks attribute should still reflect the number of tasks.
- on_task_switch should never be called during training
- (not so sure during testing)
- Task labels should be available for both training and testing.
- Classes shouldn't be relabeled.

"""
import dataclasses
import itertools

import numpy as np
import pytest
import torch
from gym.spaces import Discrete

from sequoia.common.spaces import Image, TypedDictSpace
from sequoia.settings import Actions, Environment

from .setting import MultiTaskSLSetting


def check_is_multitask_env(env: Environment, has_rewards: bool):
    # dataloader-style:
    for i, (observations, rewards) in itertools.islice(enumerate(env), 10):
        assert isinstance(observations, MultiTaskSLSetting.Observations)
        task_labels = observations.task_labels.cpu().tolist()
        assert len(set(task_labels)) > 1
        if has_rewards:
            assert isinstance(rewards, MultiTaskSLSetting.Rewards)
            # Check that there is no relabelling happening, by checking that there are
            # more different y's then there are usually classes in each batch.
            assert len(set(rewards.y.cpu().tolist())) > 2
        else:
            assert rewards is None

    # gym-style interaction:
    obs = env.reset()
    assert isinstance(env.observation_space, TypedDictSpace)
    space_shapes = {k: s.shape for k, s in env.observation_space.spaces.items()}
    space_dtypes = {k: s.dtype for k, s in env.observation_space.spaces.items()}
    # assert False, (obs.keys(), obs.numpy().keys())
    assert obs.shapes == space_shapes
    assert obs.numpy().shapes == space_shapes

    assert obs.dtypes == space_dtypes
    x_space = env.observation_space.x
    t_space = env.observation_space.task_labels
    assert obs.x in x_space, (obs.x, x_space)
    assert obs.task_labels in t_space, (obs.task_labels, t_space)
    assert isinstance(obs, env.observation_space.dtype)

    assert obs in env.observation_space
    done = False
    steps = 0
    while not done and steps < 10:
        action = Actions(y_pred=torch.randint(10, [env.batch_size]))
        # BUG: convert_tensors seems to be causing issues again: We shouldn't have
        # to manually convert obs to numpy before checking `obs in obs_space`.
        # TODO: Also not super clean that we can't just do `action in action_space`.
        # assert action.numpy() in env.action_space
        assert action.y_pred.numpy() in env.action_space
        obs, reward, done, info = env.step(action)
        assert obs.numpy() in env.observation_space
        assert reward.y in env.reward_space
        steps += 1
        assert done is False
    assert steps == 10


from sequoia.common.config import Config


def test_multitask_setting(config: Config):
    config = dataclasses.replace(config, device=torch.device("cpu"))
    setting = MultiTaskSLSetting(dataset="mnist", config=config)
    assert setting.phases == 1
    assert setting.nb_tasks == 5
    from sequoia.common.spaces.image import ImageTensorSpace
    from sequoia.common.spaces.tensor_spaces import TensorDiscrete

    assert setting.observation_space == TypedDictSpace(
        x=ImageTensorSpace(0.0, 1.0, (3, 28, 28), np.float32, device=config.device),
        task_labels=TensorDiscrete(5, device=config.device),
        dtype=setting.Observations,
    )
    assert setting.action_space == Discrete(10)
    # assert setting.config.device.type == "cuda" if torch.cuda.is_available() else "cpu"

    with setting.train_dataloader(batch_size=32, num_workers=0) as train_env:
        check_is_multitask_env(train_env, has_rewards=True)

    with setting.val_dataloader(batch_size=32, num_workers=0) as val_env:
        check_is_multitask_env(val_env, has_rewards=True)


@pytest.mark.xfail(reason="test environments still operate in a 'sequential tasks' way")
def test_multitask_setting_test_env():
    setting = MultiTaskSLSetting(dataset="mnist")

    assert setting.phases == 1
    assert setting.nb_tasks == 5
    assert setting.observation_space == TypedDictSpace(
        x=Image(0.0, 1.0, (3, 28, 28), np.float32), task_labels=Discrete(5)
    )
    assert setting.action_space == Discrete(10)

    # FIXME: Wait, actually, this test environment, will it be shuffled, or not?
    with setting.test_dataloader(batch_size=32, num_workers=0) as test_env:
        check_is_multitask_env(test_env, has_rewards=False)


from sequoia.settings.assumptions.incremental_test import DummyMethod


def test_on_task_switch_is_called_multi_task():
    setting = MultiTaskSLSetting(
        dataset="mnist",
        nb_tasks=5,
        # train_steps_per_task=100,
        # max_steps=500,
        # test_steps_per_task=100,
        train_transforms=[],
        test_transforms=[],
        val_transforms=[],
    )
    method = DummyMethod()
    results = setting.apply(method)
    assert method.n_task_switches == setting.nb_tasks
    assert method.received_task_ids == list(range(setting.nb_tasks))
    assert method.received_while_training == [False for _ in range(setting.nb_tasks)]


================================================
FILE: sequoia/settings/sl/setting.py
================================================
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Type, TypeVar

from pytorch_lightning import LightningDataModule
from simple_parsing import choice, list_field
from torch import Tensor

from sequoia.common.transforms import Transforms
from sequoia.settings import Setting
from sequoia.settings.base.environment import ActionType, ObservationType, RewardType

from .environment import PassiveEnvironment


@dataclass
class SLSetting(Setting[PassiveEnvironment[ObservationType, ActionType, RewardType]]):
    """Supervised Learning Setting.

    Core assuptions:
    - Current actions have no influence on future observations.
    - The environment gives back "dense feedback", (the 'reward' associated with all
      possible actions at each step, rather than a single action)

    For example, supervised learning is a Passive setting, since predicting a
    label has no effect on the reward you're given (the label) or on the next
    samples you observe.
    """

    @dataclass(frozen=True)
    class Observations(Setting.Observations):
        x: Tensor

    @dataclass(frozen=True)
    class Actions(Setting.Actions):
        pass

    @dataclass(frozen=True)
    class Rewards(Setting.Rewards):
        pass

    Environment: ClassVar[Type[PassiveEnvironment]] = PassiveEnvironment

    # TODO: rename/remove this, as it isn't used, and there could be some
    # confusion with the available_datasets in task-incremental and iid.
    # Also, since those are already LightningDataModules, what should we do?
    available_datasets: ClassVar[Dict[str, Type[LightningDataModule]]] = {
        # "mnist": MNISTDataModule,
        # "fashion_mnist": FashionMNISTDataModule,
        # "cifar10": CIFAR10DataModule,
        # "imagenet": ImagenetDataModule,
    }
    # Which setup / dataset to use.
    # The setups/dataset are implemented as `LightningDataModule`s.
    dataset: str = choice(available_datasets.keys(), default="mnist")

    # Transforms to be applied to the observatons of the train/valid/test
    # environments.
    transforms: List[Transforms] = list_field()

    # Transforms to be applied to the training datasets.
    train_transforms: List[Transforms] = list_field(Transforms.to_tensor, Transforms.three_channels)
    # Transforms to be applied to the validation datasets.
    val_transforms: List[Transforms] = list_field(Transforms.to_tensor, Transforms.three_channels)
    # Transforms to be applied to the testing datasets.
    test_transforms: List[Transforms] = list_field(Transforms.to_tensor, Transforms.three_channels)
    # Wether to drop the last batch (during training). Useful if you use batchnorm, to
    # avoid having an error when the batch_size is 1.
    drop_last: bool = False


SettingType = TypeVar("SettingType", bound=SLSetting)


================================================
FILE: sequoia/settings/sl/task_incremental/__init__.py
================================================
""" Task Incremental Setting 

Adds the additional assumption that the task labels are available at test time.
"""
# 1. Import stuff from the Parent
# NOTE: Here there doesn't seem to be a need for a custom 'Results' class for
# TaskIncremental, given how similar it is to ClassIncremental.
# 2. Import what we overwrite/customize
from .setting import TaskIncrementalSLSetting


================================================
FILE: sequoia/settings/sl/task_incremental/setting.py
================================================
""" Defines the Task-Incremental CL Setting.

Task-Incremental CL is a variant of the ClassIncrementalSetting with task labels
available at both train and test time.
"""

from dataclasses import dataclass
from typing import ClassVar, Type, TypeVar

from sequoia.settings.assumptions.task_incremental import TaskIncrementalAssumption
from sequoia.settings.sl.incremental import IncrementalSLResults as TaskIncrementalSLResults
from sequoia.settings.sl.incremental import IncrementalSLSetting
from sequoia.utils.utils import constant


@dataclass
class TaskIncrementalSLSetting(TaskIncrementalAssumption, IncrementalSLSetting):
    """Setting where data arrives in a series of Tasks, and where the task
    labels are always available (both train and test time).
    """

    Results: ClassVar[Type[Results]] = TaskIncrementalSLResults

    # Wether task labels are available at train time. (Forced to True.)
    task_labels_at_train_time: bool = constant(True)
    # Wether task labels are available at test time.
    # TODO: Is this really always True for all Task-Incremental Settings?
    task_labels_at_test_time: bool = constant(True)


SettingType = TypeVar("SettingType", bound=TaskIncrementalSLSetting)


================================================
FILE: sequoia/settings/sl/task_incremental/setting_test.py
================================================
import itertools
import math
from typing import *

import pytest

from sequoia.common.config import Config
from sequoia.settings.assumptions.incremental_test import OtherDummyMethod
from sequoia.utils.logging_utils import get_logger

from ..incremental.setting_test import TestIncrementalSLSetting as IncrementalSLSettingTests
from .setting import TaskIncrementalSLSetting

logger = get_logger(__name__)


class TestTaskIncrementalSLSetting(IncrementalSLSettingTests):
    Setting: ClassVar[Type[Setting]] = TaskIncrementalSLSetting
    fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict(
        dataset="mnist",
        batch_size=64,
    )


def check_only_right_classes_present(setting: TaskIncrementalSLSetting):
    """Checks that only the classes within each task are present.

    TODO: This should be refactored to be based more on the reward space.
    """
    assert setting.task_labels_at_test_time and setting.task_labels_at_test_time

    for i in range(setting.nb_tasks):
        setting.current_task_id = i
        batch_size = 5
        train_loader = setting.train_dataloader(batch_size=batch_size)

        # get the classes in the current task:
        task_classes = setting.task_classes(i, train=True)

        for j, (observations, rewards) in enumerate(itertools.islice(train_loader, 100)):
            x = observations.x
            t = observations.task_labels

            if setting.task_labels_at_train_time:
                assert t is not None

            y = rewards.y
            print(i, j, y, t)
            y_in_task_classes = [y_i in task_classes for y_i in y.tolist()]
            assert all(y_in_task_classes)
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)

            reward = train_loader.send([4 for _ in range(batch_size)])
            if rewards is not None:
                # IF we send somethign to the env, then it should give back the same
                # labels as for the last batch.
                assert (reward.y == rewards.y).all()

        train_loader.close()

        valid_loader = setting.val_dataloader(batch_size=batch_size)
        for j, (observations, rewards) in enumerate(itertools.islice(valid_loader, 100)):
            x = observations.x
            t = observations.task_labels

            if setting.monitor_training_performance:
                assert rewards is None

            if setting.task_labels_at_train_time:
                assert t is not None

            y = rewards.y
            print(i, j, y, t)
            y_in_task_classes = [y_i in task_classes for y_i in y.tolist()]
            assert all(y_in_task_classes)
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)

            reward = valid_loader.send(valid_loader.action_space.sample())
            if rewards is not None:
                # IF we send somethign to the env, then it should give back the same
                # labels as for the last batch.
                assert (reward.y == rewards.y).all()

        valid_loader.close()

        # FIXME: get the classes in the current task, at test-time.
        task_classes = list(range(setting.reward_space.n))

        test_loader = setting.test_dataloader(batch_size=batch_size)
        assert not test_loader.unwrapped._hide_task_labels
        for j, (observations, rewards) in enumerate(itertools.islice(test_loader, 100)):
            x = observations.x
            t = observations.task_labels
            if setting.task_labels_at_test_time:
                assert t is not None

            if rewards is None:
                rewards = test_loader.send(test_loader.action_space.sample())
                assert rewards is not None
                assert rewards.y is not None

            y = rewards.y
            print(i, j, y, t)
            y_in_task_classes = [y_i in task_classes for y_i in y.tolist()]
            assert all(y_in_task_classes)
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)

        test_loader.close()


def test_task_incremental_mnist_setup():
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        increment=2,
        # BUG: When num_workers > 0, some of the tests hang, but only when running *all* the tests!
        # num_workers=0,
    )
    assert setting.task_labels_at_test_time and setting.task_labels_at_train_time
    setting.prepare_data(data_dir="data")
    setting.setup()
    check_only_right_classes_present(setting)


@pytest.mark.xfail(
    reason=(
        "TODO: Continuum actually re-labels the images to 0-10, regardless of the "
        "class order. The actual images are ok though."
    )
)
def test_task_incremental_mnist_setup_reversed_class_order():
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=5,
        class_order=list(reversed(range(10))),
        # num_workers=0,
    )
    assert setting.task_labels_at_train_time and setting.task_labels_at_test_time
    assert (
        setting.known_task_boundaries_at_train_time and setting.known_task_boundaries_at_test_time
    )
    setting.prepare_data(data_dir="data")
    setting.setup()
    check_only_right_classes_present(setting)


def test_class_incremental_mnist_setup_with_nb_tasks():
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=2,
        num_workers=0,
    )
    assert setting.increment == 5
    setting.prepare_data(data_dir="data")
    setting.setup()
    assert len(setting.train_datasets) == 2
    assert len(setting.val_datasets) == 2
    assert len(setting.test_datasets) == 2
    check_only_right_classes_present(setting)


def test_action_space_always_matches_obs_batch_size(config: Config):
    """Make sure that the batch size in the observations always matches the action
    space provided to the `get_actions` method.

    ALSO:
    - Make sure that we get asked for actions for all the observations in the test set,
      even when there is a shorter last batch.
    - The total number of observations match the dataset size.
    """
    nb_tasks = 5
    # TODO: The `drop_last` argument seems to not be used correctly by the dataloaders / test loop.
    batch_size = 128

    # HUH why are we doing this here?
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=nb_tasks,
        batch_size=batch_size,
        num_workers=4,
        monitor_training_performance=True,
        drop_last=False,
    )

    # 10_000 examples in the test dataset of mnist.
    total_samples = len(setting.test_dataloader().dataset)

    method = OtherDummyMethod()
    _ = setting.apply(method, config=config)

    # Multiply by nb_tasks because the test loop is ran after each training task.
    assert sum(method.batch_sizes) == total_samples * nb_tasks
    assert len(method.batch_sizes) == math.ceil(total_samples / batch_size) * nb_tasks
    if total_samples % batch_size == 0:
        assert set(method.batch_sizes) == {batch_size}
    else:
        assert set(method.batch_sizes) == {batch_size, total_samples % batch_size}


================================================
FILE: sequoia/settings/sl/traditional/__init__.py
================================================
# 1. Import stuff from the Parent
# 2. Import what we overwrite/customize
from .results import IIDResults
from .setting import TraditionalSLSetting


================================================
FILE: sequoia/settings/sl/traditional/results.py
================================================
"""Defines the Results of apply a Method to an IID Setting.  
"""
from pathlib import Path
from typing import Dict, Union

import matplotlib.pyplot as plt

from sequoia.settings.sl.incremental.results import IncrementalSLResults


class IIDResults(IncrementalSLResults):
    """Results of applying a Method on an IID Setting.

    # TODO: Refactor this to be based on `TaskResults`?
    """

    def save_to_dir(self, save_dir: Union[str, Path]) -> None:
        # TODO: Add wandb logging here somehow.
        save_dir = Path(save_dir)
        save_dir.mkdir(exist_ok=True, parents=True)
        plots: Dict[str, plt.Figure] = self.make_plots()

        # Save the actual 'results' object to a file in the save dir.
        results_json_path = save_dir / "results.json"
        self.save(results_json_path)
        print(f"Saved a copy of the results to {results_json_path}")

        print(f"\nPlots: {plots}\n")
        for fig_name, figure in plots.items():
            print(f"fig_name: {fig_name}")
            # figure.show()
            # plt.waitforbuttonpress(10)
            path = (save_dir / fig_name).with_suffix(".jpg")
            path.parent.mkdir(exist_ok=True, parents=True)
            figure.savefig(path)
            print(f"Saved figure at path {path}")

    def make_plots(self) -> Dict[str, plt.Figure]:
        plots_dict = super().make_plots()
        # TODO: Could add a Confusion Matrix plot?
        plots_dict.update({"class_accuracies": self.class_accuracies_plot()})
        return plots_dict

    def class_accuracies_plot(self):
        figure: plt.Figure
        axes: plt.Axes
        figure, axes = plt.subplots()
        y = self[0][0].average_metrics.class_accuracy
        x = list(range(len(y)))
        rects = axes.bar(x, y)
        axes.set_title("Class Accuracy")
        axes.set_xlabel("Class")
        axes.set_ylabel("Accuracy")
        axes.set_ylim(0, 1.0)
        # autolabel(axes, rects)
        return figure

    # def summary(self) -> str:
    #     s = StringIO()
    #     with redirect_stdout(s):
    #         print(f"Average Accuracy: {self.average_metrics.accuracy:.2%}")
    #         for i, class_acc in enumerate(self.average_metrics.class_accuracy):
    #             print(f"Accuracy for class {i}: {class_acc:.3%}")
    #     s.seek(0)
    #     return s.read()

    def to_log_dict(self, verbose: bool = False) -> Dict[str, float]:
        results = super().to_log_dict(verbose=verbose)
        # Remove the useless 2-levels of nesting from the log_dict
        results.update(results.pop("Task 0").pop("Task 0"))
        # assert False, json.dumps(results, indent="\t")
        return results


================================================
FILE: sequoia/settings/sl/traditional/setting.py
================================================
""" Defines the TraditionalSLSetting, as a variant of the TaskIncremental setting with
only one task.
"""
from dataclasses import dataclass
from typing import ClassVar, List, Optional, Type, TypeVar, Union

from sequoia.utils.utils import constant

# TODO: Re-arrange the 'multiple-inheritance' with domain-incremental and
# task-incremental, this might not be 100% accurate, as the "IID" you get from
# moving down from domain-incremental (+ only one task) might not be exactly the same as
# the one you get form TaskIncremental (+ only one task)
from ..incremental import IncrementalSLSetting
from .results import IIDResults

# TODO: IDEA: Add the pytorch lightning datamodules in the list of
# 'available datasets' for the IID setting, and make sure that it doesn't mess
# up the methods in the parents (train/val loop, dataloader construction, etc.)
# IDEA: Maybe overwrite the 'train/val/test_dataloader' methods on the setting
# and when the chosen dataset is a LightnignDataModule, then just return the
# result from the corresponding method on the LightningDataModule, rather than
# from super().
# from pl_bolts.datamodules import (CIFAR10DataModule, FashionMNISTDataModule,
#                                   ImagenetDataModule, MNISTDataModule)


@dataclass
class TraditionalSLSetting(IncrementalSLSetting):
    """Your 'usual' supervised learning Setting, where the samples are i.i.d.

    This Setting is slightly different than the others, in that it can be recovered in
    *two* different ways:
    - As a variant of Task-Incremental learning, but where there is only one task;
    - As a variant of Domain-Incremental learning, but where there is only one task.
    """

    Results: ClassVar[Type[Results]] = IIDResults

    # Number of tasks.
    nb_tasks: int = 5

    stationary_context: bool = constant(True)

    # increment: Union[int, List[int]] = constant(None)
    # A different task size applied only for the first task.
    # Desactivated if `increment` is a list.
    initial_increment: int = constant(None)
    # An optional custom class order, used for NC.
    class_order: Optional[List[int]] = constant(None)
    # Either number of classes per task, or a list specifying for
    # every task the amount of new classes (defaults to the value of
    # `increment`).
    test_increment: Optional[Union[List[int], int]] = constant(None)
    # A different task size applied only for the first test task.
    # Desactivated if `test_increment` is a list. Defaults to the
    # value of `initial_increment`.
    test_initial_increment: Optional[int] = constant(None)
    # An optional custom class order for testing, used for NC.
    # Defaults to the value of `class_order`.
    test_class_order: Optional[List[int]] = constant(None)

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        Defaults to the number of tasks, but may be different, for instance in so-called
        Multi-Task Settings, this is set to 1.
        """
        return 1 if self.stationary_context else self.nb_tasks


SettingType = TypeVar("SettingType", bound=TraditionalSLSetting)


if __name__ == "__main__":
    TraditionalSLSetting.main()


================================================
FILE: sequoia/settings/sl/traditional/setting_test.py
================================================
import pytest

from sequoia.methods import Method
from sequoia.settings import (
    ClassIncrementalSetting,
    DomainIncrementalSLSetting,
    TaskIncrementalSLSetting,
)

from ..continual.setting import ContinualSLSetting
from ..discrete.setting import DiscreteTaskAgnosticSLSetting
from ..incremental.setting import IncrementalSLSetting
from ..multi_task.setting import MultiTaskSLSetting
from .setting import TraditionalSLSetting


class ContinualSLMethod(Method, target_setting=ContinualSLSetting):
    pass


class DiscreteTaskAgnosticSLMethod(Method, target_setting=DiscreteTaskAgnosticSLSetting):
    pass


class IncrementalSLMethod(Method, target_setting=IncrementalSLSetting):
    pass


class ClassIncrementalSLMethod(Method, target_setting=ClassIncrementalSetting):
    pass


class DomainIncrementalSLMethod(Method, target_setting=DomainIncrementalSLSetting):
    pass


class TaskIncrementalSLMethod(Method, target_setting=TaskIncrementalSLSetting):
    pass


class TraditionalSLMethod(Method, target_setting=TraditionalSLSetting):
    pass


class MultiTaskSLMethod(Method, target_setting=MultiTaskSLSetting):
    pass


def test_methods_applicable_to_iid_setting():
    """Test to make sure that Methods that are applicable to the Domain-Incremental
    are applicable to the IID Setting, same for those targetting the Task-Incremental
    setting.
    """
    assert ContinualSLMethod.is_applicable(ContinualSLSetting)
    assert ContinualSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting)
    assert ContinualSLMethod.is_applicable(IncrementalSLSetting)
    assert ContinualSLMethod.is_applicable(ClassIncrementalSetting)
    assert ContinualSLMethod.is_applicable(TaskIncrementalSLSetting)
    assert ContinualSLMethod.is_applicable(DomainIncrementalSLSetting)
    assert ContinualSLMethod.is_applicable(TraditionalSLSetting)
    assert ContinualSLMethod.is_applicable(MultiTaskSLSetting)

    assert not DiscreteTaskAgnosticSLMethod.is_applicable(ContinualSLSetting)
    assert DiscreteTaskAgnosticSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting)
    assert DiscreteTaskAgnosticSLMethod.is_applicable(IncrementalSLSetting)
    assert DiscreteTaskAgnosticSLMethod.is_applicable(ClassIncrementalSetting)
    assert DiscreteTaskAgnosticSLMethod.is_applicable(TaskIncrementalSLSetting)
    assert DiscreteTaskAgnosticSLMethod.is_applicable(DomainIncrementalSLSetting)
    assert DiscreteTaskAgnosticSLMethod.is_applicable(TraditionalSLSetting)
    assert DiscreteTaskAgnosticSLMethod.is_applicable(MultiTaskSLSetting)

    assert not IncrementalSLMethod.is_applicable(ContinualSLSetting)
    assert not IncrementalSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting)
    assert IncrementalSLMethod.is_applicable(IncrementalSLSetting)
    assert IncrementalSLMethod.is_applicable(ClassIncrementalSetting)
    assert IncrementalSLMethod.is_applicable(TaskIncrementalSLSetting)
    assert IncrementalSLMethod.is_applicable(DomainIncrementalSLSetting)
    assert IncrementalSLMethod.is_applicable(TraditionalSLSetting)
    assert IncrementalSLMethod.is_applicable(MultiTaskSLSetting)

    assert not ClassIncrementalSLMethod.is_applicable(ContinualSLSetting)
    assert not ClassIncrementalSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting)
    assert ClassIncrementalSLMethod.is_applicable(IncrementalSLSetting)
    assert ClassIncrementalSLMethod.is_applicable(ClassIncrementalSetting)
    assert ClassIncrementalSLMethod.is_applicable(TaskIncrementalSLSetting)
    assert ClassIncrementalSLMethod.is_applicable(DomainIncrementalSLSetting)
    assert ClassIncrementalSLMethod.is_applicable(TraditionalSLSetting)
    assert ClassIncrementalSLMethod.is_applicable(MultiTaskSLSetting)

    assert not TaskIncrementalSLMethod.is_applicable(ContinualSLSetting)
    assert not TaskIncrementalSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting)
    assert not TaskIncrementalSLMethod.is_applicable(IncrementalSLSetting)
    assert not TaskIncrementalSLMethod.is_applicable(ClassIncrementalSetting)
    assert TaskIncrementalSLMethod.is_applicable(TaskIncrementalSLSetting)
    assert not TaskIncrementalSLMethod.is_applicable(DomainIncrementalSLSetting)
    assert not TaskIncrementalSLMethod.is_applicable(TraditionalSLSetting)
    assert TaskIncrementalSLMethod.is_applicable(MultiTaskSLSetting)

    assert not DomainIncrementalSLMethod.is_applicable(ContinualSLSetting)
    assert not DomainIncrementalSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting)
    assert not DomainIncrementalSLMethod.is_applicable(IncrementalSLSetting)
    assert not DomainIncrementalSLMethod.is_applicable(ClassIncrementalSetting)
    assert not DomainIncrementalSLMethod.is_applicable(TaskIncrementalSLSetting)
    assert DomainIncrementalSLMethod.is_applicable(DomainIncrementalSLSetting)
    assert not DomainIncrementalSLMethod.is_applicable(TraditionalSLSetting)
    # TODO: What about this one?
    # assert DomainIncrementalSLMethod.is_applicable(MultiTaskSLSetting)

    assert not TraditionalSLMethod.is_applicable(ContinualSLSetting)
    assert not TraditionalSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting)
    assert not TraditionalSLMethod.is_applicable(IncrementalSLSetting)
    assert not TraditionalSLMethod.is_applicable(TaskIncrementalSLSetting)
    assert not TraditionalSLMethod.is_applicable(DomainIncrementalSLSetting)
    assert not TraditionalSLMethod.is_applicable(ClassIncrementalSetting)
    assert TraditionalSLMethod.is_applicable(TraditionalSLSetting)
    assert TraditionalSLMethod.is_applicable(MultiTaskSLSetting)

    assert not MultiTaskSLMethod.is_applicable(ContinualSLSetting)
    assert not MultiTaskSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting)
    assert not MultiTaskSLMethod.is_applicable(IncrementalSLSetting)
    assert not MultiTaskSLMethod.is_applicable(TaskIncrementalSLSetting)
    assert not MultiTaskSLMethod.is_applicable(DomainIncrementalSLSetting)
    assert not MultiTaskSLMethod.is_applicable(ClassIncrementalSetting)
    assert not MultiTaskSLMethod.is_applicable(TraditionalSLSetting)
    assert MultiTaskSLMethod.is_applicable(MultiTaskSLSetting)


def test_get_parents():
    # TODO: THis is a bit funky, now that Class-Incremental is a "pointer" to
    # Incremental, and Traditional has been moved under TaskIncremental
    assert TraditionalSLSetting in IncrementalSLSetting.get_children()
    assert TraditionalSLSetting not in TaskIncrementalSLSetting.get_children()
    assert TraditionalSLSetting in IncrementalSLSetting.immediate_children()

    assert TaskIncrementalSLSetting not in TraditionalSLSetting.parents()
    assert ClassIncrementalSetting in TaskIncrementalSLSetting.immediate_parents()

    assert TaskIncrementalSLSetting not in TraditionalSLSetting.get_parents()
    assert ClassIncrementalSetting in TraditionalSLSetting.get_parents()
    assert TraditionalSLSetting not in TraditionalSLSetting.get_parents()


@pytest.mark.xfail(reason="Temporarily removing the domain-incremental<--traditional link.")
def test_get_parents_domain_incremental():
    assert TraditionalSLSetting in DomainIncrementalSLSetting.get_children()
    assert DomainIncrementalSLSetting in TraditionalSLSetting.get_immediate_parents()


@pytest.mark.xfail(reason="Temporarily removing the domain-incremental<--traditional link.")
def test_method_applicability_domain_incremental():
    assert not DomainIncrementalSLMethod.is_applicable(ClassIncrementalSetting)
    assert not DomainIncrementalSLMethod.is_applicable(TaskIncrementalSLSetting)
    assert DomainIncrementalSLMethod.is_applicable(DomainIncrementalSLSetting)
    assert DomainIncrementalSLMethod.is_applicable(TraditionalSLSetting)


@pytest.mark.xfail(reason="Temporarily removing the domain-incremental<--traditional link.")
def test_get_parents_domain_incremental():
    assert DomainIncrementalSLSetting in TraditionalSLSetting.get_parents()


================================================
FILE: sequoia/settings/sl/wrappers/__init__.py
================================================
""" Module defining gym wrappers that are specific to SL Environments.
"""
from .measure_performance import MeasureSLPerformanceWrapper


================================================
FILE: sequoia/settings/sl/wrappers/measure_performance.py
================================================
""" TODO: Create a Wrapper that measures performance over the first epoch of training in SL.

Then maybe after we can make something more general that also works for RL.
"""
import warnings
from collections import defaultdict

""" Wrapper that gets applied onto the environment in order to measure the online
training performance.

TODO: Move this somewhere more appropriate. There's also the RL version of the wrapper
here.
"""
from typing import Dict, Iterator, Optional, Tuple

import numpy as np
from gym.utils import colorize
from torch import Tensor

import wandb
from sequoia.common.gym_wrappers.measure_performance import MeasurePerformanceWrapper
from sequoia.common.metrics import ClassificationMetrics, Metrics
from sequoia.settings.base import Actions, Observations, Rewards
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.utils.utils import add_prefix


class MeasureSLPerformanceWrapper(
    MeasurePerformanceWrapper,
    # MeasurePerformanceWrapper[PassiveEnvironment]  # Python 3.7
    # MeasurePerformanceWrapper[PassiveEnvironment, ClassificationMetrics] # Python 3.8+
):
    def __init__(
        self,
        env: PassiveEnvironment,
        first_epoch_only: bool = False,
        wandb_prefix: str = None,
    ):
        super().__init__(env)
        # Metrics mapping from step to the metrics at that step.
        self._metrics: Dict[int, ClassificationMetrics] = defaultdict(Metrics)
        self.first_epoch_only = first_epoch_only
        self.wandb_prefix = wandb_prefix
        # Counter for the number of steps.
        self._steps: int = 0
        assert isinstance(self.env.unwrapped, PassiveEnvironment)
        if not self.env.unwrapped.pretend_to_be_active:
            warnings.warn(
                RuntimeWarning(
                    colorize(
                        "Your online performance "
                        + ("during the first epoch " if self.first_epoch_only else "")
                        + "on this environment will be monitored! "
                        "Since this env is Passive, i.e. a Supervised Learning "
                        "DataLoader, the Rewards (y) will be withheld until "
                        "actions are passed to the 'send' method. Make sure that "
                        "your training loop can handle this small tweak.",
                        color="yellow",
                    )
                )
            )
        self.env.unwrapped.pretend_to_be_active = True
        self.__epochs = 0

    def reset(self) -> Observations:
        return self.env.reset()

    @property
    def in_evaluation_period(self) -> bool:
        if self.first_epoch_only:
            # TODO: Double-check the iteraction of IterableDataset and __len__
            return self.__epochs == 0
        return True

    def step(self, action: Actions):
        observation, reward, done, info = self.env.step(action)
        # TODO: Make this wrapper task-aware, using the task ids in this `observation`?
        if self.in_evaluation_period:
            # TODO: Edge case, but we also need the prediction for the last batch to be
            # counted.
            self._metrics[self._steps] += self.get_metrics(action, reward)
        elif self.first_epoch_only:
            # If we are at the last batch in the first epoch, we still keep the metrics
            # for that batch, even though we're technically not in the first epoch
            # anymore.
            # TODO: CHeck the length through the dataset? or through a more 'clean' way
            # e.g. through the `max_steps` property of a TimeLimit wrapper or something?
            num_batches = len(self.unwrapped.dataset) // self.batch_size
            if not self.unwrapped.drop_last:
                num_batches += 1 if len(self.unwrapped.dataset) % self.batch_size else 0
            # currently_at_last_batch = self._steps == num_batches - 1
            currently_at_last_batch = self._steps == num_batches - 1
            if self.__epochs == 1 and currently_at_last_batch:
                self._metrics[self._steps] += self.get_metrics(action, reward)
        self._steps += 1
        return observation, reward, done, info

    def send(self, action: Actions):
        if not isinstance(action, Actions):
            assert isinstance(action, (np.ndarray, Tensor))
            action = Actions(action)

        reward = self.env.send(action)

        if self.in_evaluation_period:
            # TODO: Edge case, but we also need the prediction for the last batch to be
            # counted.
            self._metrics[self._steps] += self.get_metrics(action, reward)
        elif self.first_epoch_only:
            # If we are at the last batch in the first epoch, we still keep the metrics
            # for that batch, even though we're technically not in the first epoch
            # anymore.
            # TODO: CHeck the length through the dataset? or through a more 'clean' way
            # e.g. through the `max_steps` property of a TimeLimit wrapper or something?
            num_batches = len(self.unwrapped.dataset) // self.batch_size
            if not self.unwrapped.drop_last:
                num_batches += 1 if len(self.unwrapped.dataset) % self.batch_size else 0
            # currently_at_last_batch = self._steps == num_batches - 1
            currently_at_last_batch = self._steps == num_batches - 1
            if self.__epochs == 1 and currently_at_last_batch:
                self._metrics[self._steps] += self.get_metrics(action, reward)
        # This is ok since we don't increment in the iterator.
        self._steps += 1
        return reward

    def get_metrics(self, action: Actions, reward: Rewards) -> Metrics:
        assert action.y_pred.shape == reward.y.shape, (action.shapes, reward.shapes)
        metric = ClassificationMetrics(y_pred=action.y_pred, y=reward.y, num_classes=self.n_classes)

        if wandb.run:
            log_dict = metric.to_log_dict()
            if self.wandb_prefix:
                log_dict = add_prefix(log_dict, prefix=self.wandb_prefix, sep="/")
            log_dict["steps"] = self._steps
            wandb.log(log_dict)
        return metric

    def __iter__(self) -> Iterator[Tuple[Observations, Optional[Rewards]]]:
        if self.__epochs == 1 and self.first_epoch_only:
            print(
                colorize(
                    "Your performance during the first epoch on this environment has "
                    "been successfully measured! The environment will now yield the "
                    "rewards (y) during iteration, and you are no longer required to "
                    "send an action for each observation.",
                    color="green",
                )
            )
            self.env.unwrapped.pretend_to_be_active = False

        for obs, rew in self.env.__iter__():
            if self.in_evaluation_period:
                yield obs, None
            else:
                yield obs, rew
        self.__epochs += 1


================================================
FILE: sequoia/settings/sl/wrappers/measure_performance_test.py
================================================
""" TODO: Tests for the 'measure performance wrapper' to be used to get the performance
over the first "epoch" 
"""
import dataclasses
from typing import Iterable, Tuple, TypeVar

import numpy as np
import pytest
import torch
from torch.utils.data import TensorDataset

from sequoia.common import Config
from sequoia.common.metrics import ClassificationMetrics
from sequoia.settings.rl.wrappers import TypedObjectsWrapper
from sequoia.settings.sl import ClassIncrementalSetting
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.settings.sl.incremental.objects import Actions, Observations, Rewards

from .measure_performance import MeasureSLPerformanceWrapper

T = TypeVar("T")


def with_is_last(iterable: Iterable[T]) -> Iterable[Tuple[T, bool]]:
    """Function that mimics what's happening in pytorch-lightning, where the iterator
    is one-offset. This can cause a bit of headache in Sequoia's wrappers when iterating
    over an env, because they expect an action for each observation.
    """
    iterator = iter(iterable)
    sentinel = object()
    previous_value = next(iterator)
    current_value = next(iterator, sentinel)
    while current_value is not sentinel:
        yield previous_value, False
        previous_value = current_value
        current_value = next(iterator, sentinel)
    yield previous_value, True


def test_measure_performance_wrapper():
    dataset = TensorDataset(
        torch.arange(100).reshape([100, 1, 1, 1]) * torch.ones([100, 3, 32, 32]),
        torch.arange(100),
    )
    pretend_to_be_active = True
    env = PassiveEnvironment(
        dataset, batch_size=1, n_classes=100, pretend_to_be_active=pretend_to_be_active
    )
    for i, (x, y) in enumerate(env):
        # print(x)
        assert y is None if pretend_to_be_active else y is not None
        assert (x == i).all()
        action = i if i < 50 else 0
        reward = env.send(action)
        assert reward == i
    assert i == 99
    # This might be a bit weird, since .reset() will give the same obs as the first x
    # when iterating.
    obs = env.reset()
    for i, (x, y) in enumerate(env):
        # print(x)
        assert y is None
        assert (x == i).all()
        action = i if i < 50 else 0
        reward = env.send(action)
        assert reward == i
    assert i == 99
    from sequoia.settings.sl.continual.objects import Observations, Actions, Rewards

    env = TypedObjectsWrapper(
        env, observations_type=Observations, actions_type=Actions, rewards_type=Rewards
    )
    # TODO: Do we want to require Observations / Actions / Rewards objects?
    env = MeasureSLPerformanceWrapper(env, first_epoch_only=False)
    for epoch in range(3):
        for i, (observations, rewards) in enumerate(env):
            assert observations is not None
            assert rewards is None
            assert (observations.x == i).all()

            # Only guess correctly for the first 50 steps.
            action = Actions(y_pred=np.array([i if i < 50 else 0]))
            rewards = env.send(action)
            assert (rewards.y == i).all()
        assert i == 99
    assert epoch == 2

    assert set(env.get_online_performance().keys()) == set(range(100 * 3))
    for i, (step, metric) in enumerate(env.get_online_performance().items()):
        assert step == i
        assert metric.accuracy == (1.0 if (i % 100) < 50 else 0.0), (i, step, metric)

    metrics = env.get_average_online_performance()
    assert isinstance(metrics, ClassificationMetrics)
    # Since we guessed the correct class only during the first 50 steps.
    assert metrics.accuracy == 0.5


def make_dummy_env(n_samples: int = 100, batch_size: int = 1, drop_last: bool = False):
    dataset = TensorDataset(
        torch.arange(n_samples).reshape([n_samples, 1, 1, 1]) * torch.ones([n_samples, 3, 32, 32]),
        torch.arange(n_samples),
    )
    pretend_to_be_active = False
    env = PassiveEnvironment(
        dataset,
        batch_size=batch_size,
        n_classes=n_samples,
        pretend_to_be_active=pretend_to_be_active,
        drop_last=drop_last,
    )
    env = TypedObjectsWrapper(
        env, observations_type=Observations, actions_type=Actions, rewards_type=Rewards
    )
    return env


def test_measure_performance_wrapper_first_epoch_only():
    env = make_dummy_env(n_samples=100, batch_size=1)
    env = MeasureSLPerformanceWrapper(env, first_epoch_only=True)

    for epoch in range(2):
        print(f"start epoch {epoch}")
        for i, (observations, rewards) in enumerate(env):
            assert observations is not None
            if epoch == 0:
                assert rewards is None
            else:
                assert rewards is not None
                rewards_ = rewards  # save these for a comparison below.

            assert (observations.x == i).all()

            # Only guess correctly for the first 50 steps.
            action = Actions(y_pred=np.array([i if i < 50 else 0]))

            rewards = env.send(action)
            if epoch != 0:
                # We should just receive what we already got by iterating.
                assert rewards.y == rewards_.y
            assert (rewards.y == i).all()
        assert i == 99

    # do another epoch, but this time don't even send actions.
    for i, (observations, rewards) in enumerate(env):
        assert (observations.x == i).all()
        assert (rewards.y == i).all()
    assert i == 99

    assert set(env.get_online_performance().keys()) == set(range(100))
    for i, (step, metric) in enumerate(env.get_online_performance().items()):
        assert step == i
        assert metric.accuracy == (1.0 if (i % 100) < 50 else 0.0), (i, step, metric)

    metrics = env.get_average_online_performance()
    assert isinstance(metrics, ClassificationMetrics)
    # Since we guessed the correct class only during the first 50 steps.
    assert metrics.accuracy == 0.5
    assert metrics.n_samples == 100


def test_measure_performance_wrapper_odd_vs_even():
    env = make_dummy_env(n_samples=100, batch_size=1)
    env = MeasureSLPerformanceWrapper(env, first_epoch_only=True)

    for i, (observations, rewards) in enumerate(env):
        assert observations is not None
        assert rewards is None or rewards.y is None
        assert (observations.x == i).all()

        # Only guess correctly for the first 50 steps.
        action = Actions(y_pred=np.array([i if i % 2 == 0 else 0]))
        rewards = env.send(action)
        assert (rewards.y == i).all()
    assert i == 99

    assert set(env.get_online_performance().keys()) == set(range(100))
    for i, (step, metric) in enumerate(env.get_online_performance().items()):
        assert step == i
        if step % 2 == 0:
            assert metric.accuracy == 1.0, (i, step, metric)
        else:
            assert metric.accuracy == 0.0, (i, step, metric)

    metrics = env.get_average_online_performance()
    assert isinstance(metrics, ClassificationMetrics)
    # Since we guessed the correct class only during the first 50 steps.
    assert metrics.accuracy == 0.5
    assert metrics.n_samples == 100


def test_measure_performance_wrapper_odd_vs_even_passive():
    dataset = TensorDataset(
        torch.arange(100).reshape([100, 1, 1, 1]) * torch.ones([100, 3, 32, 32]),
        torch.arange(100),
    )
    pretend_to_be_active = False
    env = PassiveEnvironment(
        dataset, batch_size=1, n_classes=100, pretend_to_be_active=pretend_to_be_active
    )
    env = TypedObjectsWrapper(
        env, observations_type=Observations, actions_type=Actions, rewards_type=Rewards
    )
    env = MeasureSLPerformanceWrapper(env, first_epoch_only=False)

    for i, (observations, rewards) in enumerate(env):
        assert observations is not None
        assert rewards is None or rewards.y is None
        assert (observations.x == i).all()

        # Only guess correctly for the first 50 steps.
        action = Actions(y_pred=np.array([i if i % 2 == 0 else 0]))
        rewards = env.send(action)
        assert (rewards.y == i).all()
    assert i == 99

    assert set(env.get_online_performance().keys()) == set(range(100))
    for i, (step, metric) in enumerate(env.get_online_performance().items()):
        assert step == i
        if step % 2 == 0:
            assert metric.accuracy == 1.0, (i, step, metric)
        else:
            assert metric.accuracy == 0.0, (i, step, metric)

    metrics = env.get_average_online_performance()
    assert isinstance(metrics, ClassificationMetrics)
    # Since we guessed the correct class only during the first 50 steps.
    assert metrics.accuracy == 0.5
    assert metrics.n_samples == 100


def test_last_batch():
    """Test what happens with the last batch, in the case where the batch size doesn't
    divide the dataset equally.
    """
    env = make_dummy_env(n_samples=110, batch_size=20)
    env = MeasureSLPerformanceWrapper(env, first_epoch_only=True)

    for i, (obs, rew) in enumerate(env):
        assert rew is None
        if i != 5:
            assert obs.batch_size == 20, i
        else:
            assert obs.batch_size == 10, i
        actions = Actions(y_pred=torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size])
        rewards = env.send(actions)
        assert (rewards.y == torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size]).all()

    perf = env.get_average_online_performance()
    assert perf.accuracy == 1.0
    assert perf.n_samples == 110


from sequoia.methods.models.base_model import BaseModel


def test_last_batch_baseline_model():
    """BUG: Baseline method is doing something weird at the last batch, and I dont know quite why."""
    n_samples = 110
    batch_size = 20

    # Note: the y's here are different.
    dataset = TensorDataset(
        torch.arange(n_samples).reshape([n_samples, 1, 1, 1]) * torch.ones([n_samples, 3, 32, 32]),
        torch.zeros(n_samples, dtype=int),
    )
    pretend_to_be_active = False
    env = PassiveEnvironment(
        dataset,
        batch_size=batch_size,
        n_classes=n_samples,
        pretend_to_be_active=pretend_to_be_active,
    )
    env = TypedObjectsWrapper(
        env, observations_type=Observations, actions_type=Actions, rewards_type=Rewards
    )
    env = MeasureSLPerformanceWrapper(env, first_epoch_only=True)

    # FIXME: Hacky setup: Should instead have a way of using a 'test' setting with a
    # configurable in-memory test dataset.
    setting = ClassIncrementalSetting()
    setting.train_env = env
    model = BaseModel(setting=setting, hparams=BaseModel.HParams(), config=Config(debug=True))

    for i, (obs, rew) in enumerate(env):
        obs = dataclasses.replace(
            obs, task_labels=torch.ones([obs.x.shape[0]], device=obs.x.device)
        )
        assert rew is None
        forward_pass = model.training_step((obs, rew), batch_idx=i)
        loss = model.training_step_end([forward_pass])
        print(loss)

    perf = env.get_average_online_performance()
    assert perf.n_samples == 110


@pytest.mark.parametrize("drop_last", [False, True])
def test_delayed_actions(drop_last: bool):
    """Test that whenever some intermediate between the env and the Method is
    caching some of the observations, the actions and rewards still end up lining up.

    This is just to replicate what's happening in Pytorch Lightning, where they use some
    function to check if the batch is the last one or not, and was causing issue before.
    """
    env = make_dummy_env(n_samples=110, batch_size=20, drop_last=drop_last)
    env = MeasureSLPerformanceWrapper(env, first_epoch_only=True)
    i = 0

    for i, ((obs, rew), is_last) in enumerate(with_is_last(env)):
        print(i, obs.batch_size)
        assert rew is None
        if i != 5:
            assert obs.batch_size == 20, i
        else:
            assert obs.batch_size == 10, i
        actions = Actions(y_pred=torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size])
        rewards = env.send(actions)
        assert (rewards.y == torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size]).all()
    assert i == (4 if drop_last else 5)
    assert is_last

    for i, ((obs, rew), is_last) in enumerate(with_is_last(env)):
        print(i)
        # We get rewards now that we're outside of the first epoch.
        assert rew is not None
        if i < 5:
            assert obs.batch_size == 20, i
        else:
            assert obs.batch_size == 10, i

        # actions = Actions(y_pred=torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size])
        # rewards = env.send(actions)
        # assert (rewards.y == torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size]).all()
    assert i == 4 if drop_last else 5
    assert len(list(env)) == 5 if drop_last else 6
    assert len(list(with_is_last(env))) == 5 if drop_last else 6

    perf = env.get_average_online_performance()
    assert perf.accuracy == 1.0
    # BUG: The number of samples for the metrics isn't quite right, should include the
    # last batch, even if it doesn't have a 'full' batch.
    assert perf.n_samples == (100 if drop_last else 110)


================================================
FILE: sequoia/settings.puml
================================================
@startuml settings

!include gym.puml
!include pytorch_lightning.puml
' !include common.puml
'  TODO: there must be a better way to show only one thing from a
' package, without having to import all the package and then 
' remove everything but that one thing!
remove gym.spaces
remove Wrapper
' remove common

namespace torch {
    class DataLoader
    class Tensor
}


package settings {
    ' !include base/base.puml

    abstract class Setting extends SettingABC {
        ' 'root' setting.
        -- static (class) attributes --
        + {static} Observations: Type[Observations]    
        + {static} Actions: Type[Actions]
        + {static} Rewards: Type[Rewards]

        .. attributes ..

        + observation_space: Space 
        + action_space: Space 
        + reward_space: Space

        .. methods ..

        {abstract} + apply(Method): Results
    }
    
    package assumptions as settings.assumptions {
        package continual as settings.assumptions.continual {
            abstract class ContinualAssumption extends Setting {
            }
        }
        package incremental as settings.assumptions.incremental {
            abstract class IncrementalAssumption extends ContinualAssumption {
                + nb_tasks: int
                + task_labels_at_train_time: bool
                + task_labels_at_test_time: bool
                + {field} known_task_boundaries_at_train_time: bool = True (constant)
                + {field} known_task_boundaries_at_test_time: bool = True (constant)
                ' TODO: THis is actually a constant atm, even for ContinualRL
                ' doesn't have this set to 'true', since there is only one task,
                ' so there aren't an 'task boundaries' to speak of.
                + {field} smooth_task_boundaries: bool
                - _current_task_id: int
                + train_loop()
                + test_loop()

            }

            abstract class IncrementalObservations extends Observations {
                + task_labels: Optional[Tensor]
            }

            abstract class IncrementalResults extends Results {

            }
        }
        ' package task_incremental as settings.assumptions.task_incremental {
        '     abstract class TaskIncrementalAssumption extends IncrementalAssumption {
        '     }
        ' }

        ' package iid as settings.assumptions.iid {
        '     abstract class TraditionalSLSetting extends TaskIncrementalSLSetting {
        '     }
        ' }
    }

    package passive as settings.passive {
        class PassiveEnvironment implements Environment {}
        abstract class SLSetting extends Setting {
            {abstract} + train_dataloader(): PassiveEnvironment
            {abstract} + val_dataloader(): PassiveEnvironment
            {abstract} + test_dataloader(): PassiveEnvironment
            + dataset: str
            + available_datasets: dict
        }
        ' PassiveEnvironment extends DataLoader
        
        package cl as settings.passive.cl {
            class ClassIncrementalSetting implements SLSetting, IncrementalAssumption {
                {static} + Results: Type[Results] = IncrementalSLResults
                + nb_tasks: int
                + task_labels_at_train_time: bool = True
                + task_labels_at_test_time: bool = False
                + transforms: List[Transforms]
                + class_order: Optional[List[int]] = None
                + relabel: bool = False
            }

            class IncrementalSLResults implements IncrementalResults {}
            package domain_incremental as settings.passive.cl.domain_incremental {
                class DomainIncrementalSetting extends ClassIncrementalSetting {
                    + relabel: bool = True
                }

                 
            }

            package task_incremental as settings.passive.cl.task_incremental {
                class TaskIncrementalSLSetting extends ClassIncrementalSetting {
                    {field} + task_labels_at_train_time: bool = True (constant)
                    {field} + task_labels_at_test_time: bool = True (constant)
                }
                ' class TaskIncrementalResults extends IncrementalSLResults{}
               
                package multi_task as settings.passive.cl.task_incremental.multi_task {
                    class MultiTaskSetting extends TaskIncrementalSLSetting {
                    }
                }
            }
            package iid as settings.passive.cl.iid {
                class TraditionalSLSetting extends TaskIncrementalSLSetting, DomainIncrementalSetting {
                    {field} + nb_tasks: int = 1 (constant)
                }
                class IIDResults extends IncrementalSLResults{}
            }
        }
    }

    package active as settings.active {
        'note: This is currently called GymDataLoader in the repo.
        class ActiveEnvironment extends Environment {}
        abstract class RLSetting extends Setting {
            {abstract} + train_dataloader(): ActiveEnvironment
            {abstract} + val_dataloader(): ActiveEnvironment
            {abstract} + test_dataloader(): ActiveEnvironment
        }

        package continual as settings.active.continual {
            class ContinualRLSetting implements RLSetting, IncrementalAssumption {
                {static} + Results: Type[Results] = RLResults

                + dataset: str = "cartpole"
                + nb_tasks: int = 1
                + train_max_steps: int = 10000
                + max_episodes: Optional[int] = None
                + steps_per_task: Optional[int] = None
                + episodes_per_task: Optional[int] = None
                + test_steps_per_task: int = 1000
                + test_steps: Optional[int] = None

                + smooth_task_boundaries: bool = True
                
                + train_task_schedule: dict
                + val_task_schedule: dict
                + test_task_schedule: dict
                + task_noise_std: float

                + train_wrappers: List[gym.Wrapper]
                + valid_wrappers: List[gym.Wrapper]
                + test_wrappers: List[gym.Wrapper]

                + add_done_to_observations: bool = False
            }
            
            class RLResults implements IncrementalResults
            
            package incremental as settings.active.continual.incremental {
                class IncrementalRLSetting extends ContinualRLSetting {
                    + nb_tasks: int = 10
                    {field} + smooth_task_boundaries: bool = False (constant)
                    + task_labels_at_train_time: bool = True
                    + task_labels_at_test_time: bool = False
                }

                package task_incremental_rl as settings.active.incremental.task_incremental_rl {
                    class TaskIncrementalRLSetting extends IncrementalRLSetting {
                        {field} + task_labels_at_train_time: bool = True (constant)
                        {field} + task_labels_at_test_time: bool = True (constant)
                    }

                    package stationary as settings.active.incremental.task_incremental_rl.stationary {
                        class RLSetting extends TaskIncrementalRLSetting {
                            {field} + nb_tasks: int = 1 (constant)
                        }
                    }
                }
            }
        }
    }
}

IncrementalAssumption -left-> IncrementalResults : produces
IncrementalAssumption -down-> IncrementalObservations : envs yield
ClassIncrementalSetting -left-> IncrementalSLResults : produces
TaskIncrementalSLSetting -left-> TaskIncrementalResults : produces
TraditionalSLSetting -left-> IIDResults : produces

SLSetting --> PassiveEnvironment : uses
RLSetting -right-> ActiveEnvironment : uses
ContinualRLSetting -> RLResults : produces

@enduml


================================================
FILE: sequoia/utils/__init__.py
================================================
""" Miscelaneous utility functions. """
import sys

# from .generic_functions import *
from .generic_functions.singledispatchmethod import singledispatchmethod
from .logging_utils import get_logger
from .parseable import Parseable
from .serialization import Serializable
from .encode import encode

# from .utils import


================================================
FILE: sequoia/utils/categorical.py
================================================
from typing import Any, Iterable, Optional, Union

import torch
from torch import Tensor
from torch.distributions import Categorical as Categorical_


class Categorical(Categorical_):
    """Simple little addition to the `torch.distributions.Categorical`,
    allowing it to be 'split' into a sequence of distributions (to help with the
    splitting in the output
    heads)
    """

    def __init__(
        self,
        probs: Optional[Tensor] = None,
        logits: Optional[Tensor] = None,
        validate_args: bool = None,
    ):
        super().__init__(probs=probs, logits=logits, validate_args=validate_args)
        self._device: torch.device = probs.device if probs is not None else logits.device

    def __getitem__(self, index: Optional[int]) -> "Categorical":
        return Categorical(logits=self.logits[index])
        # return Categorical(probs=self.probs[index])

    def __iter__(self) -> Iterable["Categorical"]:
        for index in range(self.logits.shape[0]):
            yield self[index]

    def __add__(self, other: Union["Categorical_", Any]) -> "Categorical":
        # Idea:, how about we return a wrapped version of `self` whose
        # 'sample' returns self.sample() + `other`?
        return NotImplemented

    def __mul__(self, other: Union["Categorical_", Any]) -> "Categorical":
        # Idea: Idea, how about we return a wrapped version of `self` whose
        # 'sample' returns self.sample() * `other`?
        return NotImplemented

    @property
    def device(self) -> torch.device:
        """The device of the tensors of this distribution.

        @lebrice: Not sure why this isn't already part of torch.Distribution base-class.
        """
        return self._device

    def to(self, device: Union[str, torch.device]) -> "Categorical":
        """Moves this distribution to another device.

        @lebrice: Not sure why this isn't already part of torch.Distribution base-class.
        """
        return type(self)(logits=self.logits.to(device=device))


================================================
FILE: sequoia/utils/data_utils.py
================================================
import os
from pathlib import Path
from typing import Dict, Iterable, Iterator, Sized, Tuple

import numpy as np
import torch
from torch import Tensor, nn
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import CIFAR100, VisionDataset

from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


def train_valid_split(
    train_dataset: VisionDataset, valid_fraction: float = 0.2
) -> Tuple[VisionDataset, VisionDataset]:
    n = len(train_dataset)
    valid_len: int = int((n * valid_fraction))
    train_len: int = n - valid_len

    indices = np.arange(n, dtype=int)
    np.random.shuffle(indices)

    valid_indices = indices[:valid_len]
    train_indices = indices[valid_len:]
    train = Subset(train_dataset, train_indices)
    valid = Subset(train_dataset, valid_indices)
    logger.info(f"Training samples: {len(train)}, Valid samples: {len(valid)}")
    return train, valid


def unbatch(dataloader: Iterable[Tuple[Tensor, Tensor]]) -> Iterable[Tuple[Tensor, Tensor]]:
    """Unbatches a dataloader.
    NOTE: this is a generator for a single pass through the dataloader, not multiple.
    """
    for batch in dataloader:
        if isinstance(batch, tuple):
            yield from zip(*batch)
        else:
            yield from batch


class unlabeled(Iterable[Tuple[Tensor]], Sized):
    """Given a DataLoader, returns an Iterable that drops the labels."""

    def __init__(self, labeled_dataloader: DataLoader):
        self.loader = labeled_dataloader

    def __iter__(self) -> Iterator[Tuple[Tensor]]:
        for batch in self.loader:
            assert isinstance(batch, tuple)
            x = batch[0]
            yield x,

    def __len__(self) -> int:
        return len(self.loader)


def keep_in_memory(dataset: VisionDataset) -> None:
    """Converts the dataset's `data` and `targets` attributes to Tensors.

    This has the consequence of keeping the entire dataset in memory.
    """

    if hasattr(dataset, "data") and not isinstance(dataset.data, (np.ndarray, Tensor)):
        dataset.data = torch.as_tensor(dataset.data)
    if not isinstance(dataset.targets, (np.ndarray, Tensor)):
        dataset.targets = torch.as_tensor(dataset.targets)

    if isinstance(dataset, CIFAR100):
        # TODO: Cifar100 seems to want its 'data' to a numpy ndarray.
        dataset.data = np.asarray(dataset.data)


class FixChannels(nn.Module):
    """Transform that fixes the number of channels in input images.

    For instance, if the input shape is:
    [28, 28] -> [3, 28, 28] (copy the image three times)
    [1, 28, 28] -> [3, 28, 28] (same idea)
    [10, 1, 28, 28] -> [10, 3, 28, 28] (keep batch intact, do the same again.)

    """

    def __call__(self, x: Tensor) -> Tensor:
        if x.ndim == 2:
            x = x.reshape([1, *x.shape])
            x = x.repeat(3, 1, 1)
        if x.ndim == 3 and x.shape[0] == 1:
            x = x.repeat(3, 1, 1)
        if x.ndim == 4 and x.shape[1] == 1:
            x = x.repeat(1, 3, 1, 1)
        return x


def get_imagenet_location() -> Path:
    from socket import gethostname

    hostname = gethostname()
    # For each hostname prefix, the location where the torchvision ImageNet dataset can be found.
    # TODO: Add the location for your own machine.
    imagenet_locations: Dict[str, Path] = {
        "mila": Path("/network/datasets/imagenet.var/imagenet_torchvision"),
        "": Path("/network/datasets/imagenet.var/imagenet_torchvision"),
    }
    for prefix, v in imagenet_locations.items():
        if hostname.startswith(prefix):
            return v
    if "IMAGENET_DIR" in os.environ:
        return Path(os.environ["IMAGENET_DIR"])
    raise RuntimeError(
        f"Could not find the ImageNet dataset on this machine with hostname "
        f"{hostname}. Known <prefix --> location> pairs: {imagenet_locations}"
    )


================================================
FILE: sequoia/utils/encode.py
================================================
""" Registers more datatypes to be used by the 'encode' function from
simple-parsing when serializing objects to json or yaml.
"""
import enum
import inspect
from pathlib import Path
from typing import Any, List, Type, Union

import numpy as np
import torch
from simple_parsing.helpers.serialization import encode, register_decoding_fn
from torch import Tensor, nn, optim

# Register functions for decoding Tensor and ndarray fields from json/yaml.
register_decoding_fn(Tensor, torch.as_tensor)
register_decoding_fn(np.ndarray, np.asarray)
register_decoding_fn(Type[nn.Module], lambda v: v)
register_decoding_fn(Type[optim.Optimizer], lambda v: v)

# NOTE: Uncomment this to enable logging tensors as-is when calling to_dict on a
# Serializable dataclass
@encode.register(Tensor)
def no_op_encode(value: Any):
    return value


# TODO: Look deeper into how things are pickled and moved by pytorch-lightning.
# Right now there is a warning by pytorch-lightning saying that some metrics
# will not be included in a checkpoint because they are lists instead of Tensors.
# This is because they got encoded with the function below when they shouldn't
# have.
# @encode.register(Tensor)
@encode.register(np.ndarray)
def encode_tensor(obj: Union[Tensor, np.ndarray]) -> List:
    return obj.tolist()


@encode.register
def encode_type(obj: type) -> List:
    if inspect.isclass(obj):
        return str(obj.__qualname__)
    elif inspect.isfunction(obj):
        return str(obj.__name__)
    return str(obj)


@encode.register
def encode_path(obj: Path) -> str:
    return str(obj)


@encode.register
def encode_device(obj: torch.device) -> str:
    return str(obj)


@encode.register
def encode_enum(value: enum.Enum):
    return value.value


================================================
FILE: sequoia/utils/generic_functions/__init__.py
================================================
""" Defines a bunch of single-dispatch generic functions, that are applicable
on structured objects, numpy arrays, tensors, spaces, etc.
"""
from ._namedtuple import NamedTuple, is_namedtuple
from .concatenate import concatenate
from .detach import detach
from .move import move
from .replace import replace
from .singledispatchmethod import singledispatchmethod
from .slicing import get_slice, set_slice
from .stack import stack
from .to_from_tensor import from_tensor, to_tensor


================================================
FILE: sequoia/utils/generic_functions/_namedtuple.py
================================================
""" Small 'patch' for the NamedTuple type, just so we can use
isinstance(obj, NamedTuple) and issubclass(some_class, NamedTuple) work
correctly.
"""
from inspect import isclass
from typing import Any, NamedTuple, Type


def is_namedtuple(obj: Any) -> bool:
    """Taken from https://stackoverflow.com/a/62692640/6388696"""
    return isinstance(obj, tuple) and hasattr(obj, "_asdict") and hasattr(obj, "_fields")


def is_namedtuple_type(obj: Type) -> bool:
    """Taken from https://stackoverflow.com/a/62692640/6388696"""
    return obj is NamedTuple or (
        isclass(obj)
        and issubclass(obj, tuple)
        and hasattr(obj, "_asdict")
        and hasattr(obj, "_fields")
    )


================================================
FILE: sequoia/utils/generic_functions/_namedtuple_test.py
================================================
from typing import NamedTuple

import pytest

from sequoia.utils.generic_functions._namedtuple import is_namedtuple, is_namedtuple_type


class DummyTuple(NamedTuple):
    a: int
    b: str


def test_is_namedtuple():
    bob = DummyTuple(1, "bob")
    assert is_namedtuple(bob)


def test_is_namedtuple_type():
    assert is_namedtuple_type(DummyTuple)
    assert is_namedtuple_type(NamedTuple)
    assert not is_namedtuple_type(tuple)
    assert not is_namedtuple_type(list)
    assert not is_namedtuple_type(dict)


@pytest.mark.xfail(reason="Not sure this is actually a good idea.")
def test_instance_check():
    bob = DummyTuple(1, "bob")
    assert isinstance(bob, DummyTuple)
    assert isinstance(bob, NamedTuple)
    assert isinstance(bob, tuple)


@pytest.mark.xfail(reason="Not sure this is actually a good idea.")
def test_instance_check():
    assert issubclass(DummyTuple, NamedTuple)
    assert issubclass(DummyTuple, tuple)
    assert issubclass(DummyTuple, DummyTuple)
    assert not issubclass(list, DummyTuple)
    assert not issubclass(tuple, DummyTuple)
    assert not issubclass(NamedTuple, DummyTuple)


================================================
FILE: sequoia/utils/generic_functions/concatenate.py
================================================
""" Generic function for concatenating ndarrays/tensors/distributions/Mappings
etc.

Extremely similar to `stack.py`, but concatenates along the described axis.
"""

from collections.abc import Mapping
from functools import singledispatch
from typing import Any, Dict, List, Sequence, TypeVar, Union

import numpy as np
import torch
from continuum import TaskSet
from continuum.tasks import concat as _continuum_concat
from torch import Tensor
from torch.utils.data import ChainDataset, ConcatDataset, Dataset, IterableDataset

from sequoia.utils.categorical import Categorical

T = TypeVar("T")


# @overload
# def concatenate(first_item: List[T], **kwargs) -> Sequence[T]:
#     ...

# @overload
# def concatenate(first_item: T, *others: T, **kwargs) -> Sequence[T]:
#     ...


@singledispatch
def concatenate(first_item: Union[T, List[T]], *others: T, **kwargs) -> Union[Sequence[T], Any]:
    # By default, if we don't know how to handle the item type, just
    # returns an ndarray with with all the items.

    if not others:
        # If this was called like concatenate(tensor_list), then we just split off
        # the list of items.
        assert isinstance(first_item, (list, tuple))
        if len(first_item) == 1:
            # Called like `concatenate([some_tensor])` -> returns `some_tensor`.
            return first_item[0]
        assert len(first_item) > 1
        items = first_item
        return concatenate(items[0], *items[1:], **kwargs)

    return np.asarray([first_item, *others], **kwargs)


@concatenate.register(type(None))
def _concatenate_ndarrays(first_item: None, *others: None, **kwargs) -> None:
    # NOTE: Concatenating a list of 'None' values will produce a single None output rather
    # than an ndarray of Nones.
    assert not any(other is not None for other in others)
    return None


@concatenate.register(np.ndarray)
def _concatenate_ndarrays(first_item: np.ndarray, *others: np.ndarray, **kwargs) -> np.ndarray:
    if not first_item.shape:
        # can't concatenate 0-dimensional arrays, so we stack them instead:
        return np.stack([first_item, *others], **kwargs)
    return np.concatenate([first_item, *others], **kwargs)


@concatenate.register(Tensor)
def _concatenate_tensors(first_item: Tensor, *others: Tensor, **kwargs) -> Tensor:
    if not first_item.shape:
        # can't concatenate 0-dimensional tensors, so we stack them instead.
        return torch.stack([first_item, *others], **kwargs)
    return torch.cat([first_item, *others], **kwargs)


@concatenate.register(Mapping)
def _concatenate_dicts(first_item: Dict, *others: Dict, **kwargs) -> Dict:
    return type(first_item)(
        **{
            key: concatenate(first_item[key], *(other[key] for other in others), **kwargs)
            for key in first_item.keys()
        }
    )


@concatenate.register(Categorical)
def _concatenate_distributions(
    first_item: Categorical, *others: Categorical, **kwargs
) -> Categorical:
    return Categorical(
        logits=torch.cat([first_item.logits, *(other.logits for other in others)], *kwargs)
    )


@concatenate.register
def _concatenate_tasksets(first_item: TaskSet, *others: TaskSet) -> TaskSet:
    return _continuum_concat([first_item, *others])


@concatenate.register(Dataset)
def _concatenate_datasets(first_item: Dataset[T], *others: Dataset[T]) -> ConcatDataset[T]:
    return ConcatDataset([first_item, *others])


@concatenate.register
def _concatenate_iterable_datasets(
    first_item: IterableDataset, *others: IterableDataset
) -> ChainDataset:
    return ChainDataset([first_item, *others])


================================================
FILE: sequoia/utils/generic_functions/detach.py
================================================
from collections.abc import Mapping
from functools import singledispatch
from typing import Any, Dict, Sequence, TypeVar

import numpy as np

from sequoia.utils.generic_functions._namedtuple import is_namedtuple

from ..categorical import Categorical

T = TypeVar("T")


@singledispatch
def detach(value: T) -> T:
    """Detaches a value when possible, else returns the value unchanged."""
    if hasattr(value, "detach") and callable(value.detach):
        return value.detach()
    raise NotImplementedError(f"Don't know how to detach value {value}!")
    # else:
    #     return value


@detach.register(np.ndarray)
@detach.register(type(None))
@detach.register(str)
@detach.register(int)
@detach.register(bool)
@detach.register(float)
def no_op_detach(v: Any) -> Any:
    return v


@detach.register(list)
@detach.register(tuple)
@detach.register(set)
def _detach_sequence(x: Sequence[T]) -> Sequence[T]:
    if is_namedtuple(x):
        return type(x)(*[detach(v) for v in x])
    return type(x)(detach(v) for v in x)


@detach.register(Mapping)
def _detach_dict(d: Dict[str, Any]) -> Dict[str, Any]:
    """Detaches all the keys and tensors in a dict, as well as all nested dicts."""
    return type(d)(**{detach(k): detach(v) for k, v in d.items()})


@detach.register
def _detach_categorical(v: Categorical) -> Categorical:
    return type(v)(logits=v.logits.detach())


================================================
FILE: sequoia/utils/generic_functions/move.py
================================================
"""Defines a singledispatch function to move objects to a given device.
"""
from functools import singledispatch
from typing import Dict, Sequence, TypeVar, Union

import torch

from sequoia.utils.generic_functions._namedtuple import is_namedtuple

T = TypeVar("T")
K = TypeVar("K")
V = TypeVar("V")


@singledispatch
def move(x: T, device: Union[str, torch.device]) -> T:
    """Moves x to the specified device if possible, else returns x unchanged.
    NOTE: This works for Tensors or any collection of Tensors.
    """
    if hasattr(x, "to") and callable(x.to) and device:
        return x.to(device=device)
    return x


@move.register(dict)
def move_dict(x: Dict[K, V], device: Union[str, torch.device]) -> Dict[K, V]:
    return type(x)(**{move(k, device): move(v, device) for k, v in x.items()})


@move.register(list)
@move.register(tuple)
@move.register(set)
def move_sequence(x: Sequence[T], device: Union[str, torch.device]) -> Sequence[T]:
    if is_namedtuple(x):
        return type(x)(*[move(v, device) for v in x])
    return type(x)(move(v, device) for v in x)


================================================
FILE: sequoia/utils/generic_functions/replace.py
================================================
""" Generic function for replacing items in an object. """

import dataclasses
from collections.abc import Sequence
from functools import singledispatch
from typing import Dict, Tuple, TypeVar

from gym import spaces

from sequoia.utils.generic_functions._namedtuple import is_namedtuple

T = TypeVar("T")


class Dataclass(type):
    """Used so we can do `isinstance(obj, Dataclass)`, or maybe even
    register dataclass handlers for singledispatch generic functions.
    """

    def __instancecheck__(self, instance) -> bool:
        # Return true if instance should be considered a (direct or indirect)
        # instance of class. If defined, called to implement
        # isinstance(instance, class).
        return dataclasses.is_dataclass(instance)

    def __subclasscheck__(self, subclass) -> bool:
        # Return true if subclass should be considered a (direct or indirect)
        # subclass of class. If defined, called to implement
        # issubclass(subclass, class).
        return dataclasses.is_dataclass(subclass)


@singledispatch
def replace(obj: T, **items) -> T:
    """Replaces the value at `key` in `obj` with `new_value`. Returns the
    modified object, either in-place (same instance as obj) or new.
    """
    raise NotImplementedError(
        f"TODO: Don't know how to set items '{items}' in obj {obj}, "
        f"(no handler registered for objects of type {obj}."
    )


@replace.register(Dataclass)
def _replace_dataclass_attribute(obj: Dataclass, **items) -> Dataclass:
    assert dataclasses.is_dataclass(obj)
    return dataclasses.replace(obj, **items)


@replace.register(dict)
def _replace_dict_item(obj: Dict, **items) -> Dict:
    assert isinstance(obj, dict)
    assert all(
        key in obj for key in items
    ), "replace should only be used to replace items, not to add new ones."
    new_obj = obj.copy()
    new_obj.update(items)
    return new_obj


@replace.register(list)
@replace.register(tuple)
def _replace_sequence_items(obj: Sequence, **items) -> Tuple:
    if is_namedtuple(obj):
        return obj._replace(**items)
    return type(obj)(items[i] if i in items else val for i, val in enumerate(obj))


@replace.register
def _replace_dict_items(obj: spaces.Dict, **items) -> Dict:
    """Handler for Dict spaces."""
    return type(obj)(replace(obj.spaces, **items))


================================================
FILE: sequoia/utils/generic_functions/replace_test.py
================================================
""" Tests for the `replace` generic function. """


================================================
FILE: sequoia/utils/generic_functions/singledispatchmethod.py
================================================
""" Little 'patch' that imports a backport of 'singledispatchmethod', if the
python version is < 3.8.
"""
import sys

if sys.version_info >= (3, 8):
    from functools import singledispatchmethod  # type: ignore
else:
    try:
        pass
    except ImportError as e:
        print(f"Couldn't import singledispatchmethod: {e}")
        print(
            "Since you're running python version below 3.8, you need to "
            "install the backport for singledispatchmethod (which was added "
            "to functools in python 3.8), using the following command:\n"
            "> pip install singledispatchmethod"
        )
        exit()


================================================
FILE: sequoia/utils/generic_functions/slicing.py
================================================
""" Extendable utility functions for getting and settings slices of arbitrarily
nested objects.

"""
from functools import singledispatch
from typing import Any, Dict, Sequence, Tuple, TypeVar

import numpy as np
from torch import Tensor

from ._namedtuple import is_namedtuple

K = TypeVar("K")
V = TypeVar("V")
T = TypeVar("T")


@singledispatch
def get_slice(value: T, indices: Sequence[int]) -> T:
    """Returns a slices of `value` at the given indices."""
    if value is None:
        return None
    return value[indices]


@get_slice.register(dict)
def _get_dict_slice(value: Dict[K, V], indices: Sequence[int]) -> Dict[K, V]:
    return type(value)((k, get_slice(v, indices)) for k, v in value.items())


@get_slice.register(tuple)
def _get_tuple_slice(value: Tuple[T, ...], indices: Sequence[int]) -> Tuple[T, ...]:
    # NOTE: we use type(value)( ... ) to create the output dicts or tuples, in
    # case a subclass of tuple or dict is being used (e.g. NamedTuples).
    if is_namedtuple(value):
        return type(value)(*[get_slice(v, indices) for v in value])
    return type(value)([get_slice(v, indices) for v in value])


@singledispatch
def set_slice(target: Any, indices: Sequence[int], values: Sequence[Any]) -> None:
    """Sets `values` at positions `indices` in `target`.

    Modifies the `target` in-place.
    """
    target[indices] = values


from sequoia.utils.categorical import Categorical


@set_slice.register
def _set_slice_categorical(
    target: Categorical, indices: Sequence[int], values: Sequence[Any]
) -> None:
    target.logits[indices] = values.logits


@set_slice.register(np.ndarray)
def _set_slice_ndarray(target: np.ndarray, indices: Sequence[int], values: Sequence[Any]) -> None:
    if isinstance(indices, Tensor):
        indices = indices.cpu().numpy()
    if isinstance(values, Tensor):
        values = values.cpu().numpy()
    target[indices] = values


@set_slice.register(Tensor)
def _set_slice_ndarray(target: Tensor, indices: Sequence[int], values: Sequence[Any]) -> None:
    target[indices] = values


@set_slice.register(dict)
def _set_dict_slice(
    target: Dict[K, Sequence[V]], indices: Sequence[int], values: Dict[K, Sequence[V]]
) -> None:
    for key, target_values in target.items():
        set_slice(target_values, indices, values[key])


@set_slice.register(tuple)
def _set_tuple_slice(target: Tuple[T, ...], indices: Sequence[int], values: Tuple[T, ...]) -> None:
    assert isinstance(values, tuple)
    assert len(target) == len(values)
    for target_item, values_item in zip(target, values):
        set_slice(target_item, indices, values_item)


================================================
FILE: sequoia/utils/generic_functions/slicing_test.py
================================================
from typing import NamedTuple

import numpy as np
import pytest

from .slicing import get_slice, set_slice


class DummyTuple(NamedTuple):
    a: np.ndarray
    b: np.ndarray


@pytest.mark.parametrize(
    "source, indices, expected",
    [
        (np.arange(10), np.arange(5), np.arange(5)),
        (
            {"a": np.arange(10), "b": np.arange(10)},
            np.arange(5),
            {"a": np.arange(5), "b": np.arange(5)},
        ),
        (({"a": np.arange(10)}, np.arange(10) + 5), 3, ({"a": 3}, 8)),
        (  # Test with namedtuples.
            {
                "a": np.array([0, 1, 2]),
                "b": DummyTuple(a=np.zeros([3, 4]), b=np.ones([5, 4])),
            },
            np.arange(2),
            {"a": np.array([0, 1]), "b": DummyTuple(a=np.zeros([2, 4]), b=np.ones([2, 4]))},
        ),
    ],
)
def test_get_slice(source, indices, expected):
    assert str(get_slice(source, indices)) == str(expected)


@pytest.mark.parametrize(
    "target, indices, values, result",
    [
        (
            np.arange(10, dtype=float),
            np.arange(5),
            np.zeros(5),
            np.concatenate([np.zeros(5), np.arange(5) + 5.0]),
        ),
        (
            {"a": np.arange(10, dtype=float), "b": np.zeros(10)},
            np.arange(10),
            {"a": np.ones(10), "b": np.ones(10)},
            {"a": np.ones(10), "b": np.ones(10)},
        ),
        (
            ({"a": np.arange(10)}, np.arange(10) + 5),
            0,
            ({"a": 3}, 8),
            (
                {"a": np.concatenate([np.array([3]), 1 + np.arange(9)])},
                np.concatenate([np.array([8]), 6 + np.arange(9)]),
            ),
        ),
        (  # Test with NamedTuples.
            {
                "a": np.array([0, 1, 2]),
                "b": DummyTuple(a=np.zeros(5), b=np.ones(5)),
            },
            np.arange(2),
            {"a": np.array([5, 7]), "b": DummyTuple(a=np.ones(2), b=np.zeros(2))},
            {
                "a": np.array([5, 7, 2]),
                "b": DummyTuple(
                    a=np.array([1.0, 1.0, 0.0, 0.0, 0.0]), b=np.array([0.0, 0.0, 1.0, 1.0, 1.0])
                ),
            },
        ),
    ],
)
def test_set_slice(target, indices, values, result):
    set_slice(target, indices, values)
    assert str(target) == str(result)


@pytest.mark.xfail(
    reason="Removed the 'concatenate' generic function, since "
    "there wasn't really a use for it anywhere."
)
@pytest.mark.parametrize(
    "a, b, kwargs, expected",
    [
        (np.array([0, 1, 2]), np.array([3, 4, 5, 6]), {}, np.arange(7)),
        (
            {
                "a": np.array([0, 1, 2]),
                "b": DummyTuple(a=np.zeros(3), b=np.ones(3)),
            },
            {
                "a": np.array([3, 4, 5]),
                "b": DummyTuple(a=np.zeros(4), b=np.ones(4)),
            },
            {},
            {
                "a": np.array([0, 1, 2, 3, 4, 5]),
                "b": DummyTuple(a=np.zeros(7), b=np.ones(7)),
            },
        ),
        (
            {
                "a": np.array([[0], [1], [2]]),  # [3, 1]
                "b": DummyTuple(a=np.zeros([1, 4]), b=np.ones([1, 4])),
            },
            {
                "a": np.array([[3], [4], [5], [6]]),  # shape [4, 1]
                "b": DummyTuple(a=np.zeros([2, 4]), b=np.ones([3, 4])),
            },
            {"axis": 0},
            {
                "a": np.array([[0], [1], [2], [3], [4], [5], [6]]),
                "b": DummyTuple(a=np.zeros([3, 4]), b=np.ones([4, 4])),
            },
        ),
    ],
)
def test_concat(a, b, kwargs, expected):
    from .slicing import concatenate

    assert str(concatenate(a, b, **kwargs)) == str(expected)


================================================
FILE: sequoia/utils/generic_functions/stack.py
================================================
""" Generic function for concatenating ndarrays/tensors/distributions/Mappings
etc.
"""
from collections.abc import Mapping
from functools import singledispatch
from typing import Any, Dict, List, TypeVar, Union

import numpy as np
import torch
from torch import Tensor

from sequoia.utils.categorical import Categorical

T = TypeVar("T")


# @overload
# def stack(first_item: List[T]) -> Sequence[T]:
#     ...

# @overload
# def stack(first_item: T, *others: T) -> Sequence[T]:
#     ...


@singledispatch
def stack(first_item: Union[T, List[T]], *others: T, **kwargs) -> Any:
    # By default, if we don't know how to handle the item type, just
    # return an ndarray with with all the items.
    # note: We could also try to return a tensor, rather than an ndarray
    # but I'd rather keep it simple for now.
    if not others:
        # If this was called like stack(tensor_list), then we just split off
        # the list of items.
        if first_item is None:
            # Stacking a list of 'None' items returns None.
            return None
        assert isinstance(first_item, (list, tuple)), first_item
        # assert len(first_item) > 1, first_item
        items = first_item
        return stack(items[0], *items[1:], **kwargs)
    np_stack_kwargs = kwargs.copy()
    if "dim" in np_stack_kwargs:
        np_stack_kwargs["axis"] = np_stack_kwargs.pop("dim")
    return np.stack([first_item, *others], **np_stack_kwargs)


@stack.register(type(None))
def _stack_none(first_item: None, *others: None, **kwargs) -> Union[None, np.ndarray]:
    # TODO: Should we return an ndarray with 'None' entries, of dtype np.object_? or
    # just a single None?
    # Opting for a single None for now, as it's easier to work with. (`v is None` works)
    if all(v is None for v in others):
        return None
    return np.array([first_item, *others])
    # if not others:
    #     return None
    # return np.array([None, *others])


@stack.register(np.ndarray)
def _stack_ndarrays(first_item: np.ndarray, *others: np.ndarray, **kwargs) -> np.ndarray:
    return np.stack([first_item, *others], **kwargs)


@stack.register(Tensor)
def _stack_tensors(first_item: Tensor, *others: Tensor, **kwargs) -> Tensor:
    return torch.stack([first_item, *others], **kwargs)


@stack.register(Mapping)
def _stack_dicts(first_item: Dict, *others: Dict, **kwargs) -> Dict:
    return type(first_item)(
        **{
            key: stack(first_item[key], *(other[key] for other in others), **kwargs)
            for key in first_item.keys()
        }
    )


@stack.register(Categorical)
def _stack_distributions(first_item: Categorical, *others: Categorical, **kwargs) -> Categorical:
    return Categorical(
        logits=torch.stack([first_item.logits, *(other.logits for other in others)], **kwargs)
    )


================================================
FILE: sequoia/utils/generic_functions/to_from_tensor.py
================================================
from functools import singledispatch
from typing import Any, Dict, Mapping, Optional, Tuple, TypeVar, Union

import numpy as np
import torch
from gym import Space, spaces
from torch import Tensor

T = TypeVar("T")


@singledispatch
def from_tensor(space: Space, sample: Union[Tensor, Any]) -> Union[np.ndarray, Any]:
    """Converts a Tensor into a sample from the given space."""
    if isinstance(sample, Tensor):
        return sample.cpu().numpy()
    return sample


@from_tensor.register
def _(space: spaces.Discrete, sample: Tensor) -> int:
    if isinstance(sample, Tensor):
        v = sample.item()
        int_v = int(v)
        if int_v != v:
            raise ValueError(f"Value {sample} isn't an integer, so it can't be from space {space}!")
        return int_v
    elif isinstance(sample, np.ndarray):
        assert sample.size == 1, sample
        return int(sample)
    return sample


@from_tensor.register
def _(
    space: spaces.Dict, sample: Dict[str, Union[Tensor, Any]]
) -> Dict[str, Union[np.ndarray, Any]]:
    return {key: from_tensor(space[key], value) for key, value in sample.items()}


from sequoia.utils.generic_functions._namedtuple import is_namedtuple


@from_tensor.register
def _(space: spaces.Tuple, sample: Tuple[Union[Tensor, Any]]) -> Tuple[Union[np.ndarray, Any]]:
    if not isinstance(sample, tuple):
        # BUG: Sometimes instead of having a sample of Tuple(Discrete(2))
        # be `(1,)`, its `array([1])` instead.
        sample = tuple(sample)
    values_gen = (from_tensor(space[i], value) for i, value in enumerate(sample))
    if is_namedtuple(sample):
        return type(sample)(*values_gen)
    return tuple(values_gen)


@singledispatch
def to_tensor(
    space: Space, sample: Union[np.ndarray, Any], device: torch.device = None
) -> Union[np.ndarray, Any]:
    """Converts a sample from the given space into a Tensor."""
    if sample is None:
        return sample
    return torch.as_tensor(sample, device=device)


@to_tensor.register
def _(
    space: spaces.MultiBinary, sample: np.ndarray, device: torch.device = None
) -> Dict[str, Union[Tensor, Any]]:
    return torch.as_tensor(sample, device=device, dtype=torch.bool)


@to_tensor.register
def _(
    space: spaces.Tuple,
    sample: Tuple[Union[np.ndarray, Any], ...],
    device: torch.device = None,
) -> Tuple[Union[Tensor, Any], ...]:
    if sample is None:
        assert all(isinstance(item_space, Sparse) for item_space in space.spaces)
        assert all(item_space.sparsity == 1.0 for item_space in space.spaces)
        # todo: What to do in this context?
        return None
        return np.full(
            [
                len(space.spaces),
            ],
            fill_value=None,
            dtype=np.object_,
        )
    if any(v is None for v in sample):
        assert False, (space, sample, device)
    return tuple(to_tensor(subspace, sample[i], device) for i, subspace in enumerate(space.spaces))


from typing import NamedTuple

from sequoia.common.spaces.named_tuple import NamedTupleSpace


@to_tensor.register
def _(space: NamedTupleSpace, sample: NamedTuple, device: torch.device = None):
    return space.dtype(
        **{
            key: to_tensor(space[i], sample[i], device=device)
            for i, key in enumerate(space._spaces.keys())
        }
    )


from sequoia.common.spaces.sparse import Sparse


@to_tensor.register(Sparse)
def sparse_sample_to_tensor(
    space: Sparse, sample: Union[Optional[Any], np.ndarray], device: torch.device = None
) -> Optional[Union[Tensor, np.ndarray]]:
    if space.sparsity == 1.0:
        if isinstance(space.base, spaces.MultiDiscrete):
            assert all(v == None for v in sample)
            return np.array([None if v == None else v for v in sample])
        if sample is not None:
            assert isinstance(sample, np.ndarray) and sample.dtype == np.object
            assert not sample.shape
        return None
    if space.sparsity == 0.0:
        # Do we need to convert dtypes here though?
        return to_tensor(space.base, sample, device)
    # 0 < sparsity < 1
    if isinstance(sample, np.ndarray) and sample.dtype == np.object:
        return np.array([None if v == None else v for v in sample])

    assert False, (space, sample)


================================================
FILE: sequoia/utils/logging_utils.py
================================================
import inspect
import logging
from functools import wraps
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, TypeVar, Union

import torch.multiprocessing as mp
import tqdm
from torch import Tensor

from sequoia.utils.utils import unique_consecutive

logging.basicConfig(
    format="%(asctime)s,%(msecs)d %(levelname)-8s [%(name)s:%(lineno)d] %(message)s",
    datefmt="%Y-%m-%d:%H:%M:%S",
    level=logging.INFO,
)
logging.getLogger("simple_parsing").setLevel(logging.ERROR)
root_logger = logging.getLogger("")
T = TypeVar("T")


def pbar(dataloader: Iterable[T], description: str = "", *args, **kwargs) -> Iterable[T]:
    kwargs.setdefault("dynamic_ncols", True)
    pbar = tqdm.tqdm(dataloader, *args, **kwargs)
    if description:
        pbar.set_description(description)
    return pbar


def get_logger(name: str, level: int = None) -> logging.Logger:
    """Gets a logger for the given file. Sets a nice default format.
    TODO: figure out if we should add handlers, etc.
    """
    name_is_path: bool = False
    try:
        p = Path(name)
        if p.exists():
            name = str(p.absolute().relative_to(Path.cwd()).as_posix())
            name_is_path = True
    except:
        pass
    from sys import argv

    logger = root_logger.getChild(name)

    debug_flags: List[str] = ["-d", "--debug", "-vv", "-vvv" "--verbose"]

    if level is None and any(v in argv for v in debug_flags):
        level = logging.DEBUG
    if level is None:
        level = logging.INFO
    logger.setLevel(level)

    # if the name is already something like foo.py:256
    # if not name_is_path and name[-1].isdigit():
    #     formatter = logging.Formatter('%(asctime)s, %(levelname)-8s log [%(name)s] %(message)s')
    # sh = logging.StreamHandler(sys.stdout)
    # sh.setFormatter(formatter)
    # sh.setLevel(level)
    # logger.addHandler(sh)
    # logger = logging.getLogger(name)
    # tqdm_handler = TqdmLoggingHandler()
    # tqdm_handler.setLevel(level)
    # logger.addHandler(tqdm_handler)
    return logger


def log_calls(function: Callable, level=logging.INFO) -> Callable:
    """Decorates a function and logs the calls to it and the passed args."""

    callerframerecord = inspect.stack()[1]  # 0 represents this line
    # 1 represents line at caller
    frame = callerframerecord[0]
    info = inspect.getframeinfo(frame)

    p = Path(info.filename)
    name = str(p.absolute().relative_to(Path.cwd()).as_posix())
    logger = get_logger(f"{name}:{info.lineno}")

    @wraps(function)
    def _wrapped(*args, **kwargs):
        process_name = mp.current_process().name
        logger.log(
            level,
            (
                f"Process {process_name} called {function.__name__} with "
                f"args={args} and kwargs={kwargs}."
            ),
        )
        return function(*args, **kwargs)

    return _wrapped


def get_new_file(file: Path) -> Path:
    """Creates a new file, adding _{i} suffixes until the file doesn't exist.

    Args:
        file (Path): A path.

    Returns:
        Path: a path that is new. Might have a new _{i} suffix.
    """
    if not file.exists():
        return file
    else:
        i = 0
        file_i = file.with_name(file.stem + f"_{i}" + file.suffix)
        while file_i.exists():
            i += 1
            file_i = file.with_name(file.stem + f"_{i}" + file.suffix)
        file = file_i
    return file


def cleanup(
    message: Dict[str, Union[Dict, str, float, Any]],
    sep: str = "/",
    keys_to_remove: List[str] = None,
) -> Dict[str, Union[float, Tensor]]:
    """Cleanup a message dict before it is logged to wandb.

    TODO: Describe what this does in more detail.

    Args:
        message (Dict[str, Union[Dict, str, float, Any]]): [description]
        sep (str, optional): [description]. Defaults to "/".

    Returns:
        Dict[str, Union[float, Tensor]]: Cleaned up dict.
    """
    # Flatten the log dictionary
    from sequoia.utils.utils import flatten_dict

    message = flatten_dict(message, separator=sep)

    keys_to_remove = keys_to_remove or []

    for k in list(message.keys()):
        if any(flag in k for flag in keys_to_remove):
            message.pop(k)
            continue

        v = message.pop(k)
        # Example input:
        # "Task_losses/Task1/losses/Test/losses/rotate/losses/270/metrics/270/accuracy"
        # Simplify the key, by getting rid of all the '/losses/' and '/metrics/' etc.
        things_to_remove: List[str] = [f"{sep}losses{sep}", f"{sep}metrics{sep}"]
        for thing in things_to_remove:
            while thing in k:
                k = k.replace(thing, sep)
        # --> "Task_losses/Task1/Test/rotate/270/270/accuracy"

        # Get rid of repetitive modifiers (ex: "/270/270" above)
        parts = k.split(sep)
        parts = [s for s in parts if not s.isspace()]
        k = sep.join(unique_consecutive(parts))
        # Will become:
        # "Task_losses/Task1/Test/rotate/270/accuracy"
        message[k] = v
    return message


class TqdmLoggingHandler(logging.Handler):
    def __init__(self, level=logging.NOTSET):
        super().__init__(level)

    def emit(self, record):
        try:
            msg = self.format(record)
            tqdm.tqdm.write(msg)
            self.flush()
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            self.handleError(record)


================================================
FILE: sequoia/utils/module_dict.py
================================================
""" Typed wrapper around `nn.ModuleDict`, just that just adds a get method. """
from typing import Any, MutableMapping, TypeVar, Union

from torch import nn

M = TypeVar("M", bound=nn.Module)
T = TypeVar("T")


class ModuleDict(nn.ModuleDict, MutableMapping[str, M]):
    def get(self, key: str, default: Any = None) -> Union[M, Any]:
        """Returns the module at `self[key]` if present, else `default`.

        Args:
            key (str): a key.
            default (Union[M, nn.Module], optional): Default value to return.
                Defaults to None.

        Returns:
            Union[Optional[nn.Module], Optional[M]]: The nn.Module at that key.
        """
        return self[key] if key in self else default


================================================
FILE: sequoia/utils/parseable.py
================================================
import dataclasses
import shlex
import sys
from argparse import Namespace
from dataclasses import is_dataclass
from typing import List, Optional, Tuple, Type, TypeVar, Union

from pytorch_lightning import LightningDataModule
from simple_parsing import ArgumentParser

from sequoia.utils.utils import camel_case

from .logging_utils import get_logger

logger = get_logger(__name__)
P = TypeVar("P", bound="Parseable")


class Parseable:
    _argv: Optional[List[str]] = None

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser) -> None:
        """Add the command-line arguments for this class to the given parser.

        Override this if you don't use simple-parsing to add the args.

        Parameters
        ----------
        parser : ArgumentParser
            The ArgumentParser.
        """
        if is_dataclass(cls):
            dest = camel_case(cls.__qualname__)
            parser.add_arguments(cls, dest=dest)
        elif issubclass(cls, LightningDataModule):
            # TODO: Test this case out (using a LightningDataModule as a Setting).
            super().add_argparse_args(parser)  # type: ignore
        else:
            raise NotImplementedError(
                f"Don't know how to add command-line arguments for class "
                f"{cls}, since it isn't a dataclass and doesn't override the "
                f"`add_argparse_args` method!\n"
                f"Either make class {cls} a dataclass and add command-line "
                f"arguments as fields, or add an implementation for the "
                f"`add_argparse_args` and `from_argparse_args` classmethods."
            )

    @classmethod
    def from_argparse_args(cls: Type[P], args: Namespace) -> P:
        """Extract the parsed command-line arguments from the namespace and
        return an instance of class `cls`.

        Override this if you don't use simple-parsing.

        Parameters
        ----------
        args : Namespace
            The namespace containing all the parsed command-line arguments.
        dest : str, optional
            The , by default None

        Returns
        -------
        cls
            An instance of the class `cls`.
        """
        if is_dataclass(cls):
            dest = camel_case(cls.__qualname__)
            return getattr(args, dest)

        # if issubclass(cls, LightningDataModule):
        #     # TODO: Test this case out (using a LightningDataModule as a Setting).
        #     return super()._from_argparse_args(args)  # type: ignore

        raise NotImplementedError(
            f"Don't know how to extract the command-line arguments for class "
            f"{cls} from the namespace, since {cls} isn't a dataclass and "
            f"doesn't override the `from_argparse_args` classmethod."
        )

    @classmethod
    def from_args(
        cls: Type[P], argv: Union[str, List[str]] = None, reorder: bool = True, strict: bool = True
    ) -> P:
        """Parse an instance of this class from the command-line args.

        Parameters
        ----------
        cls : Type[P]
            The class to instantiate. This only supports dataclasses by default.
            For other classes, you'll have to implement this method yourself.
        argv : Union[str, List[str]], optional
            The command-line string or list of string arguments in the style of
            sys.argv. Could also be the unused_args returned by
            .from_known_args(), for example. By default None
        reorder : bool, optional
            Wether to attempt to re-order positional arguments. Only really
            useful when using subparser actions. By default True.
        strict : bool, optional
            Wether to raise an error if there are extra arguments. By default
            False

            TODO: Might be a good idea to actually change this default to 'True'
            to avoid potential subtle bugs in various places. This would however
            make the code slightly more difficult to read, since we'd have to
            pass some unused_args around. Also might be a problem when the same
            argument e.g. batch_size (at some point) is in both the Setting and
            the Method, because then the arg would be 'consumed', and not passed
            to the second parser in the chain.

        Returns
        -------
        P
            The parsed instance of this class.

        Raises
        ------
        NotImplementedError
            [description]
        """
        # if not is_dataclass(cls):
        #     raise NotImplementedError(
        #         f"Don't know how to create an instance of class {cls} from the "
        #         f"command-line, as it isn't a dataclass. You'll have to "
        #         f"override the `from_args` or `from_known_args` classmethods."
        #     )
        if isinstance(argv, str):
            argv = shlex.split(argv)
        instance, unused_args = cls.from_known_args(
            argv=argv,
            reorder=reorder,
            strict=strict,
        )
        assert not (strict and unused_args), "an error should have been raised"
        return instance

    @classmethod
    def from_known_args(
        cls, argv: Union[str, List[str]] = None, reorder: bool = True, strict: bool = False
    ) -> Tuple[P, List[str]]:
        # if not is_dataclass(cls):
        #     raise NotImplementedError(
        #         f"Don't know how to parse an instance of class {cls} from the "
        #         f"command-line, as it isn't a dataclass or doesn't have the "
        #         f"`add_arpargse_args` and `from_argparse_args` classmethods. "
        #         f"You'll have to override the `from_known_args` classmethod."
        #     )

        if argv is None:
            argv = sys.argv[1:]
        logger.debug(f"parsing an instance of class {cls} from argv {argv}")
        if isinstance(argv, str):
            argv = shlex.split(argv)

        parser = ArgumentParser(description=cls.__doc__, add_dest_to_option_strings=False)
        cls.add_argparse_args(parser)
        # TODO: Set temporarily on the class, so its accessible in the class constructor
        cls_argv = cls._argv
        cls._argv = argv

        instance: P
        if strict:
            args = parser.parse_args(argv)
            unused_args = []
        else:
            args, unused_args = parser.parse_known_args(argv, attempt_to_reorder=reorder)
            if unused_args:
                logger.debug(
                    RuntimeWarning(f"Unknown/unused args when parsing class {cls}: {unused_args}")
                )
        instance = cls.from_argparse_args(args)
        # Save the argv that were used to create the instance on its `_argv`
        # attribute.
        instance._argv = argv
        cls._argv = cls_argv
        return instance, unused_args

    def upgrade(self, target_type: Type[P]) -> P:
        """Upgrades the hparams `self` to the given `target_type`, filling in
        any missing values by parsing them from the command-line.

        If `self` was created from the command-line, then the same argv that
        were used to create `self` will be used to create the new object.

        Returns
        -------
        type(self).HParams
            Hparams of the type `self.HParams`, with the original values
            preserved and any new values parsed from the command-line.
        """
        # NOTE: This (getting the wrong hparams class) could happen for
        # instance when parsing a BaseMethod from the command-line, the
        # default type of hparams on the method is BaseModel.HParams,
        # whose `output_head` field doesn't have the right type exactly.
        current_type = type(self)
        current_hparams = dataclasses.asdict(self)
        # NOTE: If a value is not at its current default, keep it.
        default_hparams = target_type()
        missing_fields = [
            f.name
            for f in dataclasses.fields(target_type)
            if f.name not in current_hparams
            or current_hparams[f.name] == getattr(current_type(), f.name, None)
            or current_hparams[f.name] == getattr(default_hparams, f.name)
        ]
        logger.warning(
            RuntimeWarning(
                f"Upgrading the hparams from type {current_type} to "
                f"type {target_type}. This will try to fetch the values for "
                f"the missing fields {missing_fields} from the command-line. "
            )
        )
        # Get the missing values

        if self._argv:
            return target_type.from_args(argv=self._argv, strict=False)
        hparams = target_type.from_args(argv=self._argv, strict=False)
        for missing_field in missing_fields:
            current_hparams[missing_field] = getattr(hparams, missing_field)
        return target_type(**current_hparams)

    # @classmethod
    # def fields(cls) -> Dict[str, Field]:
    #     return {f.name: f for f in dataclasses.fields(cls)}


================================================
FILE: sequoia/utils/plotting.py
================================================
from dataclasses import dataclass
from typing import List

import matplotlib.pyplot as plt


def autolabel(axis, rects: List[plt.Rectangle], bar_height_scale: float = 1.0):
    """Attach a text label above each bar in *rects*, displaying its height.

    Taken from https://matplotlib.org/gallery/lines_bars_and_markers/barchart.html#sphx-glr-gallery-lines-bars-and-markers-barchart-py
    """
    for rect in rects:
        height = rect.get_height()
        bottom = rect.get_y()
        value = height / bar_height_scale
        if value != 0.0:
            axis.annotate(
                f"{value:.0%}",
                xy=(rect.get_x() + rect.get_width() / 2, bottom + height),
                xytext=(0, 3),  # 3 points vertical offset
                textcoords="offset points",
                ha="center",
                va="bottom",
            )


def maximize_figure():
    fig_manager = plt.get_current_fig_manager()
    try:
        fig_manager.window.showMaximized()
    except:
        try:
            fig_manager.window.state("zoomed")  # works fine on Windows!
        except:
            try:
                fig_manager.frame.Maximize(True)
            except:
                print("Couldn't maximize the figure.")


@dataclass
class PlotSectionLabel:
    """Used to label a section of a plot between `start_step` and `stop_step` with a label of `description`."""

    start_step: int
    stop_step: int
    description: str = ""

    @property
    def middle(self) -> float:
        return (self.start_step + self.stop_step) / 2

    @property
    def width(self) -> int:
        return self.stop_step - self.start_step

    def annotate(self, ax: plt.Axes, height: float = -0.1):
        """Annotate the corresponding region of the axis.

        Adds vertical lines at the `start_step` and `end_step` along with a text
        label for the description in between.


        Args:
            ax (plt.Axes): An Axis to annotate.
            height (float): The height at which to place the text.
        """
        ax.axvline(self.start_step, linestyle=":", color="gray")
        ax.axvline(self.stop_step, linestyle=":", color="gray")
        ax.text(self.middle, height, self.description, ha="center")


================================================
FILE: sequoia/utils/pretrained_utils.py
================================================
from typing import Callable, Optional, Tuple, Union

from torch import nn

from sequoia.utils.logging_utils import get_logger

logger = get_logger(__name__)


def get_pretrained_encoder(
    encoder_model: Callable,
    pretrained: bool = True,
    freeze_pretrained_weights: bool = False,
    new_hidden_size: int = None,
) -> Tuple[nn.Module, int]:
    """Returns a pretrained encoder on ImageNet from `torchvision.models`

    If `new_hidden_size` is True, will try to replace the classification layer
    block with a `nn.Linear(<h>, new_hidden_size)`, where <h> corresponds to the
    hidden size of the model. This last layer will always be trainable, even if
    `freeze_pretrained_weights` is True.

    Args:
        encoder_model (Callable): Which encoder model to use. Should usually be
            one of the models in the `torchvision.models` module.
        pretrained (bool, optional): Wether to try and download the pretrained
            weights. Defaults to True.
        freeze_pretrained_weights (bool, optional): Wether the pretrained
            (downloaded) weights should be frozen. Has no effect when
            `pretrained` is False. Defaults to False.
        new_hidden_size (int): The hidden size of the resulting model.

    Returns:
        Tuple[nn.Module, int]: the pretrained encoder, with the classification
            head removed, and the resulting output size (hidden dims)
    """

    logger.debug(f"Using encoder model {encoder_model.__name__}")
    logger.debug(f"pretrained: {pretrained}")
    logger.debug(f"freezing the pretrained weights: {freeze_pretrained_weights}")
    try:
        encoder = encoder_model(pretrained=pretrained)
    except TypeError as e:
        encoder = encoder_model()

    if pretrained and freeze_pretrained_weights:
        # Fix the parameters of the model.
        for param in encoder.parameters():
            param.requires_grad = False

    replace_classifier = new_hidden_size is not None
    # We want to replace the last layer (the classification layer) with a
    # projection from their hidden space dimension to ours.
    new_classifier: Optional[nn.Linear] = None
    classifier = None
    if not replace_classifier:
        # We will create the 'new classifier' but then not add it.
        # this allows us to also get the 'hidden_size' of the resulting encoder.
        new_hidden_size = 1

    for attr in ["classifier", "fc"]:
        if hasattr(encoder, attr):
            classifier: Union[nn.Sequential, nn.Linear] = getattr(encoder, attr)
            new_classifier: Optional[nn.Linear] = None

            # Get the number of input features.
            if isinstance(classifier, nn.Linear):
                new_classifier = nn.Linear(
                    in_features=classifier.in_features, out_features=new_hidden_size
                )
            elif isinstance(classifier, nn.Sequential):
                # if there is a classifier "block", get the number of
                # features from the first encountered dense layer.
                for layer in classifier.children():
                    if isinstance(layer, nn.Linear):
                        new_classifier = nn.Linear(layer.in_features, new_hidden_size)
                        break
            break

    if new_classifier is None:
        raise RuntimeError(
            f"Can't detect the hidden size of the model '{encoder_model.__name__}'!"
            f" (last layer is :{classifier}).\n"
        )

    if not replace_classifier:
        new_hidden_size = new_classifier.in_features
        new_classifier = nn.Sequential()
    else:
        logger.debug(
            f"Replacing the attribute '{attr}' of the "
            f"{encoder_model.__name__} model with a new classifier: "
            f"{new_classifier}"
        )
    setattr(encoder, attr, new_classifier)
    return encoder, new_hidden_size


================================================
FILE: sequoia/utils/readme.py
================================================
import os
import textwrap
from contextlib import redirect_stdout
from inspect import getsourcefile
from io import StringIO
from pathlib import Path
from typing import TYPE_CHECKING, List, Type

from sequoia.settings import Setting

if TYPE_CHECKING:
    from sequoia.settings import Setting

# NOTE: Update this if we move this `readme.py` somewhere else.
SEQUOIA_ROOT_DIR = Path(os.path.abspath(os.path.dirname(__file__))).parent.parent


def get_relative_path_to(something: Type) -> Path:
    """Attempts to give the relative path from the current working directory to the
    file where somethign is defined. If that's not possible, returns an absolute path
    instead.
    """
    # This isn't quite right: Should be a relative path to the source file:
    current_dir = Path.cwd()
    source_file = Path(getsourcefile(something)).relative_to(current_dir)
    return source_file


def get_tree_string(
    root_setting: Type["Setting"] = Setting,
    with_methods: bool = False,
    with_assumptions: bool = False,
    with_docstrings: bool = False,
) -> str:
    """Get a string representation of the tree!

    I want to return something like this:
    ```
    "Setting"
    ├── active
    │   └── rl
    ├── base
    └── passive
        └── cl
            └── task_incremental
                └── iid
    ```
    """
    if with_assumptions:
        raise NotImplementedError(
            f"TODO: display the assumptions for each setting into the tree string " f"somehow."
        )
    setting: Type["Setting"] = root_setting
    # prefix: str = ""

    message: List[str] = []
    source_file = get_relative_path_to(setting)
    message += [f"{setting.get_name()} found in [{setting.__name__}]({source_file})"]
    applicable_methods = setting.get_applicable_methods()

    n_children = len(setting.get_immediate_children())
    bar = "│" if n_children else " "

    if with_docstrings:
        p = f"{bar}  "
        docstring = setting.__doc__
        # Note: why not use something like textwrap.indent?
        message.extend([p + line for line in docstring.splitlines()])
        message += [p]

    if with_methods:
        p = f"{bar}  "
        message += [f"{p} Applicable methods: "]
        for method in applicable_methods:
            source_file = get_relative_path_to(method)
            message += [f"{p}  * [{method.__name__}]({source_file})"]
        message += [f"{p} "]

    # message = "\n".join(message) + "\n"
    # print(f"Children: {setting.get_children()}")
    # print(f"Children[0]'s children: {setting.get_children()[0].children}")

    for i, child_setting in enumerate(setting.get_immediate_children()):
        # Recurse!
        child_message = get_tree_string(child_setting)

        child_message_lines = child_message.splitlines()
        for j, line in enumerate(child_message_lines):
            first: str = "x  "  # just for debugging, shouldn't be an x left after.
            if j == 0:
                if i == n_children - 1:
                    # Last child uses different graphic
                    first = "└──"
                else:
                    first = "├──"
            else:
                if i == n_children - 1:
                    first = "   "
                else:
                    first = "│  "
            message += [first + line]

    first_line = f"─ {message[0]}\n"
    message_str = "\n".join(message[1:])
    message_str = textwrap.indent(message_str, "  ")
    return first_line + message_str


def get_tree_string_markdown(
    root_setting: Type["Setting"] = Setting,
    with_methods: bool = False,
    with_docstring: bool = False,
):
    """Get a string representation of the tree!

    I want to return something like this:

    - "Setting"
        - active
            - rl
    - base
        - passive
            - cl
                - task_incremental
                    * iid

    """
    setting = root_setting

    message_lines: List[str] = []
    source_file = get_relative_path_to(setting)
    message_lines += [f"- ## [{setting.__name__}]({source_file})"]

    applicable_methods = setting.get_applicable_methods()
    tab = "  "

    if with_docstring:
        message_lines += [""]
        docstring: str = setting.__doc__
        docstring_lines = docstring.splitlines()
        # The first line is always less indented than the rest, which looks weird:
        first_line = docstring_lines[0].lstrip()
        # Remove the common indent in the rest of the docstring lines:
        other_lines = textwrap.dedent("\n".join(docstring_lines[1:]))
        # re-indent the docstring, with all equal indentation now:
        docstring = first_line + "\n" + other_lines
        # docstring = textwrap.shorten(docstring, replace_whitespace=False, width=130)
        # docstring = textwrap.fill(docstring, max_lines=10)
        # print(setting)
        # print(docstring)
        # exit()
        docstring = textwrap.indent(docstring, tab)

        message_lines.extend(docstring.splitlines())
        message_lines += [""]

    if with_methods:
        message_lines += [""]
        message_lines += ["Applicable methods: "]
        for method in applicable_methods:
            source_file = get_relative_path_to(method)
            message_lines += [f" * [{method.__name__}]({source_file})"]
        message_lines += [""]

    # message = "\n".join(message) + "\n"
    # print(f"Children: {setting.get_children()}")
    # print(f"Children[0]'s children: {setting.get_children()[0].children}")

    for child_setting in setting.get_immediate_children():
        child_message = get_tree_string_markdown(
            child_setting, with_methods=with_methods, with_docstring=with_docstring
        )
        child_message = textwrap.indent(child_message, tab)
        message_lines += [""]
        message_lines.extend(child_message.splitlines())
        message_lines += [""]

    return "\n".join(message_lines)


def print_methods():
    from sequoia.methods import all_methods

    for method in all_methods:
        source_file = get_relative_path_to(method)
        target_setting: Type["Setting"] = method.target_setting
        setting_file = get_relative_path_to(target_setting)
        method_name = method.__name__

        if method.get_family() != "methods":
            method_name = method.get_family() + "." + method_name

        print(f"- ## [{method_name}]({source_file}) ")
        print()
        print(f"\t - Target setting: [{target_setting.__name__}]({setting_file})")
        print()
        docstring: str = method.__doc__
        docstring_lines = docstring.splitlines()
        # The first line is always less indented than the rest, which looks weird:
        first_line = docstring_lines[0].lstrip()
        # Remove the common indent in the rest of the docstring lines:
        other_lines = textwrap.dedent("\n".join(docstring_lines[1:]))
        # re-indent the docstring, with all equal indentation now:
        docstring = first_line + "\n" + other_lines
        print(textwrap.indent(docstring, "\t"))


def add_stuff_to_readme(readme_path=Path("README.md"), settings: bool = True, methods: bool = True):
    token = "<!-- MAKETREE -->\n"
    assert settings or methods
    lines: List[str] = []
    with open(readme_path) as f:
        with StringIO(f.read()) as f:
            lines = f.readlines()
            if token not in lines:
                print("didn't find token!")
                exit()
            tree_index = lines.index(token) + 1

    # print(get_tree_string_markdown(with_methods=False, with_docstring=True))
    # exit()

    with open(readme_path, "w") as f:
        # with nullcontext():
        with redirect_stdout(f):
            # with nullcontext():
            # reversed insert?
            # Print the existing lines back:
            print(*lines[: tree_index + 1], sep="")
            if settings:
                print("\n\n## Available Settings:\n")
                print()
                print(get_tree_string_markdown(with_methods=False, with_docstring=True))
                print()
            # print("```")
            # print(get_tree_string())
            # print("```")
            if methods:
                print("\n\n## Registered Methods (so far):\n")
                print_methods()
                print()


if __name__ == "__main__":
    # print(get_tree_string())
    # print(get_tree_string_markdown(with_methods=False, with_docstring=True))
    add_stuff_to_readme(readme_path=Path("sequoia/settings/README.md"), methods=False)
    add_stuff_to_readme(readme_path=Path("sequoia/methods/README.md"), settings=False)


================================================
FILE: sequoia/utils/serialization.py
================================================
from dataclasses import dataclass, fields
from inspect import isfunction
from pathlib import Path
from typing import Any, Dict, Iterable, Tuple, Type, TypeVar, Union, get_type_hints

import torch
from simple_parsing.helpers import Serializable as SerializableBase
from simple_parsing.helpers.serialization import register_decoding_fn

from sequoia.utils.generic_functions import detach

from .generic_functions.detach import detach
from .generic_functions.move import move
from .logging_utils import get_logger
from .utils import dict_union

register_decoding_fn(torch.device, torch.device)

T = TypeVar("T")
logger = get_logger(__name__)


def cpu(x: Any) -> Any:
    return move(x, "cpu")


class Pickleable:
    """Helps make a class pickleable."""

    def __getstate__(self):
        """We implement this to just make sure to detach the tensors if any
        before pickling.
        """
        # We use `vars(self)` to get all the attributes, not just the fields.
        state_dict = vars(self)
        return cpu(detach(state_dict))

    def __setstate__(self, state: Dict):
        # logger.debug(f"__setstate__ was called")
        self.__dict__.update(state)


S = TypeVar("S", bound="Serializable")


@dataclass
class Serializable(SerializableBase, Pickleable, decode_into_subclasses=True):  # type: ignore
    # NOTE: This currently doesn't add much compared to `Serializable` from simple-parsing apart
    # from not dropping the keys.

    def save(self, path: Union[str, Path], **kwargs) -> None:
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        # Save to temp file, so we don't corrupt the save file.
        save_path_tmp = path.with_name(path.stem + "_temp" + path.suffix)
        # write out to the temp file.
        super().save(save_path_tmp, **kwargs)
        # Rename the temp file to the right path, overwriting it if it exists.
        save_path_tmp.replace(path)

    def detach(self: S) -> S:
        return type(self)(
            **detach(
                {
                    field.name: getattr(self, field.name)
                    for field in fields(self)
                    if field.metadata.get("to_dict", True)
                }
            )
        )

    def to(self, device: Union[str, torch.device]):
        """Returns a new object with all the attributes 'moved' to `device`.

        NOTE: This doesn't implement anything related to the other args like
        memory format or dtype.
        TODO: Maybe add something to convert everything that is a Tensor or
        numpy array to a given dtype?
        """
        return type(self)(**{name: move(item, device) for name, item in self.items()})

    def items(self) -> Iterable[Tuple[str, Any]]:
        for field in fields(self):
            yield field.name, getattr(self, field.name)

    def cpu(self):
        return self.to("cpu")

    def cuda(self, device: Union[str, torch.device] = None):
        return self.to(device or "cuda")

    def merge(self, other: "Serializable") -> "Serializable":
        """Overwrite values in `self` present in 'other' with the values from
        `other`.
        Also merges child elements recursively.

        Returns a new object, i.e. this doesn't modify `self` in-place.
        """
        self_dict = self.to_dict()
        if isinstance(other, SerializableBase):
            other = other.to_dict()
        elif not isinstance(other, dict):
            raise RuntimeError(f"Can't merge self with {other}.")
        return type(self).from_dict(dict_union(self_dict, other))


class decode:
    @staticmethod
    def register(fn_or_type: Type = None):
        """Decorator to be used to register a decoding function for a given type.

        This can be used in two different ways. The type annotation can either be
        explicit, like so:
        ```python
        @decode.register(SomeType)
        def decode_some_type(v: str):
           return SomeType(v)  # return an instance of SomeType from a string.
        ```
        or implicitly determined through the return type annotation, like so:
        ```
        @decode.register
        def decode_some_type(v: str) -> SomeType:
           (...)
        ```

        In the end, this just calls `register_decoding_fn(SomeType, decode_some_type)`.
        """

        def _wrapper(fn):
            if fn_or_type is not None:
                type_ = fn_or_type
            else:
                type_hints = get_type_hints(fn)
                if "return" not in type_hints:
                    raise RuntimeError(
                        f"Need to either explicitly pass a type to `register`, or use "
                        f"a return type annotation (e.g. `-> Foo:`) on the function!"
                    )
                type_ = type_hints["return"]
            register_decoding_fn(type_, fn)
            return fn

        if isfunction(fn_or_type):
            fn = fn_or_type
            fn_or_type = None
            return _wrapper(fn)
        return _wrapper


================================================
FILE: sequoia/utils/utils.py
================================================
""" Miscelaneous utility functions. """
import functools
import hashlib
import inspect
import itertools
import operator
import re
import warnings
from collections import defaultdict
from dataclasses import Field, fields
from functools import reduce
from inspect import getsourcefile, isclass
from itertools import filterfalse, groupby
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Type, TypeVar, Union

from simple_parsing import field
from torch import Tensor, cuda

cuda_available = cuda.is_available()
gpus_available = cuda.device_count()

T = TypeVar("T")
K = TypeVar("K")
V = TypeVar("V")

Dataclass = TypeVar("Dataclass")


def field_dict(dataclass: Dataclass) -> Dict[str, Field]:
    return {field.name: field for field in fields(dataclass)}


def mean(values: Iterable[T]) -> T:
    values = list(values)
    return sum(values) / len(values)


def pairwise(iterable: Iterable[T]) -> Iterable[Tuple[T, T]]:
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)


def n_consecutive(items: Iterable[T], n: int = 2, yield_last_batch=True) -> Iterable[Tuple[T, ...]]:
    """Collect data into chunks of up to `n` elements.

    When `yield_last_batch` is True, the final chunk (which might have fewer
    than `n` items) will also be yielded.

    >>> list(n_consecutive("ABCDEFG", 3))
    [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)]
    """
    values: List[T] = []
    for item in items:
        values.append(item)
        if len(values) == n:
            yield tuple(values)
            values.clear()
    if values and yield_last_batch:
        yield tuple(values)


def fix_channels(x_batch: Tensor) -> Tensor:
    # TODO: Move this to data_utils.py
    if x_batch.dim() == 3:
        return x_batch.unsqueeze(1)
    else:
        if x_batch.shape[1] != min(x_batch.shape[1:]):
            return x_batch.transpose(1, -1)
        else:
            return x_batch


def to_dict_of_lists(list_of_dicts: Iterable[Dict[str, Any]]) -> Dict[str, List[Tensor]]:
    """Returns a dict of lists given a list of dicts.

    Assumes that all dictionaries have the same keys as the first dictionary.

    Args:
        list_of_dicts (Iterable[Dict[str, Any]]): An iterable of dicts.

    Returns:
        Dict[str, List[Tensor]]: A Dict of lists.
    """
    result: Dict[str, List[Any]] = defaultdict(list)
    for i, d in enumerate(list_of_dicts):
        for key, value in d.items():
            result[key].append(value)
        assert d.keys() == result.keys(), f"Dict {d} at index {i} does not contain all the keys!"
    return result


def add_prefix(some_dict: Dict[str, T], prefix: str = "", sep=" ") -> Dict[str, T]:
    """Adds the given prefix to all the keys in the dictionary that don't already start with it.

    Parameters
    ----------
    - some_dict : Dict[str, T]

        Some dictionary.
    - prefix : str, optional, by default ""

        A string prefix to append.

    - sep : str, optional, by default " "

        A string separator to add between the `prefix` and the existing keys
        (which do no start by `prefix`).


    Returns
    -------
    Dict[str, T]
        A new dictionary where all keys start with the prefix.


    Examples:
    -------
    >>> add_prefix({"a": 1}, prefix="bob", sep="")
    {'boba': 1}
    >>> add_prefix({"a": 1}, prefix="bob")
    {'bob a': 1}
    >>> add_prefix({"a": 1}, prefix="a")
    {'a': 1}
    >>> add_prefix({"a": 1}, prefix="a ")
    {'a': 1}
    >>> add_prefix({"a": 1}, prefix="a", sep="/")
    {'a': 1}
    """
    if not prefix:
        return some_dict
    result: Dict[str, T] = type(some_dict)()

    if sep and prefix.endswith(sep):
        prefix = prefix.rstrip(sep)

    for key, value in some_dict.items():
        new_key = key if key.startswith(prefix) else (prefix + sep + key)
        result[new_key] = value
    return result


def loss_str(loss_tensor: Tensor) -> str:
    loss = loss_tensor.item()
    if loss == 0:
        return "0"
    elif abs(loss) < 1e-3 or abs(loss) > 1e3:
        return f"{loss:.1e}"
    else:
        return f"{loss:.3f}"


def set_seed(seed: int):
    """Set the pytorch/numpy random seed."""
    import random

    import numpy as np
    import torch

    random.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


def compute_identity(size: int = 16, **sample) -> str:
    """Compute a unique hash out of a dictionary

    Parameters
    ----------
    size: int
        size of the unique hash

    **sample:
        Dictionary to compute the hash from

    """
    sample_hash = hashlib.sha256()

    for k, v in sorted(sample.items()):
        sample_hash.update(k.encode("utf8"))

        if isinstance(v, dict):
            sample_hash.update(compute_identity(size, **v).encode("utf8"))
        else:
            sample_hash.update(str(v).encode("utf8"))

    return sample_hash.hexdigest()[:size]


def prod(iterable: Iterable[T]) -> T:
    """Like sum() but returns the product of all numbers in the iterable.

    >>> prod(range(1, 5))
    24
    """
    return reduce(operator.mul, iterable, 1)


def common_fields(a, b) -> Iterable[Tuple[str, Tuple[Field, Field]]]:
    # If any attributes are common to both the Experiment and the State,
    # copy them over to the Experiment.
    a_fields = fields(a)
    b_fields = fields(b)
    for field_a in a_fields:
        name_a: str = field_a.name
        value_a = getattr(a, field_a.name)
        for field_b in b_fields:
            name_b: str = field_b.name
            value_b = getattr(b, field_b.name)
            if name_a == name_b:
                yield name_a, (value_a, value_b)


def add_dicts(d1: Dict, d2: Dict, add_values=True) -> Dict:
    result = d1.copy()
    for key, v2 in d2.items():
        if key not in d1:
            result[key] = v2
        elif isinstance(v2, dict):
            result[key] = add_dicts(d1[key], v2, add_values=add_values)
        elif not add_values:
            result[key] = v2
        else:
            result[key] = d1[key] + v2
    return result


def rsetattr(obj: Any, attr: str, val: Any) -> None:
    """Taken from https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-subobjects-chained-properties"""
    pre, _, post = attr.rpartition(".")
    return setattr(rgetattr(obj, pre) if pre else obj, post, val)


# using wonder's beautiful simplification: https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects/31174427?noredirect=1#comment86638618_31174427


def rgetattr(obj: Any, attr: str, *args):
    """Taken from https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-subobjects-chained-properties"""

    def _getattr(obj, attr):
        return getattr(obj, attr, *args)

    return functools.reduce(_getattr, [obj] + attr.split("."))


def is_nonempty_dir(path: Path) -> bool:
    return path.is_dir() and len(list(path.iterdir())) > 0


D = TypeVar("D", bound=Dict)


def flatten_dict(d: D, separator: str = "/") -> D:
    """Flattens the given nested dict, adding `separator` between keys at different nesting levels.

    Args:
        d (Dict): A nested dictionary
        separator (str, optional): Separator to use. Defaults to "/".

    Returns:
        Dict: A flattened dictionary.
    """
    result = type(d)()
    for k, v in d.items():
        if isinstance(v, dict):
            for ki, vi in flatten_dict(v, separator=separator).items():
                key = f"{k}{separator}{ki}"
                result[key] = vi
        else:
            result[k] = v
    return result


def unique_consecutive(iterable: Iterable[T], key: Callable[[T], Any] = None) -> Iterable[T]:
    """List unique elements, preserving order. Remember only the element just seen.

    NOTE: If `key` is passed, it is only used to test for equality, the outputs of `key`
    for each sample won't be returned.

    >>> list(unique_consecutive('AAAABBBCCDAABBB'))
    ['A', 'B', 'C', 'D', 'A', 'B']
    >>> list(unique_consecutive('ABBCcAD', str.lower))
    ['A', 'B', 'C', 'A', 'D']

    Recipe taken from itertools docs: https://docs.python.org/3/library/itertools.html
    """
    return map(next, map(operator.itemgetter(1), groupby(iterable, key)))


def unique_consecutive_with_index(
    iterable: Iterable[T], key: Callable[[T], Any] = None
) -> Iterable[Tuple[int, T]]:
    """List unique elements, preserving order. Remember only the element just seen.
    Yields tuples of the index and the values.

    NOTE: If `key` is passed, it is only used to test for equality, the outputs of `key`
    for each sample won't be returned. If you want to save some compute, use a map as
    the input.

    >>> list(unique_consecutive_with_index('AAAABBBCCDAABBB'))
    [(0, 'A'), (4, 'B'), (7, 'C'), (9, 'D'), (10, 'A'), (12, 'B')]
    >>> list(unique_consecutive_with_index('ABBCcAD', str.lower))
    [(0, 'A'), (1, 'B'), (3, 'C'), (5, 'A'), (6, 'D')]
    """

    _key = lambda i_v: key(i_v[1]) if key is not None else i_v[1]
    for v, group_iterator in groupby(enumerate(iterable), _key):
        index, first_val = next(group_iterator)
        yield index, first_val


def roundrobin(*iterables: Iterable[T]) -> Iterable[T]:
    """
    roundrobin('ABC', 'D', 'EF') --> A D E B F C

    Recipe taken from itertools docs: https://docs.python.org/3/library/itertools.html
    """
    # Recipe credited to George Sakkis
    num_active = len(iterables)
    nexts = itertools.cycle(iter(it).__next__ for it in iterables)
    while num_active:
        try:
            for next_ in nexts:
                yield next_()
        except StopIteration:
            # Remove the iterator we just exhausted from the cycle.
            num_active -= 1
            nexts = itertools.cycle(itertools.islice(nexts, num_active))


def take(iterable: Iterable[T], n: Optional[int]) -> Iterable[T]:
    """Takes only the first `n` elements from `iterable`.

    if `n` is None, returns the entire iterable.
    """
    return itertools.islice(iterable, n) if n is not None else iterable


def camel_case(name):
    s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
    s2 = re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower()
    while "__" in s2:
        s2 = s2.replace("__", "_")
    return s2


def constant(v: T, **kwargs) -> T:
    metadata = kwargs.setdefault("metadata", {})
    metadata["constant"] = v
    metadata["decoding_fn"] = lambda _: v
    metadata["to_dict"] = lambda _: v
    return field(default=v, init=False, **kwargs)


def flag(default: bool, *args, **kwargs):
    return field(default=default, nargs="?", *args, **kwargs)


def dict_union(*dicts: Dict[K, V], recurse: bool = True, dict_factory=dict) -> Dict[K, V]:
    """Simple dict union until we use python 3.9

    If `recurse` is True, also does the union of nested dictionaries.
    NOTE: The returned dictionary has keys sorted alphabetically.

    >>> a = dict(a=1, b=2, c=3)
    >>> b = dict(c=5, d=6, e=7)
    >>> dict_union(a, b)
    {'a': 1, 'b': 2, 'c': 5, 'd': 6, 'e': 7}
    >>> a = dict(a=1, b=dict(c=2, d=3))
    >>> b = dict(a=2, b=dict(c=3, e=6))
    >>> dict_union(a, b)
    {'a': 2, 'b': {'c': 3, 'd': 3, 'e': 6}}
    """
    result: Dict = dict_factory()
    if not dicts:
        return result
    assert len(dicts) >= 1
    all_keys: Set[str] = set()
    all_keys.update(*dicts)
    all_keys = sorted(all_keys)

    # Create a neat generator of generators, to save some memory.
    all_values: Iterable[Tuple[V, Iterable[K]]] = (
        (k, (d[k] for d in dicts if k in d)) for k in all_keys
    )
    for k, values in all_values:
        sub_dicts: List[Dict] = []
        new_value: V = None
        n_values = 0
        for v in values:
            if isinstance(v, dict) and recurse:
                sub_dicts.append(v)
            else:
                # Overwrite the new value for that key.
                new_value = v
            n_values += 1

        if len(sub_dicts) == n_values and recurse:
            # We only get here if all values for key `k` were dictionaries,
            # and if recurse was True.
            new_value = dict_union(*sub_dicts, recurse=True, dict_factory=dict_factory)

        result[k] = new_value
    return result


K = TypeVar("K")
V = TypeVar("V")
M = TypeVar("M")


def zip_dicts(*dicts: Dict[K, V], missing: M = None) -> Iterable[Tuple[K, Tuple[Union[M, V], ...]]]:
    """Iterator over the union of all keys, giving the value from each dict if
    present, else `missing`.
    """
    # If any attributes are common to both the Experiment and the State,
    # copy them over to the Experiment.
    keys = set(itertools.chain(*dicts))
    for key in keys:
        yield (key, tuple(d.get(key, missing) for d in dicts))


def dict_intersection(*dicts: Dict[K, V]) -> Iterable[Tuple[K, Tuple[V, ...]]]:
    """Gives back an iterator over the keys and values common to all dicts."""
    dicts = [dict(d.items()) for d in dicts]
    common_keys = set(dicts[0])
    for d in dicts:
        common_keys.intersection_update(d)
    for key in common_keys:
        yield (key, tuple(d[key] for d in dicts))


def try_get(d: Dict[K, V], *keys: K, default: V = None) -> Optional[V]:
    for k in keys:
        try:
            return d[k]
        except KeyError:
            pass
    return default


def remove_suffix(s: str, suffix: str) -> str:
    """Remove the suffix from string s if present.
    Doing this manually until we start using python 3.9.

    >>> remove_suffix("bob.com", ".com")
    'bob'
    >>> remove_suffix("Henrietta", "match")
    'Henrietta'
    """
    i = s.rfind(suffix)
    if i == -1:
        # return s if not found.
        return s
    return s[:i]


def remove_prefix(s: str, prefix: str) -> str:
    """Remove the prefix from string s if present.
    Doing this manually until we start using python 3.9.

    >>> remove_prefix("bob.com", "bo")
    'b.com'
    >>> remove_prefix("Henrietta", "match")
    'Henrietta'
    """
    if not s.startswith(prefix):
        return s
    return s[len(prefix) :]


def get_all_subclasses_of(cls: Type[T]) -> Iterable[Type[T]]:
    scope_dict: Dict = globals()
    for name, var in scope_dict.items():
        if isclass(var) and issubclass(var, cls):
            yield var


def get_all_concrete_subclasses_of(cls: Type[T]) -> Iterable[Type[T]]:
    yield from filterfalse(inspect.isabstract, get_all_subclasses_of(cls))


def get_path_to_source_file(cls: Type) -> Path:
    """Attempts to give a relative path to the given source path. If not possible, then
    gives back an absolute path to the source file instead.
    """
    cwd = Path.cwd()
    source_file = getsourcefile(cls)
    assert isinstance(source_file, str), f"can't locate source file for {cls}?"
    source_path = Path(source_file).absolute()
    try:
        return source_path.relative_to(cwd)
    except ValueError:
        # If we can't find the relative path, for instance when sequoia is
        # installed in site_packages (not with `pip install -e .``), give back
        # the absolute path instead.
        return source_path


def constant_property(fixed_value: T) -> T:
    def constant_field(v: T, **kwargs) -> T:
        metadata = kwargs.setdefault("metadata", {})
        metadata["constant"] = v
        metadata["decoding_fn"] = lambda _: v
        metadata["to_dict"] = lambda _: v
        return field(default=v, init=False, **kwargs)

    def setter(_, value: Any):
        if isinstance(value, property):
            # This happens in the __init__ that is generated by dataclasses, so we
            # do nothing here.
            pass
        elif value != fixed_value:
            raise RuntimeError(RuntimeWarning(f"This attribute is fixed at value {fixed_value}."))

    def getter(_) -> T:
        return fixed_value

    return property(fget=getter, fset=setter)


def deprecated_property(old_name: str, new_name: str):
    """Marks a property as being deprecated, redirectly any changes to its value to the
    property with name 'new_name'.
    """

    def setter(self, value: Any):
        warnings.warn(
            DeprecationWarning(f"'{old_name}' property is deprecated, use '{new_name}' instead."),
            category=DeprecationWarning,
            stacklevel=2,
        )
        if isinstance(value, property):
            # This happens in the __init__ that is generated by dataclasses, so we
            # do nothing here.
            pass
        else:
            setattr(self, new_name, value)
        # raise RuntimeError(f"'{old_name}' property is deprecated, use '{new_name}' instead.")

    def getter(self):
        warnings.warn(
            DeprecationWarning(f"'{old_name}' property is deprecated, use '{new_name}' instead."),
            category=DeprecationWarning,
            stacklevel=2,
        )
        return getattr(self, new_name)

    doc = f"Deprecated property, Please use '{new_name}' instead."
    return property(fget=getter, fset=setter, doc=doc)


if __name__ == "__main__":
    import doctest

    doctest.testmod()


================================================
FILE: setup.cfg
================================================
[versioneer]
VCS=git
style=pep440-post
versionfile_source=sequoia/_version.py
versionfile_build=sequoia/_version.py
tag_prefix=v
parentdir_prefix=sequoia-

[metadata]
license_file=LICENSE

================================================
FILE: setup.py
================================================
import os
from typing import Dict, List, Union

from setuptools import find_packages, setup

import versioneer

with open(os.path.join(os.path.dirname(__file__), "requirements.txt"), "r") as file:
    lines = [ln.strip() for ln in file.readlines()]

packages_to_export = find_packages(where=".", exclude=["tests*", "examples*"], include="sequoia*")

required_packages = [line for line in lines if line and not line.startswith("#")]

extras_require: Dict[str, Union[str, List[str]]] = {
    "monsterkong": [
        "meta_monsterkong @ git+https://github.com/lebrice/MetaMonsterkong.git#egg=meta_monsterkong"
    ],
    "atari": ["gym[atari] @ git+https://www.github.com/lebrice/gym@easier_custom_spaces#egg=gym"],
    "hpo": ["orion>=0.1.15", "orion.algo.skopt>=0.1.6"],
    "avalanche": [
        "gdown",  # BUG: Avalanche needs this to download cub200 dataset.
        "avalanche @ git+https://github.com/ContinualAI/avalanche.git@83b3cb9a92b75a59c1b9d31fc6f0dce9436e5fc5#egg=avalanche-lib",
    ],
    # NOTE: Removing this for now, because it has very strict requirements, and includes
    # a lot of copy-pasted code, and doesn't really add anything compared to metaworld.
    # This isn't right.
    # "mtenv": [
    #     "mtenv @ git+https://github.com/facebookresearch/mtenv.git@main#egg='mtenv[metaworld]'"
    # ],
    "ctrl": "ctrl-benchmark==0.0.4",
    "mujoco": [
        "mujoco_py",
    ],
    "metaworld": [
        "metaworld @ git+https://github.com/rlworkgroup/metaworld.git@29fe5d6d95cf9ad86f63eac38db8c0aef3837994#egg=metaworld"
    ],
    "sb3": "stable-baselines3==1.2.0",
}
# Add-up all the optional requirements, and then remove any duplicates.
extras_require["all"] = sum(
    [
        extra_requirements if isinstance(extra_requirements, list) else [extra_requirements]
        for extra_requirements in extras_require.values()
    ],
    [],
)
extras_require["all"] = list(set(extras_require["all"]))

extras_require["no_mujoco"] = sum(
    [
        extra_dependencies if isinstance(extra_dependencies, list) else [extra_dependencies]
        for extra_name, extra_dependencies in extras_require.items()
        if extra_name not in ["all", "mujoco", "metaworld"]
    ],
    [],
)
extras_require["no_mujoco"] = list(set(extras_require["no_mujoco"]))

setup(
    name="sequoia",
    version=versioneer.get_version(),
    cmdclass=versioneer.get_cmdclass(),
    description="The Research Tree - A playground for research at the intersection of Continual, Reinforcement, and Self-Supervised Learning.",
    url="https://github.com/lebrice/Sequoia",
    author="Fabrice Normandin",
    author_email="fabrice.normandin@gmail.com",
    license="GPLv3",
    packages=packages_to_export,
    extras_require=extras_require,
    install_requires=required_packages,
    python_requires=">=3.7",
    tests_require=["pytest"],
    classifiers=[
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
    ],
    entry_points={
        "console_scripts": [
            "sequoia = sequoia.main:main",
            # TODO: This entry-point is added temporarily while we redesign the
            # command-line API (See https://github.com/lebrice/Sequoia/issues/47)
            # "sequoia_sweep = sequoia.experiments.hpo_sweep:main",
        ],
    },
)


================================================
FILE: versioneer.py
================================================
# Version: 0.19

"""The Versioneer - like a rocketeer, but for versions.

The Versioneer
==============

* like a rocketeer, but for versions!
* https://github.com/python-versioneer/python-versioneer
* Brian Warner
* License: Public Domain
* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3
* [![Latest Version][pypi-image]][pypi-url]
* [![Build Status][travis-image]][travis-url]

This is a tool for managing a recorded version number in distutils-based
python projects. The goal is to remove the tedious and error-prone "update
the embedded version string" step from your release process. Making a new
release should be as easy as recording a new tag in your version-control
system, and maybe making new tarballs.


## Quick Install

* `pip install versioneer` to somewhere in your $PATH
* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md))
* run `versioneer install` in your source tree, commit the results
* Verify version information with `python setup.py version`

## Version Identifiers

Source trees come from a variety of places:

* a version-control system checkout (mostly used by developers)
* a nightly tarball, produced by build automation
* a snapshot tarball, produced by a web-based VCS browser, like github's
  "tarball from tag" feature
* a release tarball, produced by "setup.py sdist", distributed through PyPI

Within each source tree, the version identifier (either a string or a number,
this tool is format-agnostic) can come from a variety of places:

* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows
  about recent "tags" and an absolute revision-id
* the name of the directory into which the tarball was unpacked
* an expanded VCS keyword ($Id$, etc)
* a `_version.py` created by some earlier build step

For released software, the version identifier is closely related to a VCS
tag. Some projects use tag names that include more than just the version
string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool
needs to strip the tag prefix to extract the version identifier. For
unreleased software (between tags), the version identifier should provide
enough information to help developers recreate the same tree, while also
giving them an idea of roughly how old the tree is (after version 1.2, before
version 1.3). Many VCS systems can report a description that captures this,
for example `git describe --tags --dirty --always` reports things like
"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
uncommitted changes).

The version identifier is used for multiple purposes:

* to allow the module to self-identify its version: `myproject.__version__`
* to choose a name and prefix for a 'setup.py sdist' tarball

## Theory of Operation

Versioneer works by adding a special `_version.py` file into your source
tree, where your `__init__.py` can import it. This `_version.py` knows how to
dynamically ask the VCS tool for version information at import time.

`_version.py` also contains `$Revision$` markers, and the installation
process marks `_version.py` to have this marker rewritten with a tag name
during the `git archive` command. As a result, generated tarballs will
contain enough information to get the proper version.

To allow `setup.py` to compute a version too, a `versioneer.py` is added to
the top level of your source tree, next to `setup.py` and the `setup.cfg`
that configures it. This overrides several distutils/setuptools commands to
compute the version when invoked, and changes `setup.py build` and `setup.py
sdist` to replace `_version.py` with a small static file that contains just
the generated version data.

## Installation

See [INSTALL.md](./INSTALL.md) for detailed installation instructions.

## Version-String Flavors

Code which uses Versioneer can learn about its version string at runtime by
importing `_version` from your main `__init__.py` file and running the
`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can
import the top-level `versioneer.py` and run `get_versions()`.

Both functions return a dictionary with different flavors of version
information:

* `['version']`: A condensed version string, rendered using the selected
  style. This is the most commonly used value for the project's version
  string. The default "pep440" style yields strings like `0.11`,
  `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section
  below for alternative styles.

* `['full-revisionid']`: detailed revision identifier. For Git, this is the
  full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac".

* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the
  commit date in ISO 8601 format. This will be None if the date is not
  available.

* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that
  this is only accurate if run in a VCS checkout, otherwise it is likely to
  be False or None

* `['error']`: if the version string could not be computed, this will be set
  to a string describing the problem, otherwise it will be None. It may be
  useful to throw an exception in setup.py if this is set, to avoid e.g.
  creating tarballs with a version string of "unknown".

Some variants are more useful than others. Including `full-revisionid` in a
bug report should allow developers to reconstruct the exact code being tested
(or indicate the presence of local changes that should be shared with the
developers). `version` is suitable for display in an "about" box or a CLI
`--version` output: it can be easily compared against release notes and lists
of bugs fixed in various releases.

The installer adds the following text to your `__init__.py` to place a basic
version in `YOURPROJECT.__version__`:

    from ._version import get_versions
    __version__ = get_versions()['version']
    del get_versions

## Styles

The setup.cfg `style=` configuration controls how the VCS information is
rendered into a version string.

The default style, "pep440", produces a PEP440-compliant string, equal to the
un-prefixed tag name for actual releases, and containing an additional "local
version" section with more detail for in-between builds. For Git, this is
TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags
--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
that this commit is two revisions ("+2") beyond the "0.11" tag. For released
software (exactly equal to a known tag), the identifier will only contain the
stripped tag, e.g. "0.11".

Other styles are available. See [details.md](details.md) in the Versioneer
source tree for descriptions.

## Debugging

Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
to return a version of "0+unknown". To investigate the problem, run `setup.py
version`, which will run the version-lookup code in a verbose mode, and will
display the full contents of `get_versions()` (including the `error` string,
which may help identify what went wrong).

## Known Limitations

Some situations are known to cause problems for Versioneer. This details the
most significant ones. More can be found on Github
[issues page](https://github.com/python-versioneer/python-versioneer/issues).

### Subprojects

Versioneer has limited support for source trees in which `setup.py` is not in
the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are
two common reasons why `setup.py` might not be in the root:

* Source trees which contain multiple subprojects, such as
  [Buildbot](https://github.com/buildbot/buildbot), which contains both
  "master" and "slave" subprojects, each with their own `setup.py`,
  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
  distributions (and upload multiple independently-installable tarballs).
* Source trees whose main purpose is to contain a C library, but which also
  provide bindings to Python (and perhaps other languages) in subdirectories.

Versioneer will look for `.git` in parent directories, and most operations
should get the right version string. However `pip` and `setuptools` have bugs
and implementation details which frequently cause `pip install .` from a
subproject directory to fail to find a correct version string (so it usually
defaults to `0+unknown`).

`pip install --editable .` should work correctly. `setup.py install` might
work too.

Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
some later version.

[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking
this issue. The discussion in
[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the
issue from the Versioneer side in more detail.
[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
pip to let Versioneer work correctly.

Versioneer-0.16 and earlier only looked for a `.git` directory next to the
`setup.cfg`, so subprojects were completely unsupported with those releases.

### Editable installs with setuptools <= 18.5

`setup.py develop` and `pip install --editable .` allow you to install a
project into a virtualenv once, then continue editing the source code (and
test) without re-installing after every change.

"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
convenient way to specify executable scripts that should be installed along
with the python package.

These both work as expected when using modern setuptools. When using
setuptools-18.5 or earlier, however, certain operations will cause
`pkg_resources.DistributionNotFound` errors when running the entrypoint
script, which must be resolved by re-installing the package. This happens
when the install happens with one version, then the egg_info data is
regenerated while a different version is checked out. Many setup.py commands
cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
a different virtualenv), so this can be surprising.

[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes
this one, but upgrading to a newer version of setuptools should probably
resolve it.


## Updating Versioneer

To upgrade your project to a new release of Versioneer, do the following:

* install the new Versioneer (`pip install -U versioneer` or equivalent)
* edit `setup.cfg`, if necessary, to include any new configuration settings
  indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
* re-run `versioneer install` in your source tree, to replace
  `SRC/_version.py`
* commit any changed files

## Future Directions

This tool is designed to make it easily extended to other version-control
systems: all VCS-specific components are in separate directories like
src/git/ . The top-level `versioneer.py` script is assembled from these
components by running make-versioneer.py . In the future, make-versioneer.py
will take a VCS name as an argument, and will construct a version of
`versioneer.py` that is specific to the given VCS. It might also take the
configuration arguments that are currently provided manually during
installation by editing setup.py . Alternatively, it might go the other
direction and include code from all supported VCS systems, reducing the
number of intermediate scripts.

## Similar projects

* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time
  dependency
* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of
  versioneer

## License

To make Versioneer easier to embed, all its code is dedicated to the public
domain. The `_version.py` that it creates is also in the public domain.
Specifically, both are released under the Creative Commons "Public Domain
Dedication" license (CC0-1.0), as described in
https://creativecommons.org/publicdomain/zero/1.0/ .

[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg
[pypi-url]: https://pypi.python.org/pypi/versioneer/
[travis-image]:
https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg
[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer

"""

import configparser
import errno
import json
import os
import re
import subprocess
import sys


class VersioneerConfig:
    """Container for Versioneer configuration parameters."""


def get_root():
    """Get the project root directory.

    We require that all commands are run from the project root, i.e. the
    directory that contains setup.py, setup.cfg, and versioneer.py .
    """
    root = os.path.realpath(os.path.abspath(os.getcwd()))
    setup_py = os.path.join(root, "setup.py")
    versioneer_py = os.path.join(root, "versioneer.py")
    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
        # allow 'python path/to/setup.py COMMAND'
        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
        setup_py = os.path.join(root, "setup.py")
        versioneer_py = os.path.join(root, "versioneer.py")
    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
        err = (
            "Versioneer was unable to run the project root directory. "
            "Versioneer requires setup.py to be executed from "
            "its immediate directory (like 'python setup.py COMMAND'), "
            "or in a way that lets it use sys.argv[0] to find the root "
            "(like 'python path/to/setup.py COMMAND')."
        )
        raise VersioneerBadRootError(err)
    try:
        # Certain runtime workflows (setup.py install/develop in a setuptools
        # tree) execute all dependencies in a single python process, so
        # "versioneer" may be imported multiple times, and python's shared
        # module-import table will cache the first one. So we can't use
        # os.path.dirname(__file__), as that will find whichever
        # versioneer.py was first imported, even in later projects.
        me = os.path.realpath(os.path.abspath(__file__))
        me_dir = os.path.normcase(os.path.splitext(me)[0])
        vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0])
        if me_dir != vsr_dir:
            print(
                "Warning: build in %s is using versioneer.py from %s"
                % (os.path.dirname(me), versioneer_py)
            )
    except NameError:
        pass
    return root


def get_config_from_root(root):
    """Read the project setup.cfg file to determine Versioneer config."""
    # This might raise EnvironmentError (if setup.cfg is missing), or
    # configparser.NoSectionError (if it lacks a [versioneer] section), or
    # configparser.NoOptionError (if it lacks "VCS="). See the docstring at
    # the top of versioneer.py for instructions on writing your setup.cfg .
    setup_cfg = os.path.join(root, "setup.cfg")
    parser = configparser.ConfigParser()
    with open(setup_cfg, "r") as f:
        parser.read_file(f)
    VCS = parser.get("versioneer", "VCS")  # mandatory

    def get(parser, name):
        if parser.has_option("versioneer", name):
            return parser.get("versioneer", name)
        return None

    cfg = VersioneerConfig()
    cfg.VCS = VCS
    cfg.style = get(parser, "style") or ""
    cfg.versionfile_source = get(parser, "versionfile_source")
    cfg.versionfile_build = get(parser, "versionfile_build")
    cfg.tag_prefix = get(parser, "tag_prefix")
    if cfg.tag_prefix in ("''", '""'):
        cfg.tag_prefix = ""
    cfg.parentdir_prefix = get(parser, "parentdir_prefix")
    cfg.verbose = get(parser, "verbose")
    return cfg


class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario."""


# these dictionaries contain VCS-specific tools
LONG_VERSION_PY = {}
HANDLERS = {}


def register_vcs_handler(vcs, method):  # decorator
    """Create decorator to mark a method as the handler of a VCS."""

    def decorate(f):
        """Store f in HANDLERS[vcs][method]."""
        if vcs not in HANDLERS:
            HANDLERS[vcs] = {}
        HANDLERS[vcs][method] = f
        return f

    return decorate


def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None):
    """Call the given command(s)."""
    assert isinstance(commands, list)
    p = None
    for c in commands:
        try:
            dispcmd = str([c] + args)
            # remember shell=False, so use git.cmd on windows, not just git
            p = subprocess.Popen(
                [c] + args,
                cwd=cwd,
                env=env,
                stdout=subprocess.PIPE,
                stderr=(subprocess.PIPE if hide_stderr else None),
            )
            break
        except EnvironmentError:
            e = sys.exc_info()[1]
            if e.errno == errno.ENOENT:
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(e)
            return None, None
    else:
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None
    stdout = p.communicate()[0].strip().decode()
    if p.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % stdout)
        return None, p.returncode
    return stdout, p.returncode


LONG_VERSION_PY[
    "git"
] = r'''
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.

# This file is released into the public domain. Generated by
# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer)

"""Git implementation of _version.py."""

import errno
import os
import re
import subprocess
import sys


def get_keywords():
    """Get the keywords needed to look up the version information."""
    # these strings will be replaced by git during git-archive.
    # setup.py/versioneer.py will grep for the variable names, so they must
    # each be defined on a line of their own. _version.py will just call
    # get_keywords().
    git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s"
    git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s"
    git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s"
    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
    return keywords


class VersioneerConfig:
    """Container for Versioneer configuration parameters."""


def get_config():
    """Create, populate and return the VersioneerConfig() object."""
    # these strings are filled in when 'setup.py versioneer' creates
    # _version.py
    cfg = VersioneerConfig()
    cfg.VCS = "git"
    cfg.style = "%(STYLE)s"
    cfg.tag_prefix = "%(TAG_PREFIX)s"
    cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s"
    cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s"
    cfg.verbose = False
    return cfg


class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario."""


LONG_VERSION_PY = {}
HANDLERS = {}


def register_vcs_handler(vcs, method):  # decorator
    """Create decorator to mark a method as the handler of a VCS."""
    def decorate(f):
        """Store f in HANDLERS[vcs][method]."""
        if vcs not in HANDLERS:
            HANDLERS[vcs] = {}
        HANDLERS[vcs][method] = f
        return f
    return decorate


def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
                env=None):
    """Call the given command(s)."""
    assert isinstance(commands, list)
    p = None
    for c in commands:
        try:
            dispcmd = str([c] + args)
            # remember shell=False, so use git.cmd on windows, not just git
            p = subprocess.Popen([c] + args, cwd=cwd, env=env,
                                 stdout=subprocess.PIPE,
                                 stderr=(subprocess.PIPE if hide_stderr
                                         else None))
            break
        except EnvironmentError:
            e = sys.exc_info()[1]
            if e.errno == errno.ENOENT:
                continue
            if verbose:
                print("unable to run %%s" %% dispcmd)
                print(e)
            return None, None
    else:
        if verbose:
            print("unable to find command, tried %%s" %% (commands,))
        return None, None
    stdout = p.communicate()[0].strip().decode()
    if p.returncode != 0:
        if verbose:
            print("unable to run %%s (error)" %% dispcmd)
            print("stdout was %%s" %% stdout)
        return None, p.returncode
    return stdout, p.returncode


def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Try to determine the version from the parent directory name.

    Source tarballs conventionally unpack into a directory that includes both
    the project name and a version string. We will also support searching up
    two directory levels for an appropriately named parent directory
    """
    rootdirs = []

    for i in range(3):
        dirname = os.path.basename(root)
        if dirname.startswith(parentdir_prefix):
            return {"version": dirname[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        else:
            rootdirs.append(root)
            root = os.path.dirname(root)  # up a level

    if verbose:
        print("Tried directories %%s but none started with prefix %%s" %%
              (str(rootdirs), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")


@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file."""
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    keywords = {}
    try:
        f = open(versionfile_abs, "r")
        for line in f.readlines():
            if line.strip().startswith("git_refnames ="):
                mo = re.search(r'=\s*"(.*)"', line)
                if mo:
                    keywords["refnames"] = mo.group(1)
            if line.strip().startswith("git_full ="):
                mo = re.search(r'=\s*"(.*)"', line)
                if mo:
                    keywords["full"] = mo.group(1)
            if line.strip().startswith("git_date ="):
                mo = re.search(r'=\s*"(.*)"', line)
                if mo:
                    keywords["date"] = mo.group(1)
        f.close()
    except EnvironmentError:
        pass
    return keywords


@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords."""
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    date = keywords.get("date")
    if date is not None:
        # Use only the last line.  Previous lines may contain GPG signature
        # information.
        date = date.splitlines()[-1]

        # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = set([r.strip() for r in refnames.strip("()").split(",")])
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %%d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = set([r for r in refs if re.search(r'\d', r)])
        if verbose:
            print("discarding '%%s', no digits" %% ",".join(refs - tags))
    if verbose:
        print("likely tags: %%s" %% ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            if verbose:
                print("picking %%s" %% r)
            return {"version": r,
                    "full-revisionid": keywords["full"].strip(),
                    "dirty": False, "error": None,
                    "date": date}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags", "date": None}


@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
                          hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %%s not under git control" %% root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
                                          "--always", "--long",
                                          "--match", "%%s*" %% tag_prefix],
                                   cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%%s'"
                               %% describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%%s' doesn't start with prefix '%%s'"
                print(fmt %% (full_tag, tag_prefix))
            pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'"
                               %% (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                    cwd=root)
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"],
                       cwd=root)[0].strip()
    # Use only the last line.  Previous lines may contain GPG signature
    # information.
    date = date.splitlines()[-1]
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces


def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a ."""
    if "+" in pieces.get("closest-tag", ""):
        return "."
    return "+"


def render_pep440(pieces):
    """Build up version string, with post-release "local version identifier".

    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += plus_or_dot(pieces)
            rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # exception #1
        rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"],
                                          pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered


def render_pep440_pre(pieces):
    """TAG[.post0.devDISTANCE] -- No -dirty.

    Exceptions:
    1: no tags. 0.post0.devDISTANCE
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += ".post0.dev%%d" %% pieces["distance"]
    else:
        # exception #1
        rendered = "0.post0.dev%%d" %% pieces["distance"]
    return rendered


def render_pep440_post(pieces):
    """TAG[.postDISTANCE[.dev0]+gHEX] .

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%%d" %% pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += "g%%s" %% pieces["short"]
    else:
        # exception #1
        rendered = "0.post%%d" %% pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += "+g%%s" %% pieces["short"]
    return rendered


def render_pep440_old(pieces):
    """TAG[.postDISTANCE[.dev0]] .

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%%d" %% pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
    else:
        # exception #1
        rendered = "0.post%%d" %% pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
    return rendered


def render_git_describe(pieces):
    """TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render_git_describe_long(pieces):
    """TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always -long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render(pieces, style):
    """Render the given version pieces into the requested style."""
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    if not style or style == "default":
        style = "pep440"  # the default

    if style == "pep440":
        rendered = render_pep440(pieces)
    elif style == "pep440-pre":
        rendered = render_pep440_pre(pieces)
    elif style == "pep440-post":
        rendered = render_pep440_post(pieces)
    elif style == "pep440-old":
        rendered = render_pep440_old(pieces)
    elif style == "git-describe":
        rendered = render_git_describe(pieces)
    elif style == "git-describe-long":
        rendered = render_git_describe_long(pieces)
    else:
        raise ValueError("unknown style '%%s'" %% style)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}


def get_versions():
    """Get version information or return default if unable to do so."""
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.

    cfg = get_config()
    verbose = cfg.verbose

    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for i in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": "unable to find root of source tree",
                "date": None}

    try:
        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
        return render(pieces, cfg.style)
    except NotThisMethod:
        pass

    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None,
            "error": "unable to compute version", "date": None}
'''


@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file."""
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    keywords = {}
    try:
        f = open(versionfile_abs, "r")
        for line in f.readlines():
            if line.strip().startswith("git_refnames ="):
                mo = re.search(r'=\s*"(.*)"', line)
                if mo:
                    keywords["refnames"] = mo.group(1)
            if line.strip().startswith("git_full ="):
                mo = re.search(r'=\s*"(.*)"', line)
                if mo:
                    keywords["full"] = mo.group(1)
            if line.strip().startswith("git_date ="):
                mo = re.search(r'=\s*"(.*)"', line)
                if mo:
                    keywords["date"] = mo.group(1)
        f.close()
    except EnvironmentError:
        pass
    return keywords


@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords."""
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    date = keywords.get("date")
    if date is not None:
        # Use only the last line.  Previous lines may contain GPG signature
        # information.
        date = date.splitlines()[-1]

        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = set([r.strip() for r in refnames.strip("()").split(",")])
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)])
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = set([r for r in refs if re.search(r"\d", r)])
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix) :]
            if verbose:
                print("picking %s" % r)
            return {
                "version": r,
                "full-revisionid": keywords["full"].strip(),
                "dirty": False,
                "error": None,
                "date": date,
            }
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {
        "version": "0+unknown",
        "full-revisionid": keywords["full"].strip(),
        "dirty": False,
        "error": "no suitable tags",
        "date": None,
    }


@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(
        GITS,
        ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix],
        cwd=root,
    )
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[: git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix) :]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root)
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
    # Use only the last line.  Previous lines may contain GPG signature
    # information.
    date = date.splitlines()[-1]
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces


def do_vcs_install(manifest_in, versionfile_source, ipy):
    """Git-specific installation logic for Versioneer.

    For Git, this means creating/changing .gitattributes to mark _version.py
    for export-subst keyword substitution.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]
    files = [manifest_in, versionfile_source]
    if ipy:
        files.append(ipy)
    try:
        me = __file__
        if me.endswith(".pyc") or me.endswith(".pyo"):
            me = os.path.splitext(me)[0] + ".py"
        versioneer_file = os.path.relpath(me)
    except NameError:
        versioneer_file = "versioneer.py"
    files.append(versioneer_file)
    present = False
    try:
        f = open(".gitattributes", "r")
        for line in f.readlines():
            if line.strip().startswith(versionfile_source):
                if "export-subst" in line.strip().split()[1:]:
                    present = True
        f.close()
    except EnvironmentError:
        pass
    if not present:
        f = open(".gitattributes", "a+")
        f.write("%s export-subst\n" % versionfile_source)
        f.close()
        files.append(".gitattributes")
    run_command(GITS, ["add", "--"] + files)


def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Try to determine the version from the parent directory name.

    Source tarballs conventionally unpack into a directory that includes both
    the project name and a version string. We will also support searching up
    two directory levels for an appropriately named parent directory
    """
    rootdirs = []

    for i in range(3):
        dirname = os.path.basename(root)
        if dirname.startswith(parentdir_prefix):
            return {
                "version": dirname[len(parentdir_prefix) :],
                "full-revisionid": None,
                "dirty": False,
                "error": None,
                "date": None,
            }
        else:
            rootdirs.append(root)
            root = os.path.dirname(root)  # up a level

    if verbose:
        print(
            "Tried directories %s but none started with prefix %s"
            % (str(rootdirs), parentdir_prefix)
        )
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")


SHORT_VERSION_PY = """
# This file was generated by 'versioneer.py' (0.19) from
# revision-control system data, or from the parent directory name of an
# unpacked source archive. Distribution tarballs contain a pre-generated copy
# of this file.

import json

version_json = '''
%s
'''  # END VERSION_JSON


def get_versions():
    return json.loads(version_json)
"""


def versions_from_file(filename):
    """Try to determine the version from _version.py if present."""
    try:
        with open(filename) as f:
            contents = f.read()
    except EnvironmentError:
        raise NotThisMethod("unable to read _version.py")
    mo = re.search(r"version_json = '''\n(.*)'''  # END VERSION_JSON", contents, re.M | re.S)
    if not mo:
        mo = re.search(r"version_json = '''\r\n(.*)'''  # END VERSION_JSON", contents, re.M | re.S)
    if not mo:
        raise NotThisMethod("no version_json in _version.py")
    return json.loads(mo.group(1))


def write_to_version_file(filename, versions):
    """Write the given version number to the given _version.py file."""
    os.unlink(filename)
    contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": "))
    with open(filename, "w") as f:
        f.write(SHORT_VERSION_PY % contents)

    print("set %s to '%s'" % (filename, versions["version"]))


def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a ."""
    if "+" in pieces.get("closest-tag", ""):
        return "."
    return "+"


def render_pep440(pieces):
    """Build up version string, with post-release "local version identifier".

    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += plus_or_dot(pieces)
            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # exception #1
        rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered


def render_pep440_pre(pieces):
    """TAG[.post0.devDISTANCE] -- No -dirty.

    Exceptions:
    1: no tags. 0.post0.devDISTANCE
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += ".post0.dev%d" % pieces["distance"]
    else:
        # exception #1
        rendered = "0.post0.dev%d" % pieces["distance"]
    return rendered


def render_pep440_post(pieces):
    """TAG[.postDISTANCE[.dev0]+gHEX] .

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += "g%s" % pieces["short"]
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += "+g%s" % pieces["short"]
    return rendered


def render_pep440_old(pieces):
    """TAG[.postDISTANCE[.dev0]] .

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
    return rendered


def render_git_describe(pieces):
    """TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render_git_describe_long(pieces):
    """TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always -long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render(pieces, style):
    """Render the given version pieces into the requested style."""
    if pieces["error"]:
        return {
            "version": "unknown",
            "full-revisionid": pieces.get("long"),
            "dirty": None,
            "error": pieces["error"],
            "date": None,
        }

    if not style or style == "default":
        style = "pep440"  # the default

    if style == "pep440":
        rendered = render_pep440(pieces)
    elif style == "pep440-pre":
        rendered = render_pep440_pre(pieces)
    elif style == "pep440-post":
        rendered = render_pep440_post(pieces)
    elif style == "pep440-old":
        rendered = render_pep440_old(pieces)
    elif style == "git-describe":
        rendered = render_git_describe(pieces)
    elif style == "git-describe-long":
        rendered = render_git_describe_long(pieces)
    else:
        raise ValueError("unknown style '%s'" % style)

    return {
        "version": rendered,
        "full-revisionid": pieces["long"],
        "dirty": pieces["dirty"],
        "error": None,
        "date": pieces.get("date"),
    }


class VersioneerBadRootError(Exception):
    """The project root directory is unknown or missing key files."""


def get_versions(verbose=False):
    """Get the project version from whatever source is available.

    Returns dict with two keys: 'version' and 'full'.
    """
    if "versioneer" in sys.modules:
        # see the discussion in cmdclass.py:get_cmdclass()
        del sys.modules["versioneer"]

    root = get_root()
    cfg = get_config_from_root(root)

    assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
    handlers = HANDLERS.get(cfg.VCS)
    assert handlers, "unrecognized VCS '%s'" % cfg.VCS
    verbose = verbose or cfg.verbose
    assert cfg.versionfile_source is not None, "please set versioneer.versionfile_source"
    assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"

    versionfile_abs = os.path.join(root, cfg.versionfile_source)

    # extract version from first of: _version.py, VCS command (e.g. 'git
    # describe'), parentdir. This is meant to work for developers using a
    # source checkout, for users of a tarball created by 'setup.py sdist',
    # and for users of a tarball/zipball created by 'git archive' or github's
    # download-from-tag feature or the equivalent in other VCSes.

    get_keywords_f = handlers.get("get_keywords")
    from_keywords_f = handlers.get("keywords")
    if get_keywords_f and from_keywords_f:
        try:
            keywords = get_keywords_f(versionfile_abs)
            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
            if verbose:
                print("got version from expanded keyword %s" % ver)
            return ver
        except NotThisMethod:
            pass

    try:
        ver = versions_from_file(versionfile_abs)
        if verbose:
            print("got version from file %s %s" % (versionfile_abs, ver))
        return ver
    except NotThisMethod:
        pass

    from_vcs_f = handlers.get("pieces_from_vcs")
    if from_vcs_f:
        try:
            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
            ver = render(pieces, cfg.style)
            if verbose:
                print("got version from VCS %s" % ver)
            return ver
        except NotThisMethod:
            pass

    try:
        if cfg.parentdir_prefix:
            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
            if verbose:
                print("got version from parentdir %s" % ver)
            return ver
    except NotThisMethod:
        pass

    if verbose:
        print("unable to compute version")

    return {
        "version": "0+unknown",
        "full-revisionid": None,
        "dirty": None,
        "error": "unable to compute version",
        "date": None,
    }


def get_version():
    """Get the short version string for this project."""
    return get_versions()["version"]


def get_cmdclass(cmdclass=None):
    """Get the custom setuptools/distutils subclasses used by Versioneer.

    If the package uses a different cmdclass (e.g. one from numpy), it
    should be provide as an argument.
    """
    if "versioneer" in sys.modules:
        del sys.modules["versioneer"]
        # this fixes the "python setup.py develop" case (also 'install' and
        # 'easy_install .'), in which subdependencies of the main project are
        # built (using setup.py bdist_egg) in the same python process. Assume
        # a main project A and a dependency B, which use different versions
        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
        # sys.modules by the time B's setup.py is executed, causing B to run
        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
        # sandbox that restores sys.modules to it's pre-build state, so the
        # parent is protected against the child's "import versioneer". By
        # removing ourselves from sys.modules here, before the child build
        # happens, we protect the child from the parent's versioneer too.
        # Also see https://github.com/python-versioneer/python-versioneer/issues/52

    cmds = {} if cmdclass is None else cmdclass.copy()

    # we add "version" to both distutils and setuptools
    from distutils.core import Command

    class cmd_version(Command):
        description = "report generated version string"
        user_options = []
        boolean_options = []

        def initialize_options(self):
            pass

        def finalize_options(self):
            pass

        def run(self):
            vers = get_versions(verbose=True)
            print("Version: %s" % vers["version"])
            print(" full-revisionid: %s" % vers.get("full-revisionid"))
            print(" dirty: %s" % vers.get("dirty"))
            print(" date: %s" % vers.get("date"))
            if vers["error"]:
                print(" error: %s" % vers["error"])

    cmds["version"] = cmd_version

    # we override "build_py" in both distutils and setuptools
    #
    # most invocation pathways end up running build_py:
    #  distutils/build -> build_py
    #  distutils/install -> distutils/build ->..
    #  setuptools/bdist_wheel -> distutils/install ->..
    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
    #  setuptools/install -> bdist_egg ->..
    #  setuptools/develop -> ?
    #  pip install:
    #   copies source tree to a tempdir before running egg_info/etc
    #   if .git isn't copied too, 'git describe' will fail
    #   then does setup.py bdist_wheel, or sometimes setup.py install
    #  setup.py egg_info -> ?

    # we override different "build_py" commands for both environments
    if "build_py" in cmds:
        _build_py = cmds["build_py"]
    elif "setuptools" in sys.modules:
        from setuptools.command.build_py import build_py as _build_py
    else:
        from distutils.command.build_py import build_py as _build_py

    class cmd_build_py(_build_py):
        def run(self):
            root = get_root()
            cfg = get_config_from_root(root)
            versions = get_versions()
            _build_py.run(self)
            # now locate _version.py in the new build/ directory and replace
            # it with an updated value
            if cfg.versionfile_build:
                target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build)
                print("UPDATING %s" % target_versionfile)
                write_to_version_file(target_versionfile, versions)

    cmds["build_py"] = cmd_build_py

    if "setuptools" in sys.modules:
        from setuptools.command.build_ext import build_ext as _build_ext
    else:
        from distutils.command.build_ext import build_ext as _build_ext

    class cmd_build_ext(_build_ext):
        def run(self):
            root = get_root()
            cfg = get_config_from_root(root)
            versions = get_versions()
            _build_ext.run(self)
            if self.inplace:
                # build_ext --inplace will only build extensions in
                # build/lib<..> dir with no _version.py to write to.
                # As in place builds will already have a _version.py
                # in the module dir, we do not need to write one.
                return
            # now locate _version.py in the new build/ directory and replace
            # it with an updated value
            target_versionfile = os.path.join(self.build_lib, cfg.versionfile_source)
            print("UPDATING %s" % target_versionfile)
            write_to_version_file(target_versionfile, versions)

    cmds["build_ext"] = cmd_build_ext

    if "cx_Freeze" in sys.modules:  # cx_freeze enabled?
        from cx_Freeze.dist import build_exe as _build_exe

        # nczeczulin reports that py2exe won't like the pep440-style string
        # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g.
        # setup(console=[{
        #   "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION
        #   "product_version": versioneer.get_version(),
        #   ...

        class cmd_build_exe(_build_exe):
            def run(self):
                root = get_root()
                cfg = get_config_from_root(root)
                versions = get_versions()
                target_versionfile = cfg.versionfile_source
                print("UPDATING %s" % target_versionfile)
                write_to_version_file(target_versionfile, versions)

                _build_exe.run(self)
                os.unlink(target_versionfile)
                with open(cfg.versionfile_source, "w") as f:
                    LONG = LONG_VERSION_PY[cfg.VCS]
                    f.write(
                        LONG
                        % {
                            "DOLLAR": "$",
                            "STYLE": cfg.style,
                            "TAG_PREFIX": cfg.tag_prefix,
                            "PARENTDIR_PREFIX": cfg.parentdir_prefix,
                            "VERSIONFILE_SOURCE": cfg.versionfile_source,
                        }
                    )

        cmds["build_exe"] = cmd_build_exe
        del cmds["build_py"]

    if "py2exe" in sys.modules:  # py2exe enabled?
        from py2exe.distutils_buildexe import py2exe as _py2exe

        class cmd_py2exe(_py2exe):
            def run(self):
                root = get_root()
                cfg = get_config_from_root(root)
                versions = get_versions()
                target_versionfile = cfg.versionfile_source
                print("UPDATING %s" % target_versionfile)
                write_to_version_file(target_versionfile, versions)

                _py2exe.run(self)
                os.unlink(target_versionfile)
                with open(cfg.versionfile_source, "w") as f:
                    LONG = LONG_VERSION_PY[cfg.VCS]
                    f.write(
                        LONG
                        % {
                            "DOLLAR": "$",
                            "STYLE": cfg.style,
                            "TAG_PREFIX": cfg.tag_prefix,
                            "PARENTDIR_PREFIX": cfg.parentdir_prefix,
                            "VERSIONFILE_SOURCE": cfg.versionfile_source,
                        }
                    )

        cmds["py2exe"] = cmd_py2exe

    # we override different "sdist" commands for both environments
    if "sdist" in cmds:
        _sdist = cmds["sdist"]
    elif "setuptools" in sys.modules:
        from setuptools.command.sdist import sdist as _sdist
    else:
        from distutils.command.sdist import sdist as _sdist

    class cmd_sdist(_sdist):
        def run(self):
            versions = get_versions()
            self._versioneer_generated_versions = versions
            # unless we update this, the command will keep using the old
            # version
            self.distribution.metadata.version = versions["version"]
            return _sdist.run(self)

        def make_release_tree(self, base_dir, files):
            root = get_root()
            cfg = get_config_from_root(root)
            _sdist.make_release_tree(self, base_dir, files)
            # now locate _version.py in the new base_dir directory
            # (remembering that it may be a hardlink) and replace it with an
            # updated value
            target_versionfile = os.path.join(base_dir, cfg.versionfile_source)
            print("UPDATING %s" % target_versionfile)
            write_to_version_file(target_versionfile, self._versioneer_generated_versions)

    cmds["sdist"] = cmd_sdist

    return cmds


CONFIG_ERROR = """
setup.cfg is missing the necessary Versioneer configuration. You need
a section like:

 [versioneer]
 VCS = git
 style = pep440
 versionfile_source = src/myproject/_version.py
 versionfile_build = myproject/_version.py
 tag_prefix =
 parentdir_prefix = myproject-

You will also need to edit your setup.py to use the results:

 import versioneer
 setup(version=versioneer.get_version(),
       cmdclass=versioneer.get_cmdclass(), ...)

Please read the docstring in ./versioneer.py for configuration instructions,
edit setup.cfg, and re-run the installer or 'python versioneer.py setup'.
"""

SAMPLE_CONFIG = """
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.

[versioneer]
#VCS = git
#style = pep440
#versionfile_source =
#versionfile_build =
#tag_prefix =
#parentdir_prefix =

"""

INIT_PY_SNIPPET = """
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
"""


def do_setup():
    """Do main VCS-independent setup function for installing Versioneer."""
    root = get_root()
    try:
        cfg = get_config_from_root(root)
    except (EnvironmentError, configparser.NoSectionError, configparser.NoOptionError) as e:
        if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
            print("Adding sample versioneer config to setup.cfg", file=sys.stderr)
            with open(os.path.join(root, "setup.cfg"), "a") as f:
                f.write(SAMPLE_CONFIG)
        print(CONFIG_ERROR, file=sys.stderr)
        return 1

    print(" creating %s" % cfg.versionfile_source)
    with open(cfg.versionfile_source, "w") as f:
        LONG = LONG_VERSION_PY[cfg.VCS]
        f.write(
            LONG
            % {
                "DOLLAR": "$",
                "STYLE": cfg.style,
                "TAG_PREFIX": cfg.tag_prefix,
                "PARENTDIR_PREFIX": cfg.parentdir_prefix,
                "VERSIONFILE_SOURCE": cfg.versionfile_source,
            }
        )

    ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py")
    if os.path.exists(ipy):
        try:
            with open(ipy, "r") as f:
                old = f.read()
        except EnvironmentError:
            old = ""
        if INIT_PY_SNIPPET not in old:
            print(" appending to %s" % ipy)
            with open(ipy, "a") as f:
                f.write(INIT_PY_SNIPPET)
        else:
            print(" %s unmodified" % ipy)
    else:
        print(" %s doesn't exist, ok" % ipy)
        ipy = None

    # Make sure both the top-level "versioneer.py" and versionfile_source
    # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so
    # they'll be copied into source distributions. Pip won't be able to
    # install the package without this.
    manifest_in = os.path.join(root, "MANIFEST.in")
    simple_includes = set()
    try:
        with open(manifest_in, "r") as f:
            for line in f:
                if line.startswith("include "):
                    for include in line.split()[1:]:
                        simple_includes.add(include)
    except EnvironmentError:
        pass
    # That doesn't cover everything MANIFEST.in can do
    # (http://docs.python.org/2/distutils/sourcedist.html#commands), so
    # it might give some false negatives. Appending redundant 'include'
    # lines is safe, though.
    if "versioneer.py" not in simple_includes:
        print(" appending 'versioneer.py' to MANIFEST.in")
        with open(manifest_in, "a") as f:
            f.write("include versioneer.py\n")
    else:
        print(" 'versioneer.py' already in MANIFEST.in")
    if cfg.versionfile_source not in simple_includes:
        print(" appending versionfile_source ('%s') to MANIFEST.in" % cfg.versionfile_source)
        with open(manifest_in, "a") as f:
            f.write("include %s\n" % cfg.versionfile_source)
    else:
        print(" versionfile_source already in MANIFEST.in")

    # Make VCS-specific changes. For git, this means creating/changing
    # .gitattributes to mark _version.py for export-subst keyword
    # substitution.
    do_vcs_install(manifest_in, cfg.versionfile_source, ipy)
    return 0


def scan_setup_py():
    """Validate the contents of setup.py against Versioneer's expectations."""
    found = set()
    setters = False
    errors = 0
    with open("setup.py", "r") as f:
        for line in f.readlines():
            if "import versioneer" in line:
                found.add("import")
            if "versioneer.get_cmdclass()" in line:
                found.add("cmdclass")
            if "versioneer.get_version()" in line:
                found.add("get_version")
            if "versioneer.VCS" in line:
                setters = True
            if "versioneer.versionfile_source" in line:
                setters = True
    if len(found) != 3:
        print("")
        print("Your setup.py appears to be missing some important items")
        print("(but I might be wrong). Please make sure it has something")
        print("roughly like the following:")
        print("")
        print(" import versioneer")
        print(" setup( version=versioneer.get_version(),")
        print("        cmdclass=versioneer.get_cmdclass(),  ...)")
        print("")
        errors += 1
    if setters:
        print("You should remove lines like 'versioneer.VCS = ' and")
        print("'versioneer.versionfile_source = ' . This configuration")
        print("now lives in setup.cfg, and should be removed from setup.py")
        print("")
        errors += 1
    return errors


if __name__ == "__main__":
    cmd = sys.argv[1]
    if cmd == "setup":
        errors = do_setup()
        errors += scan_setup_py()
        if errors:
            sys.exit(1)