Repository: justusschock/delira Branch: master Commit: cd3ad277d6fa Files: 236 Total size: 1.2 MB Directory structure: gitextract__xkadp9f/ ├── .codecov.yml ├── .gitattributes ├── .github/ │ └── ISSUE_TEMPLATE/ │ ├── bug_report.md │ ├── feature_request.md │ └── question.md ├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── AUTHORS.rst ├── CODEOWNERS ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── delira/ │ ├── __init__.py │ ├── _backends.py │ ├── _debug_mode.py │ ├── _version.py │ ├── data_loading/ │ │ ├── __init__.py │ │ ├── augmenter.py │ │ ├── data_loader.py │ │ ├── data_manager.py │ │ ├── dataset.py │ │ ├── load_utils.py │ │ ├── numba_transform.py │ │ └── sampler/ │ │ ├── __init__.py │ │ ├── abstract.py │ │ ├── batch.py │ │ ├── random.py │ │ ├── sequential.py │ │ └── weighted.py │ ├── io/ │ │ ├── __init__.py │ │ ├── chainer.py │ │ ├── sklearn.py │ │ ├── tf.py │ │ └── torch.py │ ├── logging/ │ │ ├── __init__.py │ │ ├── base_backend.py │ │ ├── base_logger.py │ │ ├── logging_context.py │ │ ├── registry.py │ │ ├── tensorboard_backend.py │ │ ├── visdom_backend.py │ │ └── writer_backend.py │ ├── models/ │ │ ├── __init__.py │ │ ├── abstract_network.py │ │ └── backends/ │ │ ├── __init__.py │ │ ├── chainer/ │ │ │ ├── __init__.py │ │ │ ├── abstract_network.py │ │ │ └── data_parallel.py │ │ ├── sklearn/ │ │ │ ├── __init__.py │ │ │ └── abstract_network.py │ │ ├── tf_eager/ │ │ │ ├── __init__.py │ │ │ ├── abstract_network.py │ │ │ └── data_parallel.py │ │ ├── tf_graph/ │ │ │ ├── __init__.py │ │ │ └── abstract_network.py │ │ ├── torch/ │ │ │ ├── __init__.py │ │ │ ├── abstract_network.py │ │ │ ├── data_parallel.py │ │ │ └── utils.py │ │ └── torchscript/ │ │ ├── __init__.py │ │ └── abstract_network.py │ ├── training/ │ │ ├── __init__.py │ │ ├── backends/ │ │ │ ├── __init__.py │ │ │ ├── chainer/ │ │ │ │ ├── __init__.py │ │ │ │ ├── experiment.py │ │ │ │ ├── trainer.py │ │ │ │ └── utils.py │ │ │ ├── sklearn/ │ │ │ │ ├── __init__.py │ │ │ │ ├── experiment.py │ │ │ │ ├── trainer.py │ │ │ │ └── utils.py │ │ │ ├── tf_eager/ │ │ │ │ ├── __init__.py │ │ │ │ ├── experiment.py │ │ │ │ ├── trainer.py │ │ │ │ └── utils.py │ │ │ ├── tf_graph/ │ │ │ │ ├── __init__.py │ │ │ │ ├── experiment.py │ │ │ │ ├── trainer.py │ │ │ │ └── utils.py │ │ │ ├── torch/ │ │ │ │ ├── __init__.py │ │ │ │ ├── experiment.py │ │ │ │ ├── trainer.py │ │ │ │ └── utils.py │ │ │ └── torchscript/ │ │ │ ├── __init__.py │ │ │ ├── experiment.py │ │ │ └── trainer.py │ │ ├── base_experiment.py │ │ ├── base_trainer.py │ │ ├── callbacks/ │ │ │ ├── __init__.py │ │ │ ├── abstract_callback.py │ │ │ ├── early_stopping.py │ │ │ ├── logging_callback.py │ │ │ └── pytorch_schedulers.py │ │ ├── losses.py │ │ ├── metrics.py │ │ ├── predictor.py │ │ └── utils.py │ └── utils/ │ ├── __init__.py │ ├── codecs.py │ ├── config.py │ ├── context_managers.py │ ├── decorators.py │ ├── dict_reductions.py │ ├── messenger.py │ ├── path.py │ └── time.py ├── docker/ │ └── Dockerfile ├── docs/ │ ├── Makefile │ ├── _api/ │ │ └── _build/ │ │ ├── delira/ │ │ │ ├── backend_resolution.rst │ │ │ ├── class_hierarchy.rst │ │ │ ├── data_loading/ │ │ │ │ ├── arbitrary_data.rst │ │ │ │ ├── data_loading.rst │ │ │ │ ├── dataloader.rst │ │ │ │ ├── datamanager.rst │ │ │ │ ├── dataset.rst │ │ │ │ ├── nii.rst │ │ │ │ ├── sampler.rst │ │ │ │ └── utils.rst │ │ │ ├── debug_mode.rst │ │ │ ├── delira.io.rst │ │ │ ├── delira.rst │ │ │ ├── delira.utils.rst │ │ │ ├── logging/ │ │ │ │ ├── backends.rst │ │ │ │ ├── base_logger.rst │ │ │ │ ├── handlers.rst │ │ │ │ ├── logging.rst │ │ 
│ │ ├── logging_context.py │ │ │ │ ├── logging_context.rst │ │ │ │ ├── registry.py │ │ │ │ ├── registry.rst │ │ │ │ ├── tensorboard_backend.py │ │ │ │ ├── visdom_backend.py │ │ │ │ └── writer_backend.py │ │ │ ├── models/ │ │ │ │ ├── chainer.rst │ │ │ │ ├── models.rst │ │ │ │ ├── sklearn.rst │ │ │ │ ├── tfeager.rst │ │ │ │ ├── tfgraph.rst │ │ │ │ ├── torch.rst │ │ │ │ └── torchscript.rst │ │ │ └── training/ │ │ │ ├── backends/ │ │ │ │ ├── backends.rst │ │ │ │ ├── chainer.rst │ │ │ │ ├── sklearn.rst │ │ │ │ ├── tfeager.rst │ │ │ │ ├── tfgraph.rst │ │ │ │ ├── torch.rst │ │ │ │ └── torchscript.rst │ │ │ ├── callbacks.rst │ │ │ ├── experiment.rst │ │ │ ├── losses.rst │ │ │ ├── metrics.rst │ │ │ ├── parameters.rst │ │ │ ├── predictor.rst │ │ │ ├── trainer.rst │ │ │ ├── training.rst │ │ │ └── utils.rst │ │ └── modules.rst │ ├── classification_pytorch.rst │ ├── conda.yml │ ├── conf.py │ ├── custom_backend.rst │ ├── gan_pytorch.rst │ ├── getting_started.rst │ ├── index.rst │ ├── requirements.txt │ ├── segmentation_2d_pytorch.rst │ ├── segmentation_3d_pytorch.rst │ └── tutorial_delira.rst ├── notebooks/ │ ├── classification_examples/ │ │ ├── chainer.ipynb │ │ ├── pytorch.ipynb │ │ ├── sklearn.ipynb │ │ ├── tf_eager.ipynb │ │ ├── tf_graph.ipynb │ │ └── torchscript.ipynb │ ├── custom_backend.ipynb │ ├── gan_pytorch.ipynb │ ├── segmentation_2d_pytorch.ipynb │ ├── segmentation_3d_pytorch.ipynb │ └── tutorial_delira.ipynb ├── paper/ │ ├── paper.bib │ └── paper.md ├── pytest.ini ├── requirements/ │ ├── base.txt │ ├── chainer.txt │ ├── tensorflow.txt │ └── torch.txt ├── scripts/ │ └── ci/ │ ├── build_docs.sh │ ├── install_before_docs.sh │ ├── install_before_style_check.sh │ ├── install_before_tests.sh │ ├── run_style_checks.sh │ └── run_tests.sh ├── setup.cfg ├── setup.py ├── tests/ │ ├── __init__.py │ ├── data_loading/ │ │ ├── __init__.py │ │ ├── test_augmenters.py │ │ ├── test_data_loader.py │ │ ├── test_data_manager.py │ │ ├── test_dataset.py │ │ ├── test_numba_transforms.py │ │ ├── test_sampler.py │ │ └── utils.py │ ├── io/ │ │ ├── __init__.py │ │ ├── test_chainer.py │ │ ├── test_sklearn.py │ │ ├── test_tf.py │ │ └── test_torch.py │ ├── logging/ │ │ ├── __init__.py │ │ ├── test_logging_frequency.py │ │ ├── test_logging_outside_trainer.py │ │ └── test_single_threaded_logging.py │ ├── models/ │ │ ├── __init__.py │ │ ├── data_parallel/ │ │ │ ├── __init__.py │ │ │ ├── test_chainer.py │ │ │ └── test_torch.py │ │ └── test_abstract_models.py │ ├── training/ │ │ ├── __init__.py │ │ ├── backends/ │ │ │ ├── __init__.py │ │ │ ├── test_chainer.py │ │ │ ├── test_sklearn.py │ │ │ ├── test_tf_eager.py │ │ │ ├── test_tf_graph.py │ │ │ ├── test_torch.py │ │ │ ├── test_torchscript.py │ │ │ └── utils.py │ │ ├── test_losses_torch.py │ │ └── test_metrics.py │ └── utils/ │ ├── __init__.py │ ├── dict_reductions.py │ ├── test_codecs.py │ ├── test_config.py │ └── test_messenger.py └── versioneer.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .codecov.yml ================================================ comment: off coverage: status: project: default: target: auto threshold: 0.50 base: auto patch: off ignore: - "tests/" - "notebooks/" - "*/__init.py" ================================================ FILE: .gitattributes ================================================ delira/_version.py export-subst ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md 
================================================ --- name: Bug report about: Report a bug and give us a minimal example to reproduce it title: "[Bug]" labels: bug assignees: '' --- **Description** What happens? What should happen? **Environment** * OS: * Python version: * `delira` version * How did you install `delira`? [ pip | source | conda | docker ] **Reproduction** Give us a minimal example to reproduce the error **Additional context** Add any other context about the problem here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Request a feature title: "[FeatureRequest]" labels: new feature assignees: '' --- **Description** What should be added/changed? **Feature History** What have you tried so far? **Proposal** How could the feature be implemented? *Are you able/willing to implement the feature yourself (with some guidance from us)? **Additional context** Add any other context about the feature request here. ================================================ FILE: .github/ISSUE_TEMPLATE/question.md ================================================ --- name: Question about: Ask a question/for support title: "[Question]" labels: question assignees: '' --- **Description** What happens? What should happen? **Environment** * OS: * Python version: * `delira` version * How did you install `delira`? [ pip | source | conda | docker ] * Machine Specs: * Minimal working Example: ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ # pycharm .idea/ .DS_Store .idea .vscode .pytest_cache # delira config */.delira # backend extensions *.pkl events.* *.pt *.pth *.ptj *.chain *.meta # Test results */UnnamedExperiment/* ================================================ FILE: .readthedocs.yml ================================================ # .readthedocs.yml version: 2 formats: - epub - pdf - htmlzip # python: # version: 3.7 # install: # - requirements: docs/requirements.txt # - method: setuptools # system_packages: false build: image: latest conda: environment: docs/conda.yml ================================================ FILE: .travis.yml ================================================ language: python matrix: include: # basic tests withut a backend - name: "Unittests Python 3.5 No Backend" python: 3.5 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="None" - name: "Unittests Python 3.6 No Backend" python: 3.6 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="None" - name: "Unittests Python 3.7 No Backend" python: 3.7 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="None" # SCIKIT-LEARN BACKEND TESTS - name: "Unittests Python 3.5 Sklearn Backend" python: 3.5 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="Sklearn" - name: "Unittests Python 3.6 Sklearn Backend" python: 3.6 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="Sklearn" - name: "Unittests Python 3.7 Sklearn Backend" python: 3.7 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="Sklearn" # TENSORFLOW EAGER BACKEND TESTS - name: "Unittests Python 3.5 TF Eager Backend" python: 3.5 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="TFEager" - name: "Unittests Python 3.6 TF Eager Backend" python: 3.6 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="TFEager" - name: "Unittests Python 3.7 TF Eager Backend" python: 3.7 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="TFEager" # TENSORFLOW GRAPH BACKEND TESTS - name: "Unittests Python 3.5 TF Graph Backend" python: 3.5 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="TFGraph" - name: "Unittests Python 3.6 TF Graph Backend" python: 3.6 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="TFGraph" - name: "Unittests Python 3.7 TF Graph Backend" python: 3.7 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="TFGraph" # PYTORCH BACKEND TESTS - name: "Unittests Python 3.5 Torch Backend" python: 3.5 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="Torch" - name: "Unittests Python 3.6 Torch Backend" python: 3.6 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="Torch" - name: "Unittests Python 3.7 Torch Backend" python: 3.7 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="Torch" # TORCHSCRIPT BACKEND TESTS - name: "Unittests Python 3.5 TorchScript Backend" python: 3.5 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="TorchScript" - 
name: "Unittests Python 3.6 TorchScript Backend" python: 3.6 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="TorchScript" - name: "Unittests Python 3.7 TorchScript Backend" python: 3.7 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="TorchScript" # CHAINER BACKEND TESTS - name: "Unittests Python 3.5 Chainer Backend" python: 3.5 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="Chainer" - name: "Unittests Python 3.6 Chainer Backend" python: 3.6 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="Chainer" - name: "Unittests Python 3.7 Chainer Backend" python: 3.7 dist: xenial env: - TEST_TYPE="unittests" - BACKEND="Chainer" # STATIC CHECKS - name: "Static Style Checks" python: 3.7 dist: xenial env: - TEST_TYPE="style-check" - name: "Documentation" python: 3.7 dist: xenial env: - TEST_TYPE="docs" # command to install dependencies before_install: - if [[ "$TEST_TYPE" == "unittests" ]]; then bash scripts/ci/install_before_tests.sh; elif [[ "$TEST_TYPE" == "docs" ]]; then bash scripts/ci/install_before_docs.sh; else bash scripts/ci/install_before_style_check.sh; pip install -r docs/requirements.txt; fi install: - pip install --no-deps . # command to run tests script: # run tests or stylechecks - if [[ "$TEST_TYPE" == "unittests" ]]; then bash scripts/ci/run_tests.sh; elif [[ "$TEST_TYPE" == "docs" ]]; then bash scripts/ci/build_docs.sh; else bash scripts/ci/run_style_checks.sh; fi after_script: - if [[ "$TEST_TYPE" == "unittests" ]]; then codecov; before_deploy: - cd $TRAVIS_BUILD_DIR deploy: - provider: pages skip_cleanup: true github_token: $GITHUB_TOKEN # Set in travis-ci.org dashboard, marked secure keep-history: true on: branch: master condition: $TEST_TYPE = Docs local_dir: docs/_build/html - provider: pypi user: $PYPI_USERNAME password: $PYPI_PASSWORD on: tags: true distributions: "sdist bdist_wheel" skip_existing: true condition: $TEST_TYPE = style-check ================================================ FILE: AUTHORS.rst ================================================ Authors ========== **Core Development Team:** - Justus Schock: `GitHub `_ | `LinkedIn `_ | `Google Scholar `_ | `E-Mail `_ - Michael Baumgartner: `GitHub `_ | `LinkedIn `_ - Oliver Rippel: `GitHub `_ | `LinkedIn `_ | `Google Scholar `_ - Christoph Haarburger: `GitHub `_ | `LinkedIn `_ | `Google Scholar `_ **Contributions:** - Nicolas Horst - Alexander Moriz ================================================ FILE: CODEOWNERS ================================================ # Use this CODEOWNERS file for automatically request reviews from owners at PRs. # For Details see https://help.github.com/en/articles/about-code-owners # The order of the codeowners is simply alphabetically. # General Namespace (versioning backend resolution etc.) 
/delira/* @justusschock

# DataLoading
/delira/data_loading/ @justusschock @mibaumgartner

# IO
/delira/io/ @justusschock
/delira/io/tf.py @ORippler

# Logging
/delira/logging/ @justusschock @ORippler

# Models
/delira/models/* @justusschock
/delira/models/backends/* @justusschock
/delira/models/backends/chainer/ @justusschock
/delira/models/backends/sklearn/ @justusschock
/delira/models/backends/tf_eager/ @justusschock @ORippler
/delira/models/backends/tf_graph/ @ORippler
/delira/models/backends/torch/ @justusschock @mibaumgartner
/delira/models/backends/torchscript/ @justusschock

# Training
/delira/training/__init__.py @justusschock
/delira/training/base_experiment.py @justusschock @mibaumgartner @ORippler
/delira/training/base_trainer.py @justusschock @mibaumgartner @ORippler
/delira/training/losses.py @mibaumgartner
/delira/training/metrics.py @justusschock @mibaumgartner
/delira/training/parameters.py @justusschock @mibaumgartner
/delira/training/predictor.py @justusschock @mibaumgartner @ORippler
/delira/training/utils.py @justusschock
/delira/training/backends/* @justusschock
/delira/training/backends/chainer/ @justusschock
/delira/training/backends/sklearn/ @justusschock
/delira/training/backends/tf_eager/ @justusschock @ORippler
/delira/training/backends/tf_graph/ @ORippler
/delira/training/backends/torch/ @justusschock @mibaumgartner
/delira/training/backends/torchscript/ @justusschock
/delira/training/callbacks/ @justusschock

# Utils
/delira/utils/ @justusschock @mibaumgartner

# Global repo stuff
/* @justusschock
/docker/ @haarburger
/docs/ @justusschock
/notebooks/* @mibaumgartner
/paper/ @haarburger
/requirements/ @haarburger @justusschock @mibaumgartner @ORippler
/scripts/ci/ @justusschock

# Tests
/tests/* @justusschock
/tests/data_loading @justusschock @mibaumgartner
/tests/io/ @justusschock @ORippler
/tests/logging/ @justusschock @ORippler
/tests/models/ @justusschock
/tests/training/* @mibaumgartner
/tests/training/backends/ @justusschock

================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to `delira`

If you are interested in contributing to `delira`, you will either

* implement a new feature or
* fix a bug.

For both types of contribution, the process is roughly the same:

1. File an issue at [this repo] and discuss the issue with us! Maybe we can give you some hints towards implementation/fixing.
2. Create your own fork of `delira`.
3. In your own fork, start a new branch for the implementation of your issue. Make sure to include basic unittests (we know that the current code is not well tested so far, but we want to change this in the future).
   > **Note:** To improve readability and maintainability, [PEP8 Style](https://www.python.org/dev/peps/pep-0008/) should always be followed (no exceptions).
   > **Note:** To ensure our CI/CD runs correctly, you should *never* use relative imports, only absolute ones.
   > **Note:** If you added a feature, you should also add it to the documentation.
4. After finishing the coding part, send a pull request to [this repo].
5. Afterwards, have a look at your pull request since we might suggest some changes.

If you are not familiar with creating a Pull Request, here are some guides:

- http://stackoverflow.com/questions/14680711/how-to-do-a-github-pull-request
- https://help.github.com/articles/creating-a-pull-request/

## Development Install

To develop `delira` on your machine, here are some tips: 1.
Uninstall all existing installs of `delira`: ``` conda uninstall delira pip uninstall delira pip uninstall delira # run this command twice ``` 2. Clone a copy of `delira` from source: ``` git clone https://github.com/justusschock/delira.git cd delira ``` 3. Install `delira` in `build develop` mode: Install it via ``` python setup.py build develop ``` or ``` pip install -e . ``` This mode will symlink the python files from the current local source tree into the python install. Hence, if you modify a python file, you do not need to reinstall `delira` again and again In case you want to reinstall, make sure that you uninstall `delira` first by running `pip uninstall delira` and `python setup.py clean`. Then you can install in `build develop` mode again. ## Unit testing Unittests are located under `test/`. Run the entire test suite with ``` python test/run_test.py ``` or run individual test files, like `python test/test_dummy.py`, for individual test suites. ### Better local unit tests with unittest Testing is done with a `unittest` suite ## Writing documentation `delira` uses [numpy style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html) for formatting docstrings. Length of line inside docstrings block must be limited to 80 characters to fit into Jupyter documentation popups. [this repo]: https://github.com/delira-dev/delira ================================================ FILE: LICENSE ================================================ GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. Developers that use our General Public Licenses protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License which gives you legal permission to copy, distribute and/or modify the software. A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate. Many developers of free software are heartened and encouraged by the resulting cooperation. However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public. 
The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community. It requires the operator of a network server to provide the source code of the modified version running there to the users of that server. Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version. An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals. This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU Affero General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. 
A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. 
You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. 
c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). 
The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. 
For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. 
You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Remote Network Interaction; Use with the GNU General Public License. Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software. This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source. 
For example, if your program is a web application, its interface could display a "Source" link that leads users to an archive of the code. There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements.

You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see .

================================================
FILE: MANIFEST.in
================================================
include requirements/*.txt
include *.md
include LICENSE
include notebooks/*.ipynb
include setup.cfg
include versioneer.py
include delira/_version.py

================================================
FILE: README.md
================================================
[](https://join.slack.com/t/deliradev/shared_invite/enQtNjI1MjA4MjQzMzQ2LTUzNTQ0MjQyNjJjNzgyODczY2Y1YjYxNjA3ZmQ0MGFhODhkYzQ4M2RjMGM1YWM3YWU5MDM0ZjdiNTQ4MmQ0ZDk)
[![PyPI version](https://badge.fury.io/py/delira.svg)](https://badge.fury.io/py/delira)
[![Build Status](https://travis-ci.com/delira-dev/delira.svg?branch=master)](https://travis-ci.com/delira-dev/delira)
[![Documentation Status](https://readthedocs.org/projects/delira/badge/?version=master)](https://delira.readthedocs.io/en/master/?badge=master)
[![codecov](https://codecov.io/gh/justusschock/delira/branch/master/graph/badge.svg)](https://codecov.io/gh/delira-dev/delira)
[![DOI](http://joss.theoj.org/papers/10.21105/joss.01488/status.svg)](https://doi.org/10.21105/joss.01488)

![logo](docs/_static/logo/delira.svg "delira - A Backend Agnostic High Level Deep Learning Library")

# delira - A Backend Agnostic High Level Deep Learning Library

Authors: [Justus Schock, Michael Baumgartner, Oliver Rippel, Christoph Haarburger](AUTHORS.rst)

Copyright (C) 2020 by RWTH Aachen University http://www.rwth-aachen.de

License: This software is dual-licensed under:
• Commercial license (please contact: lfb@lfb.rwth-aachen.de)
• AGPL (GNU Affero General Public License) open source license

## Introduction

`delira` is designed to work as a backend-agnostic high-level deep learning library. You can choose among several computation [backends](#choose-backend). It allows you to compare different models written for different backends without rewriting them. To this end, `delira` couples the entire training and prediction logic in backend-agnostic modules to achieve identical training behavior across all backends.

`delira` is designed in a very modular way so that almost everything is easily exchangeable or customizable.

A (non-comprehensive) list of the features included in `delira`:

* Dataset loading
* Dataset sampling
* Augmentation (multi-threaded) including 3D images with any number of channels (based on [`batchgenerators`](https://github.com/MIC-DKFZ/batchgenerators))
* A generic trainer class that implements the training process for all [backends](#choose-backend)
* Training monitoring using [Visdom](https://github.com/facebookresearch/visdom) or [Tensorboard](https://www.tensorflow.org/guide/summaries_and_tensorboard)
* Model save and load functions
* Already implemented datasets
* Many operations and utilities for medical imaging

## What about the name?

`delira` started as a library to enable deep learning research and fast prototyping in medical imaging (especially in radiology).
That's also where the name comes from: `delira` was an acronym for **DE**ep **L**earning **I**n **RA**diology. To accommodate many other use cases, we have since broadened the framework's focus quite a bit; many medical-related utilities are still included, and we are constantly working on factoring them out.

## Installation

### Choose Backend

You may choose a backend from the list below. If your desired backend is not listed and you want to add it, please open an issue (it should not be hard at all) and we will guide you through the process.

| Backend | Binary Installation | Source Installation | Notes |
|---------|---------------------|---------------------|-------|
| None | `pip install delira` | `pip install git+https://github.com/delira-dev/delira.git` | Training not possible if backend is not installed separately |
| [`torch`](https://pytorch.org) | `pip install delira[torch]` | `git clone https://github.com/delira-dev/delira.git && cd delira && pip install .[torch]` | `delira` with `torch` backend supports mixed-precision training via [NVIDIA/apex](https://github.com/NVIDIA/apex.git) (must be installed separately). |
| [`torchscript`](https://pytorch.org/docs/stable/jit.html) | `pip install delira[torchscript]` | `git clone https://github.com/delira-dev/delira.git && cd delira && pip install .[torchscript]` | The `torchscript` backend currently supports only single-GPU training |
| [`tensorflow eager`](https://www.tensorflow.org/) | `pip install delira[tensorflow]` | `git clone https://github.com/delira-dev/delira.git && cd delira && pip install .[tensorflow]` | The `tensorflow` backend is still very experimental and lacks some [features](https://github.com/delira-dev/delira/issues/47) |
| [`tensorflow graph`](https://www.tensorflow.org/) | `pip install delira[tensorflow]` | `git clone https://github.com/delira-dev/delira.git && cd delira && pip install .[tensorflow]` | The `tensorflow` backend is still very experimental and lacks some [features](https://github.com/delira-dev/delira/issues/47) |
| [`scikit-learn`](https://scikit-learn.org/stable/) | `pip install delira` | `pip install git+https://github.com/delira-dev/delira.git` | / |
| [`chainer`](https://chainer.org/) | `pip install delira[chainer]` | `git clone https://github.com/delira-dev/delira.git && cd delira && pip install .[chainer]` | / |
| Full | `pip install delira[full]` | `git clone https://github.com/delira-dev/delira.git && cd delira && pip install .[full]` | All backends will be installed. |

### Docker

The easiest way to use `delira` is via docker (with the [nvidia-runtime](https://github.com/NVIDIA/nvidia-docker) for GPU support), using either the [Dockerfile](docker/Dockerfile) or the [prebuilt images](https://cloud.docker.com/u/justusschock/repository/docker/justusschock/delira).

### Chat

We have a [community chat on slack](https://deliradev.slack.com). If you need an invitation, just follow [this link](https://join.slack.com/t/deliradev/shared_invite/enQtNjI1MjA4MjQzMzQ2LTUzNTQ0MjQyNjJjNzgyODczY2Y1YjYxNjA3ZmQ0MGFhODhkYzQ4M2RjMGM1YWM3YWU5MDM0ZjdiNTQ4MmQ0ZDk).

## Getting Started

The best way to learn how to use `delira` is to have a look at the [tutorial notebook](notebooks/tutorial_delira.ipynb).
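For a quick first impression, here is a minimal, hypothetical sketch of the data-loading part of the API (random arrays instead of a real dataset, no augmentation transforms and no training loop); the class names come from `delira.data_loading` as shipped in this repository:

```python
import numpy as np
from delira.data_loading import DataManager, DictDataset

# toy data: 100 random single-channel 32x32 "images" with binary labels
dataset = DictDataset({
    "data": np.random.rand(100, 1, 32, 32).astype(np.float32),
    "label": np.random.randint(0, 2, size=100),
})

# batchwise iteration; n_process_augmentation=0 keeps everything single-process
manager = DataManager(dataset, batch_size=16, n_process_augmentation=0,
                      transforms=None)

for batch in manager:
    # each batch is a dict of numpy arrays, e.g. batch["data"].shape == (16, 1, 32, 32)
    print(batch["data"].shape, batch["label"].shape)
```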
Example implementations for classification problems, segmentation approaches and GANs are also provided in the [notebooks](notebooks) folder. ## Documentation The docs are hosted on [ReadTheDocs/Delira](https://delira.rtfd.io). The documentation of the latest master branch can always be found at the project's [github page](https://delira-dev.github.io/delira/). ## Contributing If you find a bug or have an idea for an improvement, please have a look at our [contribution guideline](CONTRIBUTING.md). ================================================ FILE: delira/__init__.py ================================================ from delira._debug_mode import get_current_debug_mode, switch_debug_mode, \ set_debug_mode from delira._backends import get_backends, seed_all from ._version import get_versions as _get_versions import warnings warnings.simplefilter('default', DeprecationWarning) warnings.simplefilter('ignore', ImportWarning) __version__ = _get_versions()['version'] del _get_versions ================================================ FILE: delira/_backends.py ================================================ import os import json from delira._version import get_versions as _get_versions # to register new possible backends, they have to be added to this list. # each backend should consist of a tuple of length 2 with the first entry # being the package import name and the second being the backend abbreviation. # E.g. TensorFlow's package is named 'tensorflow' but if the package is found, # it will be considered as 'tf' later on __POSSIBLE_BACKENDS = (("torch", "torch"), ("tensorflow", "tf"), ("chainer", "chainer"), ("sklearn", "sklearn")) __BACKENDS = () def _determine_backends(): """ Internal Helper Function to determine the currently valid backends by trying to import them. 
The valid backends are not returned, but appended to the global ``__BACKENDS`` variable """ _config_file = __file__.replace("_backends.py", ".delira") # look for config file to determine backend # if file exists: load config into environment variables if not os.path.isfile(_config_file): _backends = {} # try to import all possible backends to determine valid backends import importlib for curr_backend in __POSSIBLE_BACKENDS: try: assert len(curr_backend) == 2 assert all([isinstance(_tmp, str) for _tmp in curr_backend]), \ "All entries in current backend must be strings" # check if backend can be imported bcknd = importlib.util.find_spec(curr_backend[0]) if bcknd is not None: _backends[curr_backend[1]] = True else: _backends[curr_backend[1]] = False del bcknd except ValueError: _backends[curr_backend[1]] = False with open(_config_file, "w") as f: json.dump({"version": _get_versions()['version'], "backend": _backends}, f, sort_keys=True, indent=4) del _backends # set values from config file to variable and empty Backend-List before global __BACKENDS __BACKENDS = [] with open(_config_file) as f: _config_dict = json.load(f) for key, val in _config_dict.pop("backend").items(): if val: __BACKENDS.append(key.upper()) del _config_dict del _config_file # make __BACKENDS non mutable __BACKENDS = tuple(__BACKENDS) def get_backends(): """ Return List of currently available backends Returns ------- list list of strings containing the currently installed backends """ global __BACKENDS if not __BACKENDS: _determine_backends() return __BACKENDS def seed_all(seed): """ Helper Function to seed all available backends Parameters ---------- seed : int the new random seed """ import sys import numpy as np np.random.seed(seed) import random random.seed(seed) if "torch" in sys.modules and "TORCH" in get_backends(): import torch torch.random.manual_seed(seed) elif "tensorflow" in sys.modules and "TF" in get_backends(): import tensorflow as tf tf.random.set_random_seed(seed) elif "chainer" in sys.modules and "CHAINER" in get_backends(): try: import cupy cupy.random.seed(seed) except ImportError: pass ================================================ FILE: delira/_debug_mode.py ================================================ __DEBUG_MODE = False # Functions to get and set the internal __DEBUG_MODE variable. This variable # currently only defines whether to use multiprocessing or not. At the moment # this is only used inside the DataManager, which either returns a # MultiThreadedAugmenter or a SingleThreadedAugmenter depending on the current # debug mode. # All other functions using multiprocessing should be aware of this and # implement a functionality without multiprocessing # (even if this slows down things a lot!). def get_current_debug_mode(): """ Getter function for the current debug mode Returns ------- bool current debug mode """ return __DEBUG_MODE def switch_debug_mode(): """ Alternates the current debug mode """ set_debug_mode(not get_current_debug_mode()) def set_debug_mode(mode: bool): """ Sets a new debug mode Parameters ---------- mode : bool the new debug mode """ global __DEBUG_MODE __DEBUG_MODE = mode ================================================ FILE: delira/_version.py ================================================ # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature).
Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.18 (https://github.com/warner/python-versioneer) """Git implementation of _version.py.""" import errno import os import re import subprocess import sys def get_keywords(): """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). git_refnames = "$Format:%d$" git_full = "$Format:%H$" git_date = "$Format:%ci$" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_config(): """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "pep440" cfg.tag_prefix = "" cfg.parentdir_prefix = "" cfg.versionfile_source = "delira/_version.py" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) return None, p.returncode return stdout, p.returncode def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. 
We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." 
return "+" def render_pep440(pieces): """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): """TAG[.post.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post.dev%d" % pieces["distance"] else: # exception #1 rendered = "0.post.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces): """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Eexceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} def get_versions(): """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. 
for i in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} ================================================ FILE: delira/data_loading/__init__.py ================================================ # basic imports from delira.data_loading.data_loader import DataLoader from delira.data_loading.dataset import AbstractDataset, IterableDataset, \ DictDataset, BaseCacheDataset, BaseExtendCacheDataset, BaseLazyDataset, \ ConcatDataset from delira.data_loading.augmenter import Augmenter from delira.data_loading.data_manager import DataManager from delira.data_loading.load_utils import LoadSample, LoadSampleLabel from delira.data_loading.sampler import * from delira import get_backends as _get_backends # if numba is installed: Import Numba Transforms try: from delira.data_loading.numba_transform import NumbaTransform, \ NumbaTransformWrapper, NumbaCompose except ImportError: pass ================================================ FILE: delira/data_loading/augmenter.py ================================================ import multiprocessing from multiprocessing import connection as mpconnection from collections import Callable import abc import os import sys import numpy as np import random from delira.data_loading.sampler import AbstractSampler, BatchSampler from delira.data_loading.data_loader import DataLoader from delira import get_current_debug_mode class AbstractAugmenter(object): """ Basic Augmenter Class providing a general Augmenter API """ def __init__( self, data_loader, batchsize, sampler, transforms=None, seed=1, drop_last=False): """ Parameters ---------- data_loader : :class:`DataLoader` the dataloader, loading samples for given indices batchsize : int the batchsize to use for sampling sampler : :class:`AbstractSampler` the sampler_old (may be batch sampler_old or usual sampler_old), defining the actual sampling strategy; Is an iterable yielding indices transforms : :class:`collections.Callable` the transforms to apply; defaults to None seed : int the basic seed; default: 1 drop_last : bool whether to drop the last (possibly smaller) batch or not """ self._data_loader = data_loader if not isinstance(sampler, BatchSampler): if isinstance(sampler, AbstractSampler): sampler = BatchSampler(sampler, batchsize, drop_last=drop_last) else: raise TypeError("Invalid Sampler given: %s" % str(sampler)) self._sampler = sampler self._drop_last = drop_last self._transforms = transforms self._seed = seed # seed numpy.random and random as these are the random number # generators, which might be used for sampling np.random.seed(seed) random.seed(seed) @abc.abstractmethod def __iter__(self): raise NotImplementedError class _ParallelAugmenter(AbstractAugmenter): """ An Augmenter that loads and augments multiple batches in parallel """ def __init__(self, data_loader, batchsize, sampler, num_processes=None, transforms=None, seed=1, drop_last=False): """ Parameters ---------- data_loader : :class:`DataLoader` the dataloader, loading samples for given indices batchsize : int the 
batchsize to use for sampling sampler : :class:`AbstractSampler` the sampler_old (may be batch sampler_old or usual sampler_old), defining the actual sampling strategy; Is an iterable yielding indices num_processes : int the number of processes to use for dataloading + augmentation; if None: the number of available CPUs will be used as number of processes transforms : :class:`collections.Callable` the transforms to apply; defaults to None seed : int the basic seed; default: 1 drop_last : bool whether to drop the last (possibly smaller) batch or not """ super().__init__(data_loader, batchsize, sampler, transforms, seed, drop_last) if num_processes is None: num_processes = os.cpu_count() self._num_processes = num_processes self._processes = [] self._index_pipes = [] self._data_pipes = [] self._index_pipe_counter = 0 self._data_pipe_counter = 0 self._abort_event = None self._data_queued = [] self._processes_running = False @property def abort_event(self): """ Property to access the abortion Event Returns ------- :class:`multiprocessing.Event` the abortion event """ return self._abort_event @abort_event.setter def abort_event(self, new_event): """ Setter for the abortion Event; Parameters ---------- new_event : class:`multiprocessing.Event` the new event """ self._abort_event = new_event def _start_processes(self): """ Starts new processes and pipes for interprocess communication """ # reset abortion event self.abort_event = multiprocessing.Event() # for each process do: for i in range(self._num_processes): # start two oneway pipes (one for passing index to workers # and one for passing back data to main process) recv_conn_out, send_conn_out = multiprocessing.Pipe(duplex=False) recv_conn_in, send_conn_in = multiprocessing.Pipe(duplex=False) # create the actual process process = _WorkerProcess(dataloader=self._data_loader, output_pipe=send_conn_out, index_pipe=recv_conn_in, transforms=self._transforms, abort_event=self._abort_event, process_id=i) process.daemon = True process.start() # wait until process was created and started while not process.is_alive(): pass # append process and pipes to list self._processes.append(process) self._index_pipes.append(send_conn_in), self._data_pipes.append(recv_conn_out) self._data_queued.append(0) self._processes_running = True def _shutdown_processes(self): """ Shuts down the processes and resets all related flags and counters """ # create copy to avoid modifying the list we iterate over worker = list( zip(self._data_pipes, self._index_pipes, self._processes)) for _data_conn, _index_conn, _process in worker: _index_conn.send(None) _process.join() if sys.version_info >= (3, 7): _process.close() else: _process.terminate() _index_conn.close() _data_conn.close() self._data_pipes.pop() self._data_queued.pop() self._index_pipes.pop() self._processes.pop() # reset running process flag and counters self._processes_running = False self._data_pipe_counter = 0 self._index_pipe_counter = 0 @property def _next_index_pipe(self): """ Property implementing switch to next index pipe """ ctr = self._index_pipe_counter new_ctr = (self._index_pipe_counter + 1) % self._num_processes self._index_pipe_counter = new_ctr return ctr @property def _next_data_pipe(self): """ Property implementing switch to next data pipe """ ctr = self._data_pipe_counter new_ctr = (self._data_pipe_counter + 1) % self._num_processes self._data_pipe_counter = new_ctr return ctr def _enqueue_indices(self, sample_idxs): """ Enqueues a set of indices to workers while iterating over workers in cyclic way 
Parameters ---------- sample_idxs : list the indices to enqueue to the workers """ # iterating over all batch indices for idxs in sample_idxs: # switch to next counter index_pipe_ctr = self._next_index_pipe # increase number of queued batches for current worker self._data_queued[index_pipe_ctr] += 1 # enqueue indices to worker self._index_pipes[index_pipe_ctr].send(idxs) def _receive_data(self): """ Receives data from worker """ # switching to next worker _data_pipe = self._next_data_pipe # receive data from worker data = self._data_pipes[_data_pipe].recv() # decrease number of enqueued batches for current worker self._data_queued[_data_pipe] -= 1 return data def __iter__(self): self._start_processes() sampler_iter = iter(self._sampler) all_sampled = False try: # start by enqueuing two items per process as buffer _indices = [] try: for i in range(self._num_processes * 2): idxs = next(sampler_iter) _indices.append(idxs) except StopIteration: all_sampled = True self._enqueue_indices(_indices) # iterate while not all data has been sampled and any data is # enqueued while True: if self.abort_event.is_set(): raise RuntimeError("Abort Event was set in one of the " "workers") # enqueue additional indices if sampler_old was not already # exhausted try: if not all_sampled: idxs = next(sampler_iter) self._enqueue_indices([idxs]) except StopIteration: all_sampled = True # receive data from workers if any(self._data_queued): yield self._receive_data() else: break except Exception as e: # set abort event to shutdown workers self._abort_event.set() raise e finally: if self._processes_running: self._shutdown_processes() class _WorkerProcess(multiprocessing.Process): """ A Process running an infinite loop of loading data for given indices """ def __init__(self, dataloader: DataLoader, output_pipe: mpconnection.Connection, index_pipe: mpconnection.Connection, abort_event: multiprocessing.Event, transforms: Callable, process_id): """ Parameters ---------- dataloader : :class:`DataLoader` the data loader which loads the data corresponding to the given indices output_pipe : :class:`multiprocessing.connection.Connection` the pipe, the loaded data shoud be sent to index_pipe : :class:`multiprocessing.connection.Connection` the pipe to accept the indices abort_event : class:`multiprocessing.Event` the abortion event; will be set for every Exception; If set: Worker terminates transforms : :class:`collections.Callable` the transforms to transform the data process_id : int the process id """ super().__init__() self._data_loader = dataloader self._output_pipe = output_pipe self._input_pipe = index_pipe self._abort_event = abort_event self._process_id = process_id self._transforms = transforms def run(self) -> None: # set the process id self._data_loader.process_id = self._process_id try: while True: # check if worker should terminate if self._abort_event.is_set(): raise RuntimeError("Abort Event has been set externally") # get indices if available (with timeout to frequently check # for abortions if self._input_pipe.poll(timeout=0.2): idxs = self._input_pipe.recv() # final indices -> shutdown workers if idxs is None: break # load data data = self._data_loader(idxs) # if self._transforms is not None: data = self._transforms(**data) self._output_pipe.send(data) except Exception as e: self._abort_event.set() raise e class _SequentialAugmenter(AbstractAugmenter): """ An Augmenter that loads and augments batches sequentially without any parallelism """ def __init__( self, data_loader, batchsize, sampler, transforms=None, 
seed=1, drop_last=False): """ Parameters ---------- data_loader : :class:`DataLoader` the dataloader, loading samples for given indices sampler : :class:`AbstractSampler` the sampler_old (may be batch sampler_old or usual sampler_old), defining the actual sampling strategy; Is an iterable yielding indices transforms : :class:`collections.Callable` the transforms to apply; defaults to None seed : int the basic seed; default: 1 drop_last : bool whether to drop the last (possibly smaller) batch or not """ super().__init__(data_loader=data_loader, batchsize=batchsize, sampler=sampler, transforms=transforms, seed=seed, drop_last=drop_last) def __iter__(self): # create sampler_old iterator sampler_iter = iter(self._sampler) # for every index load and augment the data for idxs in sampler_iter: # load data data = self._data_loader(idxs) # transform data if transforms given if self._transforms is not None: data = self._transforms(**data) yield data class Augmenter(object): """ The actual Augmenter wrapping the :class:`_SequentialAugmenter` and the :class:`_ParallelAugmenter` and switches between them by arguments and debug mode """ def __init__(self, data_loader, batchsize, sampler, num_processes=None, transforms=None, seed=1, drop_last=False): """ Parameters ---------- data_loader : :class:`DataLoader` the dataloader, loading samples for given indices sampler : :class:`AbstractSampler` the sampler_old (may be batch sampler_old or usual sampler_old), defining the actual sampling strategy; Is an iterable yielding indices num_processes : int the number of processes to use for dataloading + augmentation; if None: the number of available CPUs will be used as number of processes transforms : :class:`collections.Callable` the transforms to apply; defaults to None seed : int the basic seed; default: 1 drop_last : bool whether to drop the last (possibly smaller) batch or not """ self._augmenter = self._resolve_augmenter_cls(num_processes, data_loader=data_loader, batchsize=batchsize, sampler=sampler, transforms=transforms, seed=seed, drop_last=drop_last) @staticmethod def _resolve_augmenter_cls(num_processes, **kwargs): """ Resolves the augmenter class by the number of specified processes and the debug mode and creates an instance of the chosen class Parameters ---------- num_processes : int the number of processes to use for dataloading + augmentation; if None: the number of available CPUs will be used as number of processes **kwargs : additional keyword arguments, used for instantiation of the chosen class Returns ------- :class:`AbstractAugmenter` an instance of the chosen augmenter class """ if get_current_debug_mode() or num_processes == 0: return _SequentialAugmenter(**kwargs) return _ParallelAugmenter(num_processes=num_processes, **kwargs) def __iter__(self): """ Makes the Augmenter iterable by generators Returns ------- Generator a generator function yielding the arguments """ yield from self._augmenter ================================================ FILE: delira/data_loading/data_loader.py ================================================ import numpy as np from delira.data_loading.dataset import AbstractDataset, DictDataset, \ IterableDataset from collections import Iterable, defaultdict class DataLoader: """ Basic Dataloader class, that returns data for a given set of indices and combines it as batches """ def __init__(self, data): """ Parameters ---------- data : Any the data to use; Ideally this either is a dataset, an iterable or a dict, but in general, this must only be indexable, have a 
length and return a dict of arrays if indexed """ self._process_id = None if isinstance(data, AbstractDataset): dataset = data else: # wrap it into dataset depending on datatype if isinstance(data, dict): dataset = DictDataset(data) elif isinstance(data, Iterable): dataset = IterableDataset(data) else: raise TypeError("Invalid dataset type: %s" % type(data).__name__) self.dataset = dataset def __call__(self, indices): """ Loads data for given indices and combines them to batches Parameters ---------- indices : list a list of integers specifying the data indices Returns ------- dict a dict of numpy arrays (specifying the batches) """ # get data for all indices data = [self.dataset[idx] for idx in indices] data_dict = defaultdict(list) # concatenate dict entities by keys for _result_dict in data: for key, val in _result_dict.items(): data_dict[key].append(val) # convert list to numpy arrays for key, val_list in data_dict.items(): data_dict[key] = np.asarray(val_list) return data_dict @property def process_id(self): """ A Property to access the process id Returns ------- int the process id """ if self._process_id is None: return 0 return self._process_id @process_id.setter def process_id(self, new_id): """ Setter for the :attr:`process_id`; Makes sure, that the process id is only set once Parameters ---------- new_id : int Raises ------ AttributeError if the process id has already been set once """ if self._process_id is not None: raise AttributeError("Attribute 'process_id' can be set only once") self._process_id = new_id ================================================ FILE: delira/data_loading/data_manager.py ================================================ import logging from batchgenerators.transforms import AbstractTransform from delira import get_current_debug_mode from delira.data_loading.data_loader import DataLoader from delira.data_loading.sampler import SequentialSampler, AbstractSampler from delira.data_loading.augmenter import Augmenter from delira.data_loading.dataset import DictDataset, IterableDataset, \ AbstractDataset from collections import Iterable import inspect logger = logging.getLogger(__name__) class DataManager(object): """ Class to Handle Data Creates Dataset (if necessary), Dataloader and Augmenter """ def __init__(self, data, batch_size, n_process_augmentation, transforms, sampler_cls=SequentialSampler, drop_last=False, data_loader_cls=None, **sampler_kwargs): """ Parameters ---------- data : str or Dataset if str: Path to data samples if dataset: Dataset batch_size : int Number of samples per batch n_process_augmentation : int Number of processes for augmentations transforms : Data transformations for augmentation sampler_cls : AbstractSampler class defining the sampling strategy drop_last : bool whether to drop the last (possibly smaller) batch data_loader_cls : subclass of SlimDataLoaderBase DataLoader class **sampler_kwargs : other keyword arguments (passed to sampler_cls) Raises ------ AssertionError ``data_loader_cls`` is not :obj:`None` and not a subclass of `DataLoader` TypeError ``data`` is not a Dataset object and not of type dict or iterable See Also -------- :class:`AbstractDataset` """ # Instantiate Hidden variables for property access if sampler_kwargs is None: sampler_kwargs = {} self._batch_size = None self._n_process_augmentation = None self._transforms = None self._data_loader_cls = None self._sampler = None self.drop_last = drop_last # set actual values to properties self.batch_size = batch_size self.n_process_augmentation = 
n_process_augmentation self.transforms = transforms if data_loader_cls is None: logger.info("No dataloader Class specified. Using DataLoader") data_loader_cls = DataLoader else: if not inspect.isclass(data_loader_cls): raise TypeError( "data_loader_cls must be class not instance of class") if not issubclass(data_loader_cls, DataLoader): raise TypeError( "data_loader_cls must be subclass of DataLoader") self.data_loader_cls = data_loader_cls self.data = data if not (inspect.isclass(sampler_cls) and issubclass(sampler_cls, AbstractSampler)): raise TypeError self.sampler_cls = sampler_cls self.sampler_kwargs = sampler_kwargs def get_batchgen(self, seed=1): """ Create DataLoader and Batchgenerator Parameters ---------- seed : int seed for Random Number Generator Returns ------- Augmenter The actual iterable batchgenerator Raises ------ AssertionError :attr:`DataManager.n_batches` is smaller than or equal to zero """ assert self.n_batches > 0 data_loader = self.data_loader_cls( self.data ) sampler = self.sampler_cls.from_dataset(data_loader.dataset, **self.sampler_kwargs) return Augmenter(data_loader=data_loader, batchsize=self.batch_size, sampler=sampler, num_processes=self.n_process_augmentation, transforms=self.transforms, seed=seed, drop_last=self.drop_last ) def get_subset(self, indices): """ Returns a Subset of the current datamanager based on given indices Parameters ---------- indices : iterable valid indices to extract subset from current dataset Returns ------- :class:`DataManager` manager containing the subset """ subset_kwargs = { "batch_size": self.batch_size, "n_process_augmentation": self.n_process_augmentation, "transforms": self.transforms, "sampler_cls": self.sampler_cls, "data_loader_cls": self.data_loader_cls, "drop_last": self.drop_last, **self.sampler_kwargs } return self.__class__( self.data.get_subset(indices), **subset_kwargs) def update_state_from_dict(self, new_state: dict): """ Updates internal state and therefore the behavior from dict. If a key is not specified, the old attribute value will be used Parameters ---------- new_state : dict The dict to update the state from. 
Valid keys are: * ``batch_size`` * ``n_process_augmentation`` * ``data_loader_cls`` * ``sampler_cls`` * ``sampler_kwargs`` * ``transforms`` If a key is not specified, the old value of the corresponding attribute will be used Raises ------ KeyError Invalid keys are specified """ # update batch_size if specified self.batch_size = new_state.pop("batch_size", self.batch_size) # update n_process_augmentation if specified self.n_process_augmentation = new_state.pop( "n_process_augmentation", self.n_process_augmentation) # update data_loader_cls if specified self.data_loader_cls = new_state.pop("data_loader_cls", self.data_loader_cls) # update sampler self.sampler_cls = new_state.pop("sampler_cls", self.sampler_cls) self.sampler_kwargs = new_state.pop("sampler_kwargs", self.sampler_kwargs) self.transforms = new_state.pop("transforms", self.transforms) if new_state: raise KeyError("Invalid Keys in new_state given: %s" % (','.join(map(str, new_state.keys())))) @property def batch_size(self): """ Property to access the batchsize Returns ------- int the batchsize """ return self._batch_size @batch_size.setter def batch_size(self, new_batch_size): """ Setter for current batchsize, casts to int before setting the attribute Parameters ---------- new_batch_size : int, Any the new batchsize; should be int but can be of any type that can be casted to an int """ self._batch_size = int(new_batch_size) @property def n_process_augmentation(self): """ Property to access the number of augmentation processes Returns ------- int number of augmentation processes """ if get_current_debug_mode(): return 0 return self._n_process_augmentation @n_process_augmentation.setter def n_process_augmentation(self, new_process_number): """ Setter for number of augmentation processes, casts to int before setting the attribute Parameters ---------- new_process_number : int, Any new number of augmentation processes; should be int but can be of any type that can be casted to an int """ self._n_process_augmentation = int(new_process_number) @property def transforms(self): """ Property to access the current data transforms Returns ------- None, ``AbstractTransform`` The transformation, can either be None or an instance of ``AbstractTransform`` """ return self._transforms @transforms.setter def transforms(self, new_transforms): """ Setter for data transforms, assert if transforms are of valid type (either None or instance of ``AbstractTransform``) Parameters ---------- new_transforms : None, ``AbstractTransform`` the new transforms """ if new_transforms is not None and not isinstance( new_transforms, AbstractTransform): raise TypeError self._transforms = new_transforms @property def data_loader_cls(self): """ Property to access the current data loader class Returns ------- type Subclass of ``DataLoader`` """ return self._data_loader_cls @data_loader_cls.setter def data_loader_cls(self, new_loader_cls): """ Setter for current data loader class, asserts if class is of valid type (must be a class and a subclass of ``DataLoader``) Parameters ---------- new_loader_cls : type the new data loader class """ if not inspect.isclass(new_loader_cls) and issubclass( new_loader_cls, DataLoader): raise TypeError self._data_loader_cls = new_loader_cls @property def n_samples(self): """ Number of Samples Returns ------- int Number of Samples """ return len(self.dataset) @property def n_batches(self): """ Returns Number of Batches based on batchsize and number of samples Returns ------- int Number of Batches Raises ------ AssertionError 
:attr:`DataManager.n_samples` is smaller than or equal to zero """ assert self.n_samples > 0 n_batches = self.n_samples // self.batch_size truncated_batch = self.n_samples % self.batch_size n_batches += int(bool(truncated_batch) and not self.drop_last) return n_batches @property def dataset(self): return self.data @dataset.setter def dataset(self, new_dset): if not isinstance(new_dset, AbstractDataset): raise TypeError self.data = new_dset def __iter__(self): """ Build-In function to create an iterator. First creates an :class:`Augmenter` and afterwards an iterable for the created augmenter, which is then returned Returns ------- Generator object generator object to iterate over the augmented batches """ return iter(self.get_batchgen()) ================================================ FILE: delira/data_loading/dataset.py ================================================ import abc import os import typing import numpy as np from skimage.transform import resize from sklearn.model_selection import train_test_split from collections import Iterable from tqdm import tqdm from delira.utils import subdirs class AbstractDataset: """ Base Class for Dataset """ def __init__(self, data_path: str, load_fn: typing.Callable): """ Parameters ---------- data_path : str path to data samples load_fn : function function to load single sample """ self.data_path = data_path self._load_fn = load_fn self.data = [] @abc.abstractmethod def _make_dataset(self, path: str): """ Create dataset Parameters ---------- path : str path to data samples Returns ------- list data: List of sample paths if lazy; List of samples if not """ pass @abc.abstractmethod def __getitem__(self, index): """ return data with given index (and loads it before if lazy) Parameters ---------- index : int index of data Returns ------- dict data """ pass def __len__(self): """ Return number of samples Returns ------- int number of samples """ return len(self.data) def __iter__(self): """ Return an iterator for the dataset Returns ------- object a single sample """ return _DatasetIter(self) def get_sample_from_index(self, index): """ Returns the data sample for a given index (without any loading if it would be necessary) This implements the base case and can be subclassed for index mappings. 
The actual loading behaviour (lazy or cached) should be implemented in ``__getitem__`` See Also -------- :method:ConcatDataset.get_sample_from_index :method:BaseLazyDataset.__getitem__ :method:BaseCacheDataset.__getitem__ Parameters ---------- index : int index corresponding to targeted sample Returns ------- Any sample corresponding to given index """ return self.data[index] def get_subset(self, indices): """ Returns a Subset of the current dataset based on given indices Parameters ---------- indices : iterable valid indices to extract subset from current dataset Returns ------- :class:`BlankDataset` the subset """ # extract other important attributes from current dataset kwargs = {} for key, val in vars(self).items(): if not (key.startswith("__") and key.endswith("__")): if key == "data": continue kwargs[key] = val kwargs["old_getitem"] = self.__class__.__getitem__ subset_data = [self.get_sample_from_index(idx) for idx in indices] return BlankDataset(subset_data, **kwargs) class _DatasetIter(object): """ Iterator for dataset """ def __init__(self, dset): """ Parameters ---------- dset: :class: `AbstractDataset` the dataset which should be iterated """ self._dset = dset self._curr_index = 0 def __iter__(self): return self def __next__(self): if self._curr_index >= len(self._dset): raise StopIteration sample = self._dset[self._curr_index] self._curr_index += 1 return sample class DictDataset(AbstractDataset): """ Dataset to wrap a dict of keys and iterables. """ def __init__(self, data: dict): """ Parameters ---------- data : dict dictionary consisting of keys and iterables. The iterables should contain an item for each index """ super().__init__(None, None) self._data = data def __getitem__(self, index: int): """ Function to make the dataset indexable. Returns the sample corresponding to the given index Parameters ---------- index : int the index specifying the sample to return Returns ------- dict the sample corresponding to :param:`index` """ return {k: v[index] for k, v in self._data.items()} def get_sample_from_index(self, index): """ Mapping from index to sample Parameters ---------- index : int the index specifying the sample to return Returns ------- dict the sample corresponding to :param:`index` """ return self[index] def _make_dataset(self, path: str): """ Function to create the dataset (not necessary here, since the data is already in memory) Parameters ---------- path : str the path to load the data from """ pass def __len__(self): """ Function to determine the dataset's length Returns ------- int the number of samples """ return min([len(v) for v in self._data.values()]) class IterableDataset(AbstractDataset): """ Dataset to wrap a list of dicts. """ def __init__(self, data: Iterable): """ Parameters ---------- data : Iterable an iterable of dicts each representing a single sample """ super().__init__(None, None) self._data = data def __getitem__(self, index): """ Function to make the dataset indexable. 
Returns the sample corresponding to the given index Parameters ---------- index : int the index specifying the sample to return Returns ------- dict the sample corresponding to :param:`index` """ return self._data[index] def get_sample_from_index(self, index): """ Mapping from index to sample Parameters ---------- index : int the index specifying the sample to return Returns ------- dict the sample corresponding to :param:`index` """ return self[index] def _make_dataset(self, path: str): """ Function to create the dataset (not necessary here, since the data is already in memory) Parameters ---------- path : str the path to load the data from """ pass def __len__(self): """ Function to determine the dataset's length Returns ------- int the number of samples """ return len(self._data) class BlankDataset(AbstractDataset): """ Blank Dataset loading the data, which has been passed in it's ``__init__`` by it's ``_sample_fn`` """ def __init__(self, data, old_getitem, **kwargs): """ Parameters ---------- data : iterable data to load old_getitem : function get item method of previous dataset **kwargs : additional keyword arguments (are set as class attribute) """ super().__init__(None, None) self.data = data self._old_getitem = old_getitem for key, val in kwargs.items(): setattr(self, key, val) def __getitem__(self, index): """ returns single sample corresponding to ``index`` via the ``_sample_fn`` Parameters ---------- index : int index specifying the data to load Returns ------- dict dictionary containing a single sample """ return self._old_getitem(self, index) def __len__(self): """ returns the length of the dataset Returns ------- int number of samples """ return len(self.data) class BaseCacheDataset(AbstractDataset): """ Dataset to preload and cache data Notes ----- data needs to fit completely into RAM! 
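Examples
--------
A minimal, hypothetical usage sketch; ``load_fn`` and the directory below are
placeholders for a project-specific loading function and data location:

>>> import numpy as np
>>> def load_fn(path):
...     # load a single sample from ``path`` and return it as a dict
...     return {"data": np.load(path), "label": 0}
>>> dset = BaseCacheDataset("/path/to/samples", load_fn)  # placeholder path
>>> sample = dset[0]  # served from the in-memory cache, no further disk access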
""" def __init__(self, data_path: typing.Union[str, list], load_fn: typing.Callable, **load_kwargs): """ Parameters ---------- data_path : str or list if data_path is a string, _sample_fn is called for all items inside the specified directory if data_path is a list, _sample_fn is called for elements in the list load_fn : function function to load a single data sample **load_kwargs : additional loading keyword arguments (image shape, channel number, ...); passed to _sample_fn """ super().__init__(data_path, load_fn) self._load_kwargs = load_kwargs self.data = self._make_dataset(data_path) def _make_dataset(self, path: typing.Union[str, list]): """ Helper Function to make a dataset containing all samples in a certain directory Parameters ---------- path: str or list if data_path is a string, _sample_fn is called for all items inside the specified directory if data_path is a list, _sample_fn is called for elements in the list Returns ------- list list of items which where returned from _sample_fn (typically dict) Raises ------ AssertionError if `path` is not a list and is not a valid directory """ data = [] if isinstance(path, list): # iterate over all elements for p in tqdm(path, unit='samples', desc="Loading samples"): data.append(self._load_fn(p, **self._load_kwargs)) else: # call _sample_fn for all elements inside directory assert os.path.isdir(path), '%s is not a valid directory' % path for p in tqdm(os.listdir(path), unit='samples', desc="Loading samples"): data.append(self._load_fn(os.path.join(path, p), **self._load_kwargs)) return data def __getitem__(self, index): """ return data sample specified by index Parameters ---------- index : int index to specifiy which data sample to return Returns ------- dict data sample """ data_dict = self.get_sample_from_index(index) return data_dict class BaseLazyDataset(AbstractDataset): """ Dataset to load data in a lazy way """ def __init__(self, data_path: typing.Union[str, list], load_fn: typing.Callable, **load_kwargs): """ Parameters ---------- data_path : str or list if data_path is a string, _sample_fn is called for all items inside the specified directory if data_path is a list, _sample_fn is called for elements in the list load_fn : function function to load single data sample **load_kwargs : additional loading keyword arguments (image shape, channel number, ...); passed to _sample_fn """ super().__init__(data_path, load_fn) self._load_kwargs = load_kwargs self.data = self._make_dataset(self.data_path) def _make_dataset(self, path: typing.Union[str, list]): """ Helper Function to make a dataset containing paths to all images in a certain directory Parameters ---------- path : str or list path to data samples Returns ------- list list of sample paths Raises ------ AssertionError if `path` is not a valid directory """ if isinstance(path, list): # generate list from iterable data = list(path) else: # generate list from all items assert os.path.isdir(path), '%s is not a valid directory' % path data = [os.path.join(path, p) for p in os.listdir(path)] return data def __getitem__(self, index): """ load data sample specified by index Parameters ---------- index : int index to specifiy which data sample to load Returns ------- dict loaded data sample """ data_dict = self._load_fn(self.get_sample_from_index(index), **self._load_kwargs) return data_dict class BaseExtendCacheDataset(BaseCacheDataset): """ Dataset to preload and cache data. 
Function to load sample is expected to return an iterable which can contain multiple samples Notes ----- data needs to fit completely into RAM! """ def __init__(self, data_path: typing.Union[str, list], load_fn: typing.Callable, **load_kwargs): """ Parameters ---------- data_path : str or list if data_path is a string, _sample_fn is called for all items inside the specified directory if data_path is a list, _sample_fn is called for elements in the list load_fn : function function to load multiple data samples at once. Needs to return an iterable which extends the internal list. **load_kwargs : additional loading keyword arguments (image shape, channel number, ...); passed to _sample_fn See Also -------- :class: `BaseCacheDataset` """ super().__init__(data_path, load_fn, **load_kwargs) def _make_dataset(self, path: typing.Union[str, list]): """ Helper Function to make a dataset containing all samples in a certain directory Parameters ---------- path: str or iterable if data_path is a string, _sample_fn is called for all items inside the specified directory if data_path is a list, _sample_fn is called for elements in the list Returns ------- list list of items which were returned from _sample_fn (typically dict) Raises ------ AssertionError if `path` is not a list and is not a valid directory """ data = [] if isinstance(path, list): # iterate over all elements for p in tqdm(path, unit='samples', desc="Loading samples"): data.extend(self._load_fn(p, **self._load_kwargs)) else: # call _sample_fn for all elements inside directory assert os.path.isdir(path), '%s is not a valid directory' % path for p in tqdm(os.listdir(path), unit='samples', desc="Loading samples"): data.extend(self._load_fn(os.path.join(path, p), **self._load_kwargs)) return data class ConcatDataset(AbstractDataset): def __init__(self, *datasets): """ Concatenate multiple datasets to one Parameters ---------- datasets: variable number of datasets """ super().__init__(None, None) # TODO: Why should datasets[0] be a list not an AbstractDataset? # check if first item in datasets is list and datasets is of length 1 if (len(datasets) == 1) and isinstance(datasets[0], list): datasets = datasets[0] self.data = datasets def get_sample_from_index(self, index): """ Returns the data sample for a given index (without any loading, even if it would be necessary) This method implements the index mapping of a global index to the subindices for each dataset.
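For example, when concatenating two datasets of lengths 3 and 5, the global index 4 falls into the second dataset and is mapped to its local index ``4 - 3 = 1``.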
The actual loading behaviour (lazy or cached) should be implemented in ``__getitem__`` See Also -------- :meth:`AbstractDataset.get_sample_from_index` :meth:`BaseLazyDataset.__getitem__` :meth:`BaseCacheDataset.__getitem__` Parameters ---------- index : int index corresponding to targeted sample Returns ------- Any sample corresponding to given index """ curr_max_index = 0 for dset in self.data: prev_max_index = curr_max_index curr_max_index += len(dset) if prev_max_index <= index < curr_max_index: return dset[index - prev_max_index] else: continue raise IndexError("Index %d is out of range for %d items in datasets" % (index, len(self))) def __getitem__(self, index): return self.get_sample_from_index(index) def __len__(self): return sum([len(dset) for dset in self.data]) ================================================ FILE: delira/data_loading/load_utils.py ================================================ import collections import os import numpy as np from skimage.io import imread from skimage.transform import resize def norm_range(mode): """ Closure function for range normalization Parameters ---------- mode : str '-1,1' normalizes data to range [-1, 1], while '0,1' normalizes data to range [0, 1] Returns ------- callable normalization function """ def norm_fn(data): """ Returns the input data normalized to the range Parameters ---------- data : np.ndarray data which should be normalized Returns ------- np.ndarray normalized data """ norm = data - data.min() norm = norm / norm.max() if mode == '-1,1': norm = norm - 0.5 norm = norm * 2 elif mode == '0,1': pass else: raise ValueError('%s not supported.' % mode) return norm return norm_fn def norm_zero_mean_unit_std(data): """ Return normalized data with mean 0, standard deviation 1 Parameters ---------- data : np.ndarray Returns ------- np.ndarray normalized data """ return (data - np.mean(data)) / np.std(data) class LoadSample: """ Provides a callable to load a single sample from multiple files in a folder """ def __init__(self, sample_ext: dict, sample_fn: collections.abc.Callable, dtype: dict = None, normalize: tuple = (), norm_fn=norm_range('-1,1'), **kwargs): """ Parameters ---------- sample_ext : dict of iterable Defines the data _sample_ext. The dict key defines the position of the sample inside the returned data dict, while the list defines the files which should be loaded inside the data dict. sample_fn : function function to load a single sample dtype : dict defines the data type which should be used for the respective key normalize : iterable of hashable list of hashable which should be normalized. Can contain entire keys of extension (normalizes each element individually) or provide the file name which should be normalized norm_fn : function function to normalize input.
Default: normalize range to [-1, 1] kwargs : variable number of keyword arguments passed to load function Examples -------- Simple loading function which returns a dict with `data` >>> from delira.data_loading.nii import load_nii >>> load_fn = LoadSample({'data': ['data.nii']}, load_nii) Loading function for data (cast to float32 and normalized) and segmentation (cast to uint8) >>> from delira.data_loading.nii import load_nii >>> load_fn = LoadSample({'data': ['data.nii'], 'seg': ['seg.nii']}, ... load_nii, dtype={'data': 'float32', ... 'seg': 'uint8'}, ... normalize=('data',)) """ if dtype is None: dtype = {} self._sample_ext = sample_ext self._sample_fn = sample_fn self._dtype = dtype self._normalize = normalize self._norm_fn = norm_fn self._kwargs = kwargs def __call__(self, path) -> dict: """ Load sample from multiple files Parameters ---------- path : str defines the path to the folder which contains the _sample_ext Returns ------- dict dict with data defined by _sample_ext """ sample_dict = {} for key, item in self._sample_ext.items(): data_list = [] for f in item: data = self._sample_fn(os.path.join(path, f), **self._kwargs) # _normalize data if necessary if (key in self._normalize) or (f in self._normalize): data = self._norm_fn(data) # cast data to type if key in self._dtype: data = data.astype(self._dtype[key]) # append data data_list.append(data) if len(data_list) == 1: sample_dict[key] = data_list[0][np.newaxis] else: sample_dict[key] = np.stack(data_list) return sample_dict class LoadSampleLabel(LoadSample): def __init__(self, sample_ext: dict, sample_fn: collections.abc.Callable, label_ext: str, label_fn: collections.abc.Callable, dtype: dict = None, normalize: tuple = (), norm_fn=norm_range('-1,1'), sample_kwargs=None, **kwargs): """ Load sample and label from folder Parameters ---------- sample_ext : dict of list Defines the data _sample_ext. The dict key defines the position of the sample inside the returned data dict, while the list defines the files which should be loaded inside the data dict. Passed to LoadSample. sample_fn : function function to load a single sample Passed to LoadSample. label_ext : str extension for label label_fn : function function which returns the label inside a dict dtype : dict defines the data type which should be used for the respective key normalize : iterable of hashable list of hashable which should be normalized. Can contain entire keys of extension (normalizes each element individually) or provide the file name which should be normalized norm_fn : function function to normalize input.
Default: normalize range to [-1, 1] sample_kwargs : additional keyword arguments passed to LoadSample kwargs : variable number of keyword arguments passed to _label_fn See Also -------- :class: `LoadSample` """ if sample_kwargs is None: sample_kwargs = {} super().__init__(sample_ext=sample_ext, sample_fn=sample_fn, dtype=dtype, normalize=normalize, norm_fn=norm_fn, **sample_kwargs) self._label_ext = label_ext self._label_fn = label_fn self._label_kwargs = kwargs def __call__(self, path) -> dict: """ Loads a sample and a label Parameters ---------- path : str Returns ------- dict dict with data and label """ sample_dict = super().__call__(path) label_dict = self._label_fn(os.path.join(path, self._label_ext), **self._label_kwargs) sample_dict.update(label_dict) return sample_dict ================================================ FILE: delira/data_loading/numba_transform.py ================================================ from batchgenerators.transforms import AbstractTransform, Compose import logging from delira import get_current_debug_mode import numba logger = logging.getLogger(__name__) class NumbaTransformWrapper(AbstractTransform): def __init__(self, transform: AbstractTransform, nopython=True, target="cpu", parallel=False, **options): if get_current_debug_mode(): # set options for debug mode logging.debug("Debug mode detected. Overwriting numba options " "nopython to False and target to cpu") nopython = False target = "cpu" transform.__call__ = numba.jit(transform.__call__, nopython=nopython, target=target, parallel=parallel, **options) self._transform = transform def __call__(self, **kwargs): return self._transform(**kwargs) class NumbaTransform(NumbaTransformWrapper): def __init__(self, transform_cls, nopython=True, target="cpu", parallel=False, **kwargs): trafo = transform_cls(**kwargs) super().__init__(trafo, nopython=nopython, target=target, parallel=parallel) class NumbaCompose(Compose): def __init__(self, transforms): super().__init__(transforms=[NumbaTransformWrapper(trafo) for trafo in transforms]) ================================================ FILE: delira/data_loading/sampler/__init__.py ================================================ from delira.data_loading.sampler.abstract import AbstractSampler from delira.data_loading.sampler.batch import BatchSampler from delira.data_loading.sampler.random import RandomSampler, \ RandomSamplerNoReplacement, RandomSamplerWithReplacement from delira.data_loading.sampler.sequential import SequentialSampler from delira.data_loading.sampler.weighted import WeightedRandomSampler, \ PrevalenceRandomSampler ================================================ FILE: delira/data_loading/sampler/abstract.py ================================================ from delira.data_loading.dataset import AbstractDataset class AbstractSampler(object): """ Abstract Class defining a sampler interface """ def __init__(self, indices): """ Parameters ---------- indices : list the indices containing the classes to sample from """ self._indices = indices def __iter__(self): """ Returns an iterator, must be overwritten in subclasses Raises ------ NotImplementedError if not overwritten in subclass """ raise NotImplementedError def __len__(self): """ Defines the class length Returns ------- int the number of samples """ return len(self._indices) @classmethod def from_dataset(cls, dset: AbstractDataset, **kwargs): """ Class Method to create a sampler from a given dataset Parameters ---------- dset : :class:`AbstractDataset` the dataset to create the sampler from 
**kwargs : additional keyword arguments """ if hasattr(dset, "__len__"): length = len(dset) else: length = len([tmp for tmp in dset]) return cls(list(range(length)), **kwargs) ================================================ FILE: delira/data_loading/sampler/batch.py ================================================ from delira.data_loading.sampler.abstract import AbstractSampler class BatchSampler(object): """ A Sampler-Wrapper combining the single indices sampled by a sampler to batches of a given size """ def __init__(self, sampler: AbstractSampler, batch_size, drop_last=False): """ Parameters ---------- sampler : :class:`AbstractSampler` the actual sampler producing single-sized samples batch_size : int the size of each batch drop_last : bool whether or not to discard the last (possibly smaller) batch """ self._sampler = sampler self._batchsize = batch_size self._drop_last = drop_last def __iter__(self): """ Iterator holding lists of sample-indices. Each list contains indices for a single batch Yields ------ list a list containing the sample indices of the current batch """ batch_idxs = [] for idx in self._sampler: batch_idxs.append(idx) if len(batch_idxs) == self._batchsize: yield batch_idxs batch_idxs = [] if not self._drop_last and batch_idxs: yield batch_idxs def __len__(self): """ Defines the class length Returns ------- int number of samples """ num_batches = len(self._sampler) // self._batchsize if not self._drop_last: num_batches += int(bool(len(self._sampler) % self._batchsize)) return num_batches ================================================ FILE: delira/data_loading/sampler/random.py ================================================ from delira.data_loading.sampler.abstract import AbstractSampler import numpy as np class RandomSampler(AbstractSampler): """ A Generic Random Sampler """ def __init__(self, indices, replacement=False, num_samples=None): """ Parameters ---------- indices : list the indices containing the classes to sample from replacement : bool whether to sample with or without replacement num_samples : int the number of samples to provide. 
Must only be specified if :param:`replacement` is True; If not specified, it defaults to the number of samples present in :param:`indices` """ super().__init__(indices) if replacement and num_samples is None: num_samples = len(self._indices) self._replacement = replacement self._num_samples = num_samples def __iter__(self): """ Returns an iterator returning random samples Returns ------- Iterator an iterator returning random samples """ n = len(self._indices) if self._replacement: return iter(np.random.randint(n, size=self._num_samples).tolist()) possible_samples = np.arange(n) np.random.shuffle(possible_samples) return iter(possible_samples) def __len__(self): """ Defines the length of the sampler Returns ------- int the number of samples """ if self._replacement: return self._num_samples else: return super().__len__() class RandomSamplerNoReplacement(RandomSampler): """ A Random Sampler without replacement """ def __init__(self, indices): """ Parameters ---------- indices : list the indices containing the classes to sample from """ super().__init__(indices, False, None) class RandomSamplerWithReplacement(RandomSampler): """ A Random Sampler With Replacement """ def __init__(self, indices, num_samples=None): """ Parameters ---------- indices : list the indices containing the classes to sample from num_samples : int number of samples to provide, if not specified: defaults to the amount values given in :param:`indices` """ super().__init__(indices, True, num_samples) ================================================ FILE: delira/data_loading/sampler/sequential.py ================================================ from delira.data_loading.sampler.abstract import AbstractSampler class SequentialSampler(AbstractSampler): """ Class to implement sequential sampling """ def __iter__(self): """ Creates an iterator returning sequential samples Returns ------- Iterator iterator returning samples in a sequential manner """ return iter(range(len(self._indices))) ================================================ FILE: delira/data_loading/sampler/weighted.py ================================================ from delira.data_loading.sampler.abstract import AbstractSampler from delira.data_loading.dataset import AbstractDataset import numpy as np class WeightedRandomSampler(AbstractSampler): """ Class implementing Weighted Random Sampling """ def __init__(self, weights, num_samples=None): """ Parameters ---------- weights : list per-sample weights num_samples : int number of samples to provide. 
If not specified, this defaults to the number of values given in :param:`weights` """ if num_samples is None: num_samples = len(weights) self._num_samples = num_samples super().__init__(np.arange(num_samples)) self._weights = weights def __iter__(self): """ Defines the actual weighted random sampling Returns ------- Iterator iterator producing random samples """ return iter(np.random.choice(self._indices, size=self._num_samples, p=self._weights)) def __len__(self): """ Defines the length of the sampler Returns ------- int the number of samples """ return self._num_samples class PrevalenceRandomSampler(WeightedRandomSampler): """ Class implementing prevalence weighted sampling """ def __init__(self, indices): """ Parameters ---------- indices : list list of class indices to calculate a weighting from """ weights = np.array(indices).astype(float) classes, classes_count = np.unique(indices, return_counts=True) # compute probabilities target_prob = 1 / classes.shape[0] # generate weight matrix for i, c in enumerate(classes): weights[weights == c] = (target_prob / classes_count[i]) super().__init__(weights, num_samples=len(indices)) @classmethod def from_dataset(cls, dset: AbstractDataset, key="label", **kwargs): """ Class method to create an instance of this sampler by giving it a dataset Parameters ---------- dset : :class:`AbstractDataset` the dataset to create weightings from key : str the key holding the class index for each sample **kwargs : Additional keyword arguments """ return cls([_sample[key] for _sample in dset], **kwargs) ================================================ FILE: delira/io/__init__.py ================================================ from delira import get_backends if "TORCH" in get_backends(): from delira.io.torch import save_checkpoint_torch as torch_save_checkpoint from delira.io.torch import load_checkpoint_torch as torch_load_checkpoint from delira.io.torch import save_checkpoint_torchscript \ as torchscript_save_checkpoint from delira.io.torch import load_checkpoint_torchscript \ as torchscript_load_checkpoint if "TF" in get_backends(): from delira.io.tf import save_checkpoint as tf_save_checkpoint from delira.io.tf import load_checkpoint as tf_load_checkpoint from delira.io.tf import save_checkpoint_eager as tf_eager_save_checkpoint from delira.io.tf import load_checkpoint_eager as tf_eager_load_checkpoint if "CHAINER" in get_backends(): from delira.io.chainer import save_checkpoint as chainer_save_checkpoint from delira.io.chainer import load_checkpoint as chainer_load_checkpoint if "SKLEARN" in get_backends(): from delira.io.sklearn import load_checkpoint as sklearn_load_checkpoint from delira.io.sklearn import save_checkpoint as sklearn_save_checkpoint ================================================ FILE: delira/io/chainer.py ================================================ import chainer import zipfile import os import json def save_checkpoint(file, model=None, optimizers=None, epoch=None): """ Saves the given checkpoint Parameters ---------- file : str string containing the path, the state should be saved to model : :class:`AbstractChainerNetwork` optimizers : dict dictionary containing all optimizers epoch : int the current epoch """ # config file for path mapping inside the archive save_config = {} # files to write to archive and delete afterwards del_files = [] # save model to hdf5 if model is not None: # temporary filename _curr_file = file.replace("chain", "model") # serialize to temporary file chainer.serializers.save_hdf5(_curr_file, model) #
add to config (without path to navigate inside archive) save_config["model"] = os.path.basename(_curr_file) # append to files to process del_files.append(_curr_file) # save all optimizers to hdf5 if optimizers is not None: # dict for mapping optimizer names to files optim_config = {} for k, v in optimizers.items(): # temporary file _curr_file = file.replace("chain", "optim.%s" % str(k)) # serialize to temporary file chainer.serializers.save_hdf5(_curr_file, v) # add to optimizer config (without path to navigate inside archive) optim_config[k] = os.path.basename(_curr_file) # append to files to process del_files.append(_curr_file) # add optimizer path mapping to config save_config["optimizers"] = optim_config # add epoch to config if epoch is not None: save_config["epoch"] = epoch # temporary config file _curr_file = file.replace("chain", "config") # serialize config dict to temporary json config file with open(_curr_file, "w") as f: json.dump(save_config, f) # append to files to process del_files.append(_curr_file) # create the actual archive with zipfile.ZipFile(file, mode="w") as f: for _file in del_files: # write temporary file to archive and remove it afterwards f.write(_file, os.path.basename(_file)) os.remove(_file) def _deserialize_and_load(archive: zipfile.ZipFile, file: str, obj, temp_dir: str): """ Helper Function to temporarily extract a file from a given archive, deserialize the object in this file and remove the temporary file Parameters ---------- archive : :class:`zipfile.Zipfile` the archive containing the file to deserialize file : str identifier specifying the file inside the archive to extract and deserialize obj : Any the object to load the deserialized state to. Must provide a `serialize` function temp_dir : str the directory the file will be temporarily extracted to Returns ------- Any the object with the loaded and deserialized state """ # temporary extract file archive.extract(file, temp_dir) # deserialize object chainer.serializers.load_hdf5(os.path.join(temp_dir, file), obj) # remove temporary file os.remove(os.path.join(temp_dir, file)) return obj def load_checkpoint(file, old_state: dict = None, model: chainer.link.Link = None, optimizers: dict = None): """ Loads a state from a given file Parameters ---------- file : str string containing the path to the file containing the saved state old_state : dict dictionary containing the modules to load the states to model : :class:`chainer.link.Link` the model the state should be loaded to; overwrites the ``model`` key in ``old_state`` if not None optimizers : dict dictionary containing all optimizers. 
overwrites the ``optimizers`` key in ``old_state`` if not None Returns ------- dict the loaded state """ if old_state is None: old_state = {} if model is not None: old_state["model"] = model if optimizers is not None: old_state["optimizers"] = optimizers loaded_state = {} # open zip archive with zipfile.ZipFile(file) as f: # load config _curr_file = file.replace("chain", "config") # temporarily extract json file to dir f.extract(os.path.basename(_curr_file), os.path.dirname(file)) # load config dict with open(_curr_file) as _file: config = json.load(_file) # remove temporary json file os.remove(_curr_file) # load model if path is inside config if "model" in config: # open file in archive by temporary extracting it loaded_state["model"] = _deserialize_and_load( f, config["model"], old_state["model"], os.path.dirname(file)) # load optimizers if path mapping is inside config if "optimizers" in config: loaded_state["optimizers"] = {} optimizer_config = config["optimizers"] for k, v in optimizer_config.items(): # open file in archive by temporary extracting it loaded_state["optimizers"][k] = _deserialize_and_load( f, v, old_state["optimizers"][k], os.path.dirname(file)) # load epoch from config if possible if "epoch" in config: loaded_state["epoch"] = config["epoch"] return loaded_state ================================================ FILE: delira/io/sklearn.py ================================================ import logging import joblib logger = logging.getLogger(__name__) def save_checkpoint(file: str, model=None, epoch=None, **kwargs): """ Save model's parameters Parameters ---------- file : str filepath the model should be saved to model : AbstractNetwork or None the model which should be saved if None: empty dict will be saved as state dict epoch : int current epoch (will also be pickled) """ return_val = joblib.dump({"model": model, "epoch": epoch}, file, **kwargs) return return_val def load_checkpoint(file, **kwargs): """ Loads a saved model Parameters ---------- file : str filepath to a file containing a saved model **kwargs: Additional keyword arguments (passed to torch.load) Especially "map_location" is important to change the device the state_dict should be loaded to Returns ------- OrderedDict checkpoint state_dict """ return joblib.load(file, **kwargs) ================================================ FILE: delira/io/tf.py ================================================ from delira.models.backends.tf_eager import AbstractTfEagerNetwork import typing import logging import tensorflow as tf logger = logging.getLogger(__name__) def save_checkpoint(file: str, model=None): """ Save model's parameters contained in it's graph Parameters ---------- file : str filepath the model should be saved to model : TfNetwork the model which should be saved """ tf.train.Saver().save(model._sess, file) def load_checkpoint(file: str, model=None): """ Loads a saved model Parameters ---------- file : str filepath to a file containing a saved model model : TfNetwork the model which should be loaded """ # following operation adds AssignVariableOps to the graph, keep an eye on # this for memory leak tf.train.Saver().restore(model._sess, file) return {} def _create_varlist(model: AbstractTfEagerNetwork = None, optimizer: typing.Dict[str, tf.train.Optimizer] = None): variable_list = [] if model is not None: variable_list += model.variables if optimizer is not None: for k, v in optimizer.items(): variable_list += v.variables() return variable_list def save_checkpoint_eager(file, model: AbstractTfEagerNetwork = 
None, optimizer: typing.Dict[str, tf.train.Optimizer] = None, epoch=None): variable_list = _create_varlist(model, optimizer) # can only save if variables exist, this is not the case if there was no # input forwarded through the network (yet) if variable_list: saver = tf.contrib.eager.Saver(variable_list) saver.save(file, global_step=epoch) return logging.warning("Could not save any variables because they don't exist " "(yet). If you haven't forwarded any input through your " "network yet, this is not an error, but expected behavior") def load_checkpoint_eager(file, model: AbstractTfEagerNetwork = None, optimizer: typing.Dict[str, tf.train.Optimizer] = None): variable_list = _create_varlist(model, optimizer) if variable_list: saver = tf.contrib.eager.Saver(variable_list) saver.restore(file) return {"model": model, "optimizer": optimizer} raise RuntimeError( "No Variables found to restore, probably no variables " "exist, because they aren't yet created. Make sure, you " "have at least once forwarded an input through your " "model!") ================================================ FILE: delira/io/torch.py ================================================ from delira.models.backends.torchscript import AbstractTorchScriptNetwork from delira.models.backends.torch import AbstractPyTorchNetwork import torch import logging import os from collections import OrderedDict logger = logging.getLogger(__name__) def save_checkpoint_torch(file: str, model=None, optimizers=None, epoch=None, **kwargs): """ Save checkpoint Parameters ---------- file : str filepath the model should be saved to model : AbstractNetwork or None the model which should be saved if None: empty dict will be saved as state dict optimizers : dict dictionary containing all optimizers epoch : int current epoch (will also be pickled) """ if optimizers is None: optimizers = {} if isinstance(model, torch.nn.DataParallel): _model = model.module else: _model = model if isinstance(_model, (AbstractPyTorchNetwork, AbstractTorchScriptNetwork)): model_state = _model.state_dict() else: model_state = {} logger.debug("Saving checkpoint without Model") optim_state = OrderedDict() for key, val in optimizers.items(): if isinstance(val, torch.optim.Optimizer): optim_state[key] = val.state_dict() if not optim_state: logger.debug("Saving checkpoint without Optimizer") if epoch is None: epoch = 0 state = {"optimizer": optim_state, "model": model_state, "epoch": epoch} torch.save(state, file, **kwargs) def load_checkpoint_torch(file, **kwargs): """ Loads a saved model Parameters ---------- file : str filepath to a file containing a saved model **kwargs: Additional keyword arguments (passed to torch.load) Especially "map_location" is important to change the device the state_dict should be loaded to Returns ------- OrderedDict checkpoint state_dict """ checkpoint = torch.load(file, **kwargs) if not all([_key in checkpoint for _key in ["model", "optimizer", "epoch"]]): return checkpoint['state_dict'] return checkpoint def save_checkpoint_torchscript(file: str, model=None, optimizers=None, epoch=None, **kwargs): """ Save current checkpoint to two different files: 1.) ``file + "_model.ptj"``: Will include the state of the model (including the graph; this is the opposite to :func:`save_checkpoint`) 2.) 
``file + "_trainer_state.pt"``: Will include the states of all optimizers and the current epoch (if given) Parameters ---------- file : str filepath the model should be saved to model : AbstractPyTorchJITNetwork or None the model which should be saved if None: empty dict will be saved as state dict optimizers : dict dictionary containing all optimizers epoch : int current epoch (will also be pickled) """ # remove file extension if given if optimizers is None: optimizers = {} if any([file.endswith(ext) for ext in [".pth", ".pt", ".ptj"]]): file, old_ext = file.rsplit(".", 1) if old_ext != "ptj": logger.info("File extension was changed from %s to ptj to " "indicate that the current module is a " "torchscript module (including the graph)") if isinstance(model, AbstractTorchScriptNetwork): torch.jit.save(model, file + ".model.ptj") if optimizers or epoch is not None: save_checkpoint_torch(file + ".trainer_state.pt", None, optimizers=optimizers, epoch=epoch, **kwargs) def load_checkpoint_torchscript(file: str, **kwargs): """ Loads a saved checkpoint consisting of 2 files (see :func:`save_checkpoint_jit` for details) Parameters ---------- file : str filepath to a file containing a saved model **kwargs: Additional keyword arguments (passed to torch.load) Especially "map_location" is important to change the device the state_dict should be loaded to Returns ------- OrderedDict checkpoint state_dict """ # load model if os.path.isfile(file): model_file = file elif os.path.isfile(file.replace(".ptj", ".model.ptj")): model_file = file.replace(".ptj", ".model.ptj") else: raise ValueError("No Model File found for %s" % file) # load trainer state (if possible) trainer_file = model_file.replace(".model.ptj", ".trainer_state.pt") if os.path.isfile(trainer_file): trainer_state = load_checkpoint_torch(trainer_file, **kwargs) else: trainer_state = {"optimizer": {}, "epoch": None} trainer_state.update({"model": torch.jit.load(model_file)}) return trainer_state ================================================ FILE: delira/logging/__init__.py ================================================ from delira.logging.tensorboard_backend import TensorboardBackend from delira.logging.visdom_backend import VisdomBackend from delira.logging.base_backend import BaseBackend from delira.logging.writer_backend import WriterLoggingBackend from delira.logging.base_logger import Logger, SingleThreadedLogger, \ make_logger from delira.logging.registry import unregister_logger, register_logger, \ get_logger, logger_exists, log as _log, get_available_loggers from delira.logging.logging_context import LoggingContext log = _log ================================================ FILE: delira/logging/base_backend.py ================================================ from queue import Empty from abc import abstractmethod, ABCMeta from threading import Event from queue import Queue import warnings _FUNCTIONS_WITHOUT_STEP = ("graph_pytorch", "graph_tf", "graph_onnx", "embedding") # Deprecated Keys with their future alternative _DEPRECATED_KEYS = {"img": "image", "picture": "image", "imgs": "images", "pictures": "images", "bounding_boxes": "image_with_boxes", "bboxes": "image_with_boxes", "value": "scalar", "values": "scalar", "hist": "histogram", "fig": "figure", "sound": "audio", "pr": "pr_curve", "curve": "line", "hm": "heatmap"} class BaseBackend(object, metaclass=ABCMeta): """ The basic Logging Backend, Provides an abstract interface to log different value types and some keyword mappings """ class FigureManager: """ A Figure Manager, which 
creates a figure during entrance and pushes the figure to logging writer during exit """ def __init__(self, push_fn, figure_kwargs: dict, push_kwargs: dict): """ Parameters ---------- push_fn : function A function accepting a figure and some keyword arguments to push it to the logging writer figure_kwargs : dict dictionary containing all keyword arguments to create the figure push_kwargs : dict dictionary containing all keyword arguments to push the figure to the loggging writer """ self._push_fn = push_fn self._figure_kwargs = figure_kwargs self._push_kwargs = push_kwargs self._fig = None def __enter__(self): """ Function to be executed during context-manager entrance; Will create a figure with the figure kwargs """ from matplotlib.pyplot import figure self._fig = figure(**self._figure_kwargs) def __exit__(self, *args): """ Function to be executed during context-manager exit; Will push the figure to the logging writer and destroy it afterwards Parameters ---------- *args : arbitrary positional arguments; Necessary to be compatible with other context managers, but not used in this one """ from matplotlib.pyplot import close self._push_fn(figure=self._fig, **self._push_kwargs) close(self._fig) self._fig = None def __init__(self, abort_event: Event = None, queue: Queue = None): """ Parameters ---------- abort_event : :class:`threading.Event` the event to signalize, when the logger must be destroyed queue : :class:`queue.Queue` the queue to enqueue all tuples of mapped functions and the corresponding arguments before their execution """ super().__init__() self.KEYWORD_FN_MAPPING = {} self.daemon = True self._queue = queue self._abort_event = abort_event self._global_steps = {} # create Keyword mapping self.KEYWORD_FN_MAPPING.update(**{ "image": self._image, "img": self._image, "picture": self._image, "images": self._images, "imgs": self._images, "pictures": self._images, "image_with_boxes": self._image_with_boxes, "bounding_boxes": self._image_with_boxes, "bboxes": self._image_with_boxes, "scalar": self._scalar, "value": self._scalar, "scalars": self._scalars, "values": self._scalars, "histogram": self._histogram, "hist": self._histogram, "figure": self._figure, "fig": self._figure, "audio": self._audio, "sound": self._audio, "video": self._video, "text": self._text, "graph_pytorch": self._graph_pytorch, "graph_tf": self._graph_tf, "graph_onnx": self._graph_onnx, "embedding": self._embedding, "pr_curve": self._pr_curve, "pr": self._pr_curve, "scatter": self._scatter, "line": self._line, "curve": self._line, "stem": self._stem, "heatmap": self._heatmap, "hm": self._heatmap, "bar": self._bar, "boxplot": self._boxplot, "surface": self._surface, "contour": self._contour, "quiver": self._quiver, # "mesh": self._mesh }) def _log_item(self): """ Internal helper function to log an item of the queue Raises ------ ValueError if the item to log is not a dict """ # get item from dict process_item = self._queue.get(timeout=0.001) # log item if item is dict if isinstance(process_item, dict): for key, val in process_item.items(): # raise DeprecationWarning for deprecated keys if key in _DEPRECATED_KEYS: warnings.warn("The Key %s is deprecated and will" " be removed in the next release. " "Please use %s instead!" 
% (key, _DEPRECATED_KEYS[key]), DeprecationWarning) # performs the actual mapping execute_fn = self.KEYWORD_FN_MAPPING[str(key).lower()] # resolve the global step val = self._resolve_global_step(str(key).lower(), **val) # execute the logging function self._call_exec_fn(execute_fn, val) # item is no dict -> raise Error else: raise ValueError("Invalid Value passed for logging: %s" % str(process_item)) def _resolve_global_step(self, key, **val): """ Helper function to resolve the global step from given Arguments Parameters ---------- key : str the function key to resolve the step for **val : kwargs which may contain the step information Returns ------- int the global step Raises ------ ValueError If no valid tag was found although a tag should exist """ # check if function should be processed statically # (no time update possible) if str(key).lower() not in _FUNCTIONS_WITHOUT_STEP: # check for different step names if "tag" in val: tag = "tag" elif "main_tag" in val: tag = "main_tag" else: raise ValueError("No valid tag found to extract global step") # check if global step is given if "global_step" not in val or val["global_step"] is None: # check if tag is already part of internal global steps if val[tag] in self._global_steps: # if already existent: increment step for given tag self._global_steps[val[tag]] += 1 step = self._global_steps[val[tag]] else: # if not existent_ set step for given tag to zero step = 0 self._global_steps[val[tag]] = step val.update({"global_step": step}) elif "global_step" in val: self._global_steps[tag] = val["global_step"] return val def run(self): """ Main function which executes the logging, catches exceptions and sets the abortion event if necessary """ try: self._log_item() except Empty: pass except Exception as e: self._abort_event.set() raise e def set_queue(self, queue: Queue): """ Setter Function for the Queue Parameters ---------- queue : :class:`queue.Queue` the new queue """ self._queue = queue def set_event(self, event: Event): """ Setter Function for the abortion event Parameters ---------- event : :class:`threading.Event` the new abortion event """ self._abort_event = event def _call_exec_fn(self, exec_fn, args): """ Helper Function calling the actual mapped function Parameters ---------- exec_fn : function the function which will execute the actual logging args : iterable (listlike) or mapping (dictlike) the arguments passed to the ``exec_fn`` Returns ------- Any the return value obtained by the ``exec_fn`` Raises ------ TypeError if the given ``args`` are neither of type dict or tuple/list """ if isinstance(args, dict): ret_val = exec_fn(**args) elif isinstance(args, (tuple, list)): ret_val = exec_fn(*args) else: raise TypeError("Invalid type for args. Must be either dict, " "tuple or list, but got %s." 
% args.__class__.__name__) return ret_val @abstractmethod def _image(self, *args, **kwargs): """ Abstract Interface Function to log a single image Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _images(self, *args, **kwargs): """ Abstract Interface Function to log multiple images Parameters ---------- *args **kwargs Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _image_with_boxes(self, *args, **kwargs): """ Abstract Interface Function to log a single image with bounding boxes Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _scalar(self, *args, **kwargs): """ Abstract Interface Function to log a single scalar value Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _scalars(self, *args, **kwargs): """ Abstract Interface Function to log multiple scalar values Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _histogram(self, *args, **kwargs): """ Abstract Interface Function to create and log a histogram out of given values Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _figure(self, *args, **kwargs): """ Abstract Interface Function to log a single ``matplotlib`` figure Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _audio(self, *args, **kwargs): """ Abstract Interface Function to log a single audio signal Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _video(self, *args, **kwargs): """ Abstract Interface Function to log a single video Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _text(self, *args, **kwargs): """ Abstract Interface Function to log a single string as text Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _graph_pytorch(self, *args, **kwargs): """ Abstract Interface Function to log a ``PyTorch`` Graph Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _graph_tf(self, *args, **kwargs): """ Abstract Interface Function to log a TF Graph Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary 
keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _graph_onnx(self, *args, **kwargs): """ Abstract Interface Function to log a ONNX Graph Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _embedding(self, *args, **kwargs): """ Abstract Interface Function to create and log an embedding Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError @abstractmethod def _pr_curve(self, *args, **kwargs): """ Abstract Interface Function to calculate and log a PR curve out of given values Parameters ---------- *args arbitrary positional arguments **kwargs arbitrary keyword arguments Raises ------ NotImplementedError If not overwritten in subclass """ raise NotImplementedError def _scatter(self, plot_kwargs: dict, figure_kwargs: dict = None, **kwargs): """ Function to create a scatter plot and push it Parameters ---------- plot_kwargs : dict the arguments for plotting figure_kwargs : dict the arguments to actually create the figure **kwargs : additional keyword arguments for pushing the created figure to the logging writer """ if figure_kwargs is None: figure_kwargs = {} with self.FigureManager(self._figure, figure_kwargs, kwargs): from matplotlib.pyplot import scatter scatter(self, **plot_kwargs) def _line(self, plot_kwargs=None, figure_kwargs=None, **kwargs): """ Function to create a line plot and push it Parameters ---------- plot_kwargs : dict the arguments for plotting figure_kwargs : dict the arguments to actually create the figure **kwargs : additional keyword arguments for pushing the created figure to the logging writer """ if figure_kwargs is None: figure_kwargs = {} if plot_kwargs is None: plot_kwargs = {} with self.FigureManager(self._figure, figure_kwargs, kwargs): from matplotlib.pyplot import plot plot(**plot_kwargs) def _stem(self, plot_kwargs=None, figure_kwargs=None, **kwargs): """ Function to create a stem plot and push it Parameters ---------- plot_kwargs : dict the arguments for plotting figure_kwargs : dict the arguments to actually create the figure **kwargs : additional keyword arguments for pushing the created figure to the logging writer """ if figure_kwargs is None: figure_kwargs = {} if plot_kwargs is None: plot_kwargs = {} with self.FigureManager(self._figure, figure_kwargs, kwargs): from matplotlib.pyplot import stem stem(**plot_kwargs) def _heatmap(self, plot_kwargs=None, figure_kwargs=None, **kwargs): """ Function to create a heatmap plot and push it Parameters ---------- plot_kwargs : dict the arguments for plotting figure_kwargs : dict the arguments to actually create the figure **kwargs : additional keyword arguments for pushing the created figure to the logging writer """ if figure_kwargs is None: figure_kwargs = {} if plot_kwargs is None: plot_kwargs = {} with self.FigureManager(self._figure, figure_kwargs, kwargs): from seaborn import heatmap heatmap(**plot_kwargs) def _bar(self, plot_kwargs=None, figure_kwargs=None, **kwargs): """ Function to create a bar plot and push it Parameters ---------- plot_kwargs : dict the arguments for plotting figure_kwargs : dict the arguments to actually create the figure **kwargs : additional keyword arguments for pushing the created figure to the logging writer 
""" if figure_kwargs is None: figure_kwargs = {} if plot_kwargs is None: plot_kwargs = {} with self.FigureManager(self._figure, figure_kwargs, kwargs): from matplotlib.pyplot import bar bar(**plot_kwargs) def _boxplot(self, plot_kwargs=None, figure_kwargs=None, **kwargs): """ Function to create a boxplot and push it Parameters ---------- plot_kwargs : dict the arguments for plotting figure_kwargs : dict the arguments to actually create the figure **kwargs : additional keyword arguments for pushing the created figure to the logging writer """ if plot_kwargs is None: plot_kwargs = {} if figure_kwargs is None: figure_kwargs = {} with self.FigureManager(self._figure, figure_kwargs, kwargs): from matplotlib.pyplot import boxplot boxplot(**plot_kwargs) def _surface(self, plot_kwargs=None, figure_kwargs=None, **kwargs): """ Function to create a surface plot and push it Parameters ---------- plot_kwargs : dict the arguments for plotting figure_kwargs : dict the arguments to actually create the figure **kwargs : additional keyword arguments for pushing the created figure to the logging writer """ if figure_kwargs is None: figure_kwargs = {} if plot_kwargs is None: plot_kwargs = {} with self.FigureManager(self._figure, figure_kwargs, kwargs): from seaborn import kdeplot kdeplot(**plot_kwargs) def _contour(self, plot_kwargs=None, figure_kwargs=None, **kwargs): """ Function to create a contour plot and push it Parameters ---------- plot_kwargs : dict the arguments for plotting figure_kwargs : dict the arguments to actually create the figure **kwargs : additional keyword arguments for pushing the created figure to the logging writer """ if figure_kwargs is None: figure_kwargs = {} if plot_kwargs is None: plot_kwargs = {} with self.FigureManager(self._figure, figure_kwargs, kwargs): from matplotlib.pyplot import contour contour(**plot_kwargs) def _quiver(self, plot_kwargs=None, figure_kwargs=None, **kwargs): """ Function to create a quiver plot and push it Parameters ---------- plot_kwargs : dict the arguments for plotting figure_kwargs : dict the arguments to actually create the figure **kwargs : additional keyword arguments for pushing the created figure to the logging writer """ if plot_kwargs is None: plot_kwargs = {} if figure_kwargs is None: figure_kwargs = {} with self.FigureManager(self._figure, figure_kwargs, kwargs): from matplotlib.pyplot import quiver quiver(**plot_kwargs) @property def name(self): return "BaseBackend" ================================================ FILE: delira/logging/base_logger.py ================================================ from multiprocessing.queues import Queue as MpQueue from threading import Event from queue import Queue, Full from delira.logging.base_backend import BaseBackend from delira.utils.dict_reductions import get_reduction, possible_reductions, \ reduce_dict import logging from types import FunctionType class Logger(object): """ The actual Logger Frontend, passing logging messages to the assigned logging backend if appropriate or to python's logging module if not """ def __init__(self, backend: BaseBackend, max_queue_size: int = None, logging_frequencies=None, reduce_types=None, level=logging.NOTSET): """ Parameters ---------- backend : :class:`delira.logging.base_backend.BaseBackend` the logging backend to use max_queue_size : int the maximum size for the queue; if queue is full, all additional logging tasks will be dropped until some tasks inside the queue were executed; Per default no maximum size is applied logging_frequencies : int or dict 
specifies how often to log for each key. If int: the integer will be applied to all valid keys. If dict: should contain a frequency per valid key. Missing keys will be filled with a frequency of 1 (log every time). None is equal to an empty dict here. reduce_types : str, FunctionType or dict Values are logged in each iteration. This argument specifies how to reduce them to a single value if a logging_frequency besides 1 is passed. if str: specifies the reduction type to use. Valid types are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'. The given type will be mapped to all valid keys. if FunctionType: specifies the actual reduction function. Will be applied for all keys. if dict: should contain pairs of valid logging keys and either str or FunctionType. Specifies the logging value per key. Missing keys will be filled with a default value of 'last'. Valid types for strings are 'last' | 'first' | 'mean' | 'max' | 'min'. level : int the logging level to use if passing the logging message to python's logging module because it is not appropriate for logging with the assigned logging backend Warnings -------- Since the intermediate values between two logging steps are stored in memory to enable reduction, this might cause OOM errors easily (especially if the logged items are still on GPU). If this occurs you may want to choose a lower logging frequency. """ # 0 means unlimited size, but None is more readable if max_queue_size is None: max_queue_size = 0 # convert to empty dict if None if logging_frequencies is None: logging_frequencies = {} # if int: assign int to all possible keys if isinstance(logging_frequencies, int): logging_frequencies = { k: logging_frequencies for k in backend.KEYWORD_FN_MAPPING.keys()} # if dict: update missing keys with 1 and make sure other values # are ints elif isinstance(logging_frequencies, dict): for k in backend.KEYWORD_FN_MAPPING.keys(): if k not in logging_frequencies: logging_frequencies[k] = 1 else: logging_frequencies[k] = int(logging_frequencies[k]) else: raise TypeError("Invalid Type for logging frequencies: %s" % type(logging_frequencies).__name__) # assign frequencies and create empty queues self._logging_frequencies = logging_frequencies self._logging_queues = {} default_reduce_type = "last" if reduce_types is None: reduce_types = default_reduce_type # map string and function to all valid keys if isinstance(reduce_types, (str, FunctionType)): reduce_types = { k: reduce_types for k in backend.KEYWORD_FN_MAPPING.keys()} # should be dict by now!
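# at this point ``reduce_types`` should map every valid logging key to
# either a reduction name or a callable, e.g. ``reduce_types="mean"`` was
# expanded above to ``{"image": "mean", "scalar": "mean", ...}``;
# the branch below fills missing keys with the default reduction and
# resolves the remaining strings to functions via ``get_reduction``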
if isinstance(reduce_types, dict): # check all valid keys for occurences for k in backend.KEYWORD_FN_MAPPING.keys(): # use default reduce type if necessary if k not in reduce_types: reduce_types[k] = default_reduce_type # check it is either valid string or already function type else: if not isinstance(reduce_types, FunctionType): assert reduce_types[k] in possible_reductions() reduce_types[k] = str(reduce_types[k]) # map all strings to actual functions if isinstance(reduce_types[k], str): reduce_types[k] = get_reduction(reduce_types[k]) else: raise TypeError("Invalid Type for logging reductions: %s" % type(reduce_types).__name__) self._reduce_types = reduce_types self._abort_event = Event() self._flush_queue = Queue(max_queue_size) self._backend = backend self._backend.set_queue(self._flush_queue) self._backend.set_event(self._abort_event) self._level = level def log(self, log_message: dict): """ Main Logging Function, Decides whether to log with the assigned backend or python's internal module Parameters ---------- log_message : dict the message to log; Should be a dict, where the keys indicate the logging function to execute, and the corresponding value holds the arguments necessary to execute this function Raises ------ RuntimeError If the abort event was set externally """ try: if self._abort_event.is_set(): self.close() raise RuntimeError("Abort-Event in logging process was set: %s" % self._backend.name) # convert tuple to dict if necessary if isinstance(log_message, (tuple, list)): if len(log_message) == 2: log_message = (log_message,) log_message = dict(log_message) # try logging and drop item if queue is full try: # logging appropriate message with backend if isinstance(log_message, dict): # multiple logging instances at once possible with # different keys for k, v in log_message.items(): # append tag if tag is given, because otherwise we # would enqueue same types but different tags in same # queue if "tag" in v: queue_key = k + "." 
+ v["tag"] else: queue_key = k # create queue if necessary if queue_key not in self._logging_queues: self._logging_queues[queue_key] = [] # append current message to queue self._logging_queues[queue_key].append({k: v}) # check if logging should be executed if (len(self._logging_queues[queue_key]) % self._logging_frequencies[k] == 0): # reduce elements inside queue reduce_message = reduce_dict( self._logging_queues[queue_key], self._reduce_types[k]) # flush reduced elements self._flush_queue.put_nowait(reduce_message) # empty queue self._logging_queues[queue_key] = [] else: # logging inappropriate message with python's logging logging.log(self._level, log_message) except Full: pass # if an exception was raised anywhere, the abort event will be set except Exception as e: self._abort_event.set() raise e def __call__(self, log_message: dict): """ Makes the class callable and forwards the call to :meth:`delira.logging.base_logger.Logger.log` Parameters ---------- log_message : dict the logging message to log Returns ------- Any the return value obtained by :meth:`delira.logging.base_logger.Logger.log` """ return self.log(log_message) def close(self): """ Function to close the actual logger; Waits for queue closing and sets the abortion event """ if hasattr(self, "_flush_queue"): if isinstance(self._flush_queue, MpQueue): self._flush_queue.close() self._flush_queue.join_thread() if hasattr(self, "abort_event"): self._abort_event.set() def __del__(self): """ Function to be executed, when class instance will be deleted; Calls :meth:`delira.logging.base_logger.Logger.close` """ self.close() class SingleThreadedLogger(Logger): """ A single threaded Logger which executes the backend after logging a single element """ def log(self, log_message: dict): """ Function to log an actual logging message; Calls the backend to execute the logging right after pushing it to the queue Parameters ---------- log_message : dict the message to log; Should be a dict, where the keys indicate the logging function to execute, and the corresponding value holds the arguments necessary to execute this function """ super().log(log_message) self._backend.run() def make_logger(backend: BaseBackend, max_queue_size: int = None, logging_frequencies=None, reduce_types=None, level=logging.NOTSET): """ Function to create a logger Parameters ---------- backend : :class:`delira.logging.base_backend.BaseBackend` the logging backend max_queue_size : int the maximum queue size logging_frequencies : int or dict specifies how often to log for each key. If int: integer will be applied to all valid keys if dict: should contain a frequency per valid key. Missing keys will be filled with a frequency of 1 (log every time) None is equal to empty dict here. reduce_types : str of FunctionType or dict if str: specifies the reduction type to use. Valid types are 'last' | 'first' | 'mean' | 'max' | 'min'. The given type will be mapped to all valid keys. if FunctionType: specifies the actual reduction function. Will be applied for all keys. if dict: should contain pairs of valid logging keys and either str or FunctionType. Specifies the logging value per key. Missing keys will be filles with a default value of 'last'. Valid types for strings are 'last' | 'first' | 'mean' | 'max' | 'min'. 
level : int the logging level for python's internal logging module Notes ----- This function shall be used to create Loggers (if possible), since it may be extended with new functionalities in the future Returns ------- :class:`Logger` the instance of aa newly created logger """ return SingleThreadedLogger(backend=backend, max_queue_size=max_queue_size, logging_frequencies=logging_frequencies, reduce_types=reduce_types, level=level) ================================================ FILE: delira/logging/logging_context.py ================================================ from delira.logging.registry import logger_exists, register_logger, \ unregister_logger, log as _log from delira.logging.base_logger import make_logger log = _log class LoggingContext(object): """ Contextmanager to set a new logging context """ def __init__( self, name, initialize_if_missing=False, destroy_on_exit=None, **kwargs): """ Parameters ---------- name : str the name of the logger to use initialize_if_missing : bool whether to create a logger if it does not yet exist destroy_on_exit : bool whether to destroy the logger on exit; If None, the logger will only be destroyed, if it was created here **kwargs: additional keyword arguments to create a logger if necessary Raises ------ ValueError if the logger does not exist already and shall not be created """ # Logger does exist already if logger_exists(name): self._name = name if destroy_on_exit is None: destroy_on_exit = False # logger will be created elif initialize_if_missing: register_logger(make_logger(**kwargs), name) if destroy_on_exit is None: destroy_on_exit = True self._name = name # logger does not exist and shall not be created else: raise ValueError("No valid logger for name %s and " "'initialize_if_missing' is False" % name) self._destroy_on_exit = destroy_on_exit def __enter__(self): """ Function to be executed during entrance; Resets the logging context Returns ------- :class:`LoggingContext` self """ global log log = self.log return self def __exit__(self, *args): """ Function to be called during exiting the context manager; Destroys the logger if necessary and resets the old logging context Parameters ---------- *args Postional arguments to be compatible with other context managers Returns ------- """ if self._destroy_on_exit: _logger = unregister_logger(self._name) del _logger global log log = _log def log(self, msg: dict): """ Main Logging Function, Decides whether to log with the assigned backend or python's internal module Parameters ---------- msg : dict the message to log; Should be a dict, where the keys indicate the logging function to execute, and the corresponding value holds the arguments necessary to execute this function """ _log(msg, self._name) def __call__(self, log_message: dict): """ Makes the class callable and forwards the call to :meth:`delira.logging.base_logger.Logger.log` Parameters ---------- log_message : dict the logging message to log Returns ------- Any the return value obtained by :meth:`LoggingContext.log` """ return self.log(log_message) ================================================ FILE: delira/logging/registry.py ================================================ from delira.logging.base_logger import Logger from collections import OrderedDict # Registry dict containing all registered available Loggers # Use Ordered Dict here to use first logger for logging if no name was given _AVAILABLE_LOGGERS = OrderedDict() def log(msg: dict, name=None): """ Global logging function Parameters ---------- msg : dict the message to 
log; Should be a dict, where the keys indicate the logging function to execute, and the corresponding value holds the arguments necessary to execute this function name : str the name of the logger to use; if None: the last logger will be used Raises ------ AssertionError if the logger with the specified name does not exist AssertionError if the returned object is not a logger Returns ------- Any the value obtained by the loggers ``log`` function """ # use last name if no name is present if name is None: name = get_available_loggers()[-1] assert logger_exists(name) _logger = get_logger(name) assert isinstance(_logger, Logger) return _logger.log(msg) def logger_exists(name: str): """ Check if logger exists Parameters ---------- name : str the name to check the existence for Returns ------- bool whether a logger with the given name exists """ return name in _AVAILABLE_LOGGERS def register_logger(logger: Logger, name: str, overwrite=False): """ Register a new logger to the Registry Parameters ---------- logger : :class:`delira.logging.base_logger.Logger` the logger to register name : str the corresponding name, to register the logger at overwrite : bool whether or not to overwrite existing loggers if necessary Returns ------- :class:`delira.logging.base_logger.Logger` the registered logger object """ if not logger_exists(name) or overwrite: _AVAILABLE_LOGGERS[name] = logger return get_logger(name) def unregister_logger(name: str): """ Unregisters a logger from the registry Parameters ---------- name : str the name of the logger to unregister Returns ------- :class:`delira.logging.base_logger.Logger` the registered logger object """ return _AVAILABLE_LOGGERS.pop(name) def get_logger(name): """ Returns a logger from the registry Parameters ---------- name : str the name indicating the logger to return Returns ------- :class:`delira.logging.base_logger.Logger` the specified logger object """ return _AVAILABLE_LOGGERS[name] def get_available_loggers(): """ Gets names for all registered loggers Returns ------- tuple a tuple of strings specifying the names of all registered loggers """ return tuple(_AVAILABLE_LOGGERS.keys()) ================================================ FILE: delira/logging/tensorboard_backend.py ================================================ from threading import Event from queue import Queue from delira.logging.writer_backend import WriterLoggingBackend # use torch SummaryWriter if possible, since this one has latest pytorch # capabilities try: from torch.utils.tensorboard import SummaryWriter LOGDIR_KWARG = "log_dir" except ImportError: from tensorboardX import SummaryWriter LOGDIR_KWARG = "logdir" class TensorboardBackend(WriterLoggingBackend): """ A Tensorboard logging backend """ def __init__(self, writer_kwargs=None, abort_event: Event = None, queue: Queue = None): """ Parameters ---------- writer_kwargs : dict arguments to initialize a writer abort_event : :class:`threading.Event` the abortion event queue : :class:`queue.Queue` the queue holding all logging tasks """ if writer_kwargs is None: writer_kwargs = {} if "logdir" in writer_kwargs: writer_kwargs[LOGDIR_KWARG] = writer_kwargs.pop("logdir") elif "log_dir" in writer_kwargs: writer_kwargs[LOGDIR_KWARG] = writer_kwargs.pop("log_dir") super().__init__(SummaryWriter, writer_kwargs, abort_event, queue) def _call_exec_fn(self, exec_fn, args): """ Helper Function calling the actual mapped function and flushing results to the writer afterwards Parameters ---------- exec_fn : function the function which will execute the 
actual logging args : iterable (listlike) or mapping (dictlike) the arguments passed to the ``exec_fn`` Returns ------- Any the return value obtained by the ``exec_fn`` """ ret_val = super()._call_exec_fn(exec_fn, args) self._writer.file_writer.flush() return ret_val def __del__(self): """ Function to be executed at deletion; Flushes all unsaved changes """ self._writer.file_writer.flush() def _graph_pytorch(self, model, input_to_model=None, verbose=False, **kwargs): """ Function to log a PyTorch graph Parameters ---------- model : :class:`AbstractPyTorchNetwork` the model, whose graph shall be logged input_to_model : :class:`torch.Tensor` the input to the model; necessary for graph traversal verbose : bool verbosity option **kwargs : additional keyword arguments """ converted_args, converted_kwargs = self.convert_to_npy( model=model, input_to_model=input_to_model, verbose=verbose, **kwargs) self._writer.add_graph(*converted_args, **converted_kwargs) def _graph_tf(self, graph, run_metadata=None): """ Function to log a TensorFlow Graph Parameters ---------- graph : :class:`tensorflow.Graph` or :class:`tensorflow.GraphDef` run_metadata : the run metadata Raises ------ TypeError if given graph cannot be converted to graphdef """ import tensorflow as tf from tensorboardX.proto.event_pb2 import Event, TaggedRunMetadata # convert to graphdef if isinstance(graph, tf.Graph): graphdef = graph.as_graph_def() elif isinstance(graph, tf.GraphDef): graphdef = graph elif hasattr(graph, "SerializeToString"): graphdef = graph else: raise TypeError("Invalid type given for graph: %s" % graph.__class__.__name__) if run_metadata: run_metadata = TaggedRunMetadata( tag='step1', run_metadata=run_metadata.SerializeToString()) self._writer._get_file_writer().add_event( Event( graph_def=graphdef.SerializeToString(), tagged_run_metadata=run_metadata)) def _graph_onnx(self, prototxt): """ Function to log a ONNX graph to file Parameters ---------- prototxt : str filepath to a given prototxt file containing an ONNX graph """ converted_args, converted_kwargs = self.convert_to_npy( prototxt=prototxt) self._writer.add_onnx_graph(*converted_args, **converted_kwargs) def _embedding(self, mat, metadata=None, label_img=None, global_step=None, tag='default', metadata_header=None): """ Function to create an embedding of given data Parameters ---------- mat : array-like an arraylike object, which can be converted to a numpy array; holds the actual embedding value metadata : the embeddings metadata label_img : array-like an arraylike object, which can be converted to a numpy array; holds the label image global_step : int the global step tag : str the tag to store the embedding at metadata_header : the metadata header """ converted_args, converted_kwargs = self.convert_to_npy( mat=mat, metadata=metadata, label_img=label_img, global_step=global_step ) self._writer.add_embedding(*converted_args, **converted_kwargs) def _scalars(self, main_tag: str, tag_scalar_dict: dict, global_step=None, walltime=None, sep="/"): """ Function to log multiple scalars at once. 
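For illustration (an added sketch, not from the original docstring), the combined tags produced here look as follows:

>>> main_tag, sep = "val", "/"
>>> [main_tag + sep + key for key in ("loss", "acc")]
['val/loss', 'val/acc']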
Opposing to the base function, this is done sequentially rather then parallel to avoid creating new event files Parameters ---------- main_tag : str the main tag, will be combined with the subtags inside the ``tag_scalar_dict`` tag_scalar_dict : dict dictionary of (key, scalar) pairs global_step : int the global step walltime : the overall time sep : str the character separating maintag and subtag in the final tag """ # log scalars sequentially for key, val in tag_scalar_dict.items(): # combine tags new_tag = main_tag + sep + key self._scalar(new_tag, val, global_step=global_step, walltime=walltime) @property def name(self): return "TensorFlow Backend" ================================================ FILE: delira/logging/visdom_backend.py ================================================ import tensorboardX from threading import Event from queue import Queue from delira.logging.writer_backend import WriterLoggingBackend class VisdomBackend(WriterLoggingBackend): """ A Visdom Logging backend """ def __init__(self, writer_kwargs: dict = None, abort_event: Event = None, queue: Queue = None): """ Parameters ---------- writer_kwargs : dict arguments to initialize a writer abort_event : :class:`threading.Event` the abortion event queue : :class:`queue.Queue` the queue holding all logging tasks """ if writer_kwargs is None: writer_kwargs = {} super().__init__( tensorboardX.visdom_writer.VisdomWriter, writer_kwargs, abort_event, queue) @property def name(self): return "VisdomBackend" ================================================ FILE: delira/logging/writer_backend.py ================================================ from delira.logging.base_backend import BaseBackend from queue import Queue from threading import Event class WriterLoggingBackend(BaseBackend): """ A Basic Writer Backend for a unspecified writer class """ def __init__(self, writer_cls, writer_kwargs: dict, abort_event: Event = None, queue: Queue = None): super().__init__(abort_event, queue) self._writer = writer_cls(**writer_kwargs) @staticmethod def convert_to_npy(*args, **kwargs): """ Function to convert all positional args and keyword args to numpy (returns identity per default, but can be overwritten in subclass to log more complex types) Parameters ---------- *args : positional arguments of arbitrary number and type **kwargs : keyword arguments of arbitrary number and type Returns ------- tuple converted positional arguments dict converted keyword arguments """ return args, kwargs def _image(self, tag, img_tensor, global_step=None, walltime=None, dataformats='CHW'): """ Function to log a single image Parameters ---------- tag : str the tag to store the image at img_tensor : array-like an array-like object containing the actual image; Must be convertible to numpy global_step : int the global step walltime : the overall time dataformats : str string specifying the image format """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, img_tensor=img_tensor, global_step=global_step, walltime=walltime, dataformats=dataformats) self._writer.add_image(*converted_args, **converted_kwargs) def _images(self, tag, img_tensor, global_step=None, walltime=None, dataformats='NCHW'): """ Function to log multiple values Parameters ---------- tag : str the tag to store the image at img_tensor : array-like an array-like object containing the actual image; Must be convertible to numpy global_step : int the global step walltime : the overall time dataformats : str string specifying the image format """ converted_args, converted_kwargs = 
self.convert_to_npy( tag=tag, img_tensor=img_tensor, global_step=global_step, walltime=walltime, dataformats=dataformats) self._writer.add_images(*converted_args, **converted_kwargs) def _image_with_boxes(self, tag, img_tensor, box_tensor, global_step=None, walltime=None, dataformats='CHW', **kwargs): """ Function to log a single image with bounding boxes Parameters ---------- tag : str the tag to store the image at img_tensor : array-like an array-like object containing the actual image; Must be convertible to numpy box_tensor : array-like an array-like object containing the actual bounding boxes in xyxy format; must be convertible to numpy global_step : int the global step walltime : the overall time dataformats : str string specifying the image format **kwargs : additional keyword arguments """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, img_tensor=img_tensor, box_tensor=box_tensor, global_step=global_step, walltime=walltime, dataformats=dataformats, **kwargs) self._writer.add_image_with_boxes(*converted_args, **converted_kwargs) def _scalar(self, tag, scalar_value, global_step=None, walltime=None): """ Function to log a single scalar value Parameters ---------- tag : str the tag to store the image at scalar_value : int or float the scalar value to log global_step : int the global step walltime : the overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, scalar_value=scalar_value, global_step=global_step, walltime=walltime) self._writer.add_scalar(*converted_args, **converted_kwargs) def _scalars(self, main_tag, tag_scalar_dict, global_step=None, walltime=None): """ Function to log multiple scalars Parameters ---------- main_tag : str the main tag to store the scalars at tag_scalar_dict : dict a dictionary containing tags as keys and the corresponding scalar values global_step : int the global step walltime : the overall time """ converted_args, converted_kwargs = self.convert_to_npy( main_tag=main_tag, tag_scalar_dict=tag_scalar_dict, global_step=global_step, walltime=walltime) self._writer.add_scalars(*converted_args, **converted_kwargs) def _histogram(self, tag, values, global_step=None, bins='tensorflow', walltime=None): """ Function to create and log a histogram out of given values Parameters ---------- tag : str the tag to store the histogram at values : arraylike an arraylike object containing the raw data to create a histogram from; Must be convertible to numpy global_step : int global step bins : str string indicating the bins format walltime : the overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, values=values, global_step=global_step, bins=bins) self._writer.add_histogram(*converted_args, **converted_kwargs) def _figure(self, tag, figure, global_step=None, close=True, walltime=None): """ Function to log a ``matplotlib.pyplot`` figure Parameters ---------- tag : str the tag to store the figure at figure : :class:`matplotlib.pyplot.Figure`` the figure to log global_step : int the global step close : bool whether to close the figure after pushing it walltime : the overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, figure=figure, global_step=global_step, close=close, walltime=walltime) self._writer.add_figure(*converted_args, **converted_kwargs) def _audio(self, tag, snd_tensor, global_step=None, sample_rate=44100, walltime=None): """ Function to log a single audio signal Parameters ---------- tag : str the tag to store the sound signal at snd_tensor : arraylike 
arraylike object containing the sound signal; must be convertible to numpy global_step : int the global step sample_rate : int the sampling rate for the sound signal walltime : the overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, snd_tensor=snd_tensor, global_step=global_step, sample_rate=sample_rate, walltime=walltime ) self._writer.add_audio(*converted_args, **converted_kwargs) def _text(self, tag, text_string, global_step=None, walltime=None): """ Function to log a single string as text Parameters ---------- tag : str the tag to store the text at text_string : str the text string to log global_step : int the global step walltime : the overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, text_string=text_string, global_step=global_step, walltime=walltime) self._writer.add_text(*converted_args, **converted_kwargs) def _pr_curve(self, tag, labels, predictions, global_step=None, num_thresholds=127, weights=None, walltime=None): """ Function to create and log a PR curve out of given predictions and + labels Parameters ---------- tag : str function to store the curve at labels : arraylike arraylike object containing the groundtruth data; must be convertible to numpy predictions : arraylike arraylike object containing the predictions; must be convertible to numpy global_step : int the global step num_thresholds : int number of thresholds to apply for PR calculation weights : arraylike arraylike object containing sample weights, must be covertible to numpy walltime : overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, labels=labels, predictions=predictions, global_step=global_step, num_thresholds=num_thresholds, weights=weights, walltime=walltime) self._writer.add_pr_curve(*converted_args, **converted_kwargs) def _video(self, tag, vid_tensor, global_step=None, fps=4, walltime=None): """ Function to log a single video Parameters ---------- tag : str the tag to store the image at vid_tensor : arraylike arraylike object containing the video frames; must be convertible to numpy global_step : int the global step fps : int frames per second to display walltime : int the overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, vid_tensor=vid_tensor, global_step=global_step, fps=fps, walltime=walltime) self._writer.add_video(*converted_args, **converted_kwargs) @property def name(self): return "WriterBackend" ================================================ FILE: delira/models/__init__.py ================================================ from delira.models.abstract_network import AbstractNetwork from delira.models.backends import * ================================================ FILE: delira/models/abstract_network.py ================================================ import abc import logging file_logger = logging.getLogger(__name__) class AbstractNetwork(object): """ Abstract class all networks should be derived from """ _init_kwargs = {} @abc.abstractmethod def __init__(self, **kwargs): """ Init function to register init kwargs (should be called from all subclasses) Parameters ---------- **kwargs keyword arguments (will be registered to `self.init_kwargs`) """ super().__init__() for key, val in kwargs.items(): self._init_kwargs[key] = val @abc.abstractmethod def __call__(self, *args, **kwargs): """ AbstractMethod to specify that each model should be able to be called for predictions Parameters ---------- *args : Positional arguments **kwargs : Keyword Arguments Raises ------ 
NotImplementedError if not overwritten by subclass """ raise NotImplementedError() @staticmethod @abc.abstractmethod def closure(model, data_dict: dict, optimizers: dict, losses: dict, iter_num: int, fold=0, **kwargs): """ Function which handles prediction from batch, logging, loss calculation and optimizer step Parameters ---------- model : :class:`AbstractNetwork` model to forward data through data_dict : dict dictionary containing the data optimizers : dict dictionary containing all optimizers to perform parameter update losses : dict Functions or classes to calculate losses iter_num: int the number of of the current iteration in the current epoch; Will be restarted at zero at the beginning of every epoch fold : int Current Fold in Crossvalidation (default: 0) kwargs : dict additional keyword arguments Returns ------- dict Loss values (with same keys as input dict losses) dict Arbitrary number of predictions Raises ------ NotImplementedError If not overwritten by subclass """ raise NotImplementedError() @staticmethod def prepare_batch(batch: dict, input_device, output_device): """ Converts a numpy batch of data and labels to suitable datatype and pushes them to correct devices Parameters ---------- batch : dict dictionary containing the batch (must have keys 'data' and 'label' input_device : device for network inputs output_device : device for network outputs Returns ------- dict dictionary containing all necessary data in right format and type and on the correct device Raises ------ NotImplementedError If not overwritten by subclass """ raise NotImplementedError() @property def init_kwargs(self): """ Returns all arguments registered as init kwargs Returns ------- dict init kwargs """ return self._init_kwargs ================================================ FILE: delira/models/backends/__init__.py ================================================ from delira import get_backends as _get_backends if "CHAINER" in _get_backends(): from delira.models.backends.chainer import * if "SKLEARN" in _get_backends(): from delira.models.backends.sklearn import * if "TF" in _get_backends(): from delira.models.backends.tf_eager import * from delira.models.backends.tf_graph import * if "TORCH" in _get_backends(): from delira.models.backends.torch import * from delira.models.backends.torchscript import * ================================================ FILE: delira/models/backends/chainer/__init__.py ================================================ from delira import get_backends as _get_backends if "CHAINER" in _get_backends(): from delira.models.backends.chainer.abstract_network import \ AbstractChainerNetwork from delira.models.backends.chainer.data_parallel import \ DataParallelChainerNetwork from delira.models.backends.chainer.data_parallel import \ DataParallelChainerOptimizer from delira.models.backends.chainer.data_parallel import \ ParallelOptimizerUpdateModelParameters from delira.models.backends.chainer.data_parallel import \ ParallelOptimizerCumulateGradientsHook ================================================ FILE: delira/models/backends/chainer/abstract_network.py ================================================ import abc import chainer import numpy as np from delira.models.abstract_network import AbstractNetwork # Use this Mixin Class to set __call__ to None, because there is an # internal check inside chainer.Link.__call__ for other __call__ methods # of parent classes to be not None. 
If this would be the case, # this function would be executed instead of our forward class ChainerMixin(AbstractNetwork): __call__ = None class AbstractChainerNetwork(chainer.Chain, ChainerMixin): """ Abstract Class for Chainer Networks """ def __init__(self, **kwargs): """ Parameters ---------- **kwargs : keyword arguments of arbitrary number and type (will be registered as ``init_kwargs``) """ chainer.Chain.__init__(self) AbstractNetwork.__init__(self, **kwargs) @abc.abstractmethod def forward(self, *args, **kwargs) -> dict: """ Feeds Arguments through the network Parameters ---------- *args : positional arguments of arbitrary number and type **kwargs : keyword arguments of arbitrary number and type Returns ------- dict dictionary containing all computation results """ raise NotImplementedError def __call__(self, *args, **kwargs) -> dict: """ Makes instances of this class callable. Calls the ``forward`` method. Parameters ---------- *args : positional arguments of arbitrary number and type **kwargs : keyword arguments of arbitrary number and type Returns ------- dict dictionary containing all computation results """ return chainer.Chain.__call__(self, *args, **kwargs) @staticmethod def prepare_batch(batch: dict, input_device, output_device): """ Helper Function to prepare Network Inputs and Labels (convert them to correct type and shape and push them to correct devices) Parameters ---------- batch : dict dictionary containing all the data input_device : chainer.backend.Device or string device for network inputs output_device : torch.device device for network outputs Returns ------- dict dictionary containing data in correct type and shape and on correct device """ new_batch = {k: chainer.as_variable(v.astype(np.float32)) for k, v in batch.items()} for k, v in new_batch.items(): if k == "data": device = input_device else: device = output_device # makes modification inplace! 
v.to_device(device) return new_batch @staticmethod def closure(model, data_dict: dict, optimizers: dict, losses: dict, iter_num, fold=0, **kwargs): """ default closure method to do a single training step; Could be overwritten for more advanced models Parameters ---------- model : :class:`AbstractChainerNetwork` trainable model data_dict : dict dictionary containing the data optimizers : dict dictionary of optimizers to optimize model's parameters; ignored here, just passed for compatibility reasons losses : dict dict holding the losses to calculate errors; ignored here, just passed for compatibility reasons iter_num: int the number of of the current iteration in the current epoch; Will be restarted at zero at the beginning of every epoch fold : int Current Fold in Crossvalidation (default: 0) **kwargs: additional keyword arguments Returns ------- dict Loss values (with same keys as input dict losses; will always be empty here) dict dictionary containing all predictions """ assert (optimizers and losses) or not optimizers, \ "Criterion dict cannot be emtpy, if optimizers are passed" loss_vals = {} total_loss = 0 inputs = data_dict["data"] preds = model(inputs) for key, crit_fn in losses.items(): _loss_val = crit_fn(preds["pred"], data_dict["label"]) loss_vals[key] = _loss_val.item() total_loss += _loss_val model.cleargrads() total_loss.backward() optimizers['default'].update() for k, v in preds.items(): v.unchain() return loss_vals, preds ================================================ FILE: delira/models/backends/chainer/data_parallel.py ================================================ from delira.models.backends.chainer.abstract_network import \ AbstractChainerNetwork import chainer def _apply_scatter(inputs: chainer.Variable, target_devices: list, dim: int = 0): """ Scatters inputs to target devices; Slicing will be done against a given dimension Parameters ---------- inputs : :class:`chainer.Variable` the input variable to scatter target_devices : list the target devices to scatter to dim : int the dimension to use for slicing Returns ------- list list of variable slices on correct devices """ def _slice_inputs(input_var, dim, num_dims, start, end, target_device): """ Slices the input variable along a given dimension from start to end and pushes it to correct device Parameters ---------- input_var : :class:`chainer.Variable` the variable to slice dim : int the dimension to slice along num_dims : int the dimensionality of ``input_var`` start : int the start value for slicing (included) end : int the end value for slicing (excluded) target_device: str or :class:`chainer.backend.Device` the device to push to Returns ------- :class:`chainer.Variable` the slice of the variable """ slc = [slice(None)] * num_dims slc[dim] = slice(start, end) sliced_var = input_var[slc] sliced_var.to_device(target_device) output_shape = list(input_var.shape) output_shape[dim] = -1 return sliced_var.reshape(output_shape) # create empty sliced input list scattered_inputs = [] # calculate constant only once num_devices = len(target_devices) samples_per_device = inputs.shape[dim] // num_devices num_dims = len(inputs.shape) # iterate over number of devices and slice accordingly # (exclude last device) # iterating until the minimum of num_devices and inputs.shape[dim] -1 # ensures that if the batchsize is too small to be scattered across all # devices, we will only scatter across as many devices as possible for i in range(min(num_devices, inputs.shape[dim]) - 1): start, end = i * samples_per_device, i + 1 * 
samples_per_device scattered_inputs.append(_slice_inputs(inputs, dim, num_dims, start, end, target_devices[i])) # all remaining samples (not yet sliced) are appended now # (all samples used; will be pushed to last device later) scattered_inputs.append(_slice_inputs( inputs, dim, len(inputs.shape,), (num_devices - 1) * samples_per_device, inputs.shape[dim], target_devices[-1])) return scattered_inputs def _apply_gather(target_device, dim, *outputs): for _output in outputs: _output.to_device(target_device) return chainer.functions.concat(outputs, dim) def _scatter(inputs, target_devices: list, dim): """ Scatters all inputs across given target_devices Parameters ---------- inputs : Any target_devices : list list of devices to scatter to dim : int dimension to use for slicing Returns ------- list list of scattered inputs """ def _scatter_map(inputs): """ Scatters all inputs across given target_devices Parameters ---------- inputs : Any Returns ------- list list of scattered inputs """ # directly apply the scattering on variable if isinstance(inputs, chainer.Variable): return _apply_scatter(inputs, target_devices, dim) # map _scatter_map recursively to all samples in tuple if isinstance(inputs, tuple) and inputs: return list(zip(*map(_scatter_map, inputs))) # map _scatter_map recursively to all samples in list if isinstance(inputs, list) and inputs: return list(map(list, zip(*map(_scatter_map, inputs)))) # map _scatter_map recursively to all samples in dict if isinstance(inputs, dict) and inputs: return list(map(type(inputs), zip(*map(_scatter_map, inputs.items())))) # try to convert inputs to chainer variable first and afterwards # apply _scatter_map again try: return _scatter_map(chainer.as_variable(inputs)) except TypeError: return [inputs for targets in target_devices] # After scatter_map is called, a scatter_map cell will exist. This cell # has a reference to the actual function scatter_map, which has # references to a closure that has a reference to the scatter_map cell # (because the fn is recursive). To avoid this reference cycle, we set # the function to None, clearing the cell try: return _scatter_map(inputs) finally: _scatter_map = None def _gather(outputs, target_device, dim=0): r""" Gathers tensors from different GPUs on a specified device (-1 means the CPU). """ def gather_map(outputs): out = outputs[0] if isinstance(out, chainer.Variable): return _apply_gather(target_device, dim, *outputs) if out is None: return None if isinstance(out, dict): if not all((len(out) == len(d) for d in outputs)): raise ValueError( 'All dicts must have the same number of keys') return type(out)(((k, gather_map([d[k] for d in outputs])) for k in out)) return type(out)(map(gather_map, zip(*outputs))) # Recursive function calls like this create reference cycles. # Setting the function to None clears the refcycle. try: return gather_map(outputs) finally: gather_map = None class DataParallelChainerNetwork(AbstractChainerNetwork): """ A Wrapper around a :class:`AbstractChainerNetwork` instance to implement parallel training by splitting the batches """ def __init__(self, module: AbstractChainerNetwork, devices: list, output_device=None, batch_dim=0): """ Parameters ---------- module : :class:`AbstractChainerNetwork` the module to wrap (will be replicated on all devices) devices : list a list containing the devices to use (either as strings or as :class:`chainer.backend.Device`). 
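For example (an illustrative sketch added here, not from the original docstring; ``net`` is assumed to be a concrete :class:`AbstractChainerNetwork` instance and two CuPy devices are assumed to be available):

>>> parallel_net = DataParallelChainerNetwork(net, devices=["@cupy:0", "@cupy:1"])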
output_device : str or :class:`chainer.backend.Device` The output device Make sure, your labels are also on this device for loss calculation! If not specified, the second device of ``devices`` will be used for output gathering. batch_dim : int the index of the batchdimension (usually 0, but can become e.g. 1 in NLP tasks) """ super().__init__() modules = [module.copy() for _ in devices] for _module, _device in zip(modules, devices): _module.to_device(_device) with self.init_scope(): self.modules = chainer.ChainList(*modules) self.devices = devices if output_device is None: output_device = devices[1] self._output_device = output_device assert self._output_device in self.devices self._output_device_idx = self.devices.index(self._output_device) self.dim = batch_dim def forward(self, *args, **kwargs): """ Scatters the inputs (both positional and keyword arguments) across all devices, feeds them through model replicas and re-builds batches on output device Parameters ---------- *args : positional arguments of arbitrary number and type **kwargs : keyword arguments of arbitrary number and type Returns ------- Any combined output from all scattered models """ scattered_args, scattered_kwargs = self._scatter(args, kwargs, self.devices, self.dim) predictions = [] for _args, _kwargs, _module in zip(scattered_args, scattered_kwargs, self.modules): predictions.append(_module(*_args, **_kwargs)) predictions = self._gather(predictions, self.dim, self._output_device) return predictions def params(self, include_uninit=True): """ Only the parameters of the module on the first device will actually be updated, all the other parameters will be replicated by the optimizer after an update Parameters ---------- include_uninit : bool Returns ------- a generator holding the root-modules parameters """ return self.modules[0].params(include_uninit) @staticmethod def _scatter(inputs, kwargs, target_devices: list, dim=0): """ Scatters all inputs (args and kwargs) to target devices and splits along given dimension Parameters ---------- inputs : list or tuple positional arguments kwargs : dict keyword arguments target_devices : list list of target device (either string or chainer.backend.Device) dim : int the dimension, which should be used for splitting the batch Returns ------- tuple scattered positional arguments tuple scattered keyword arguments """ # scatter inputs if given inputs = _scatter(inputs, target_devices, dim) if inputs else [] # scatter kwargs if given kwargs = _scatter(kwargs, target_devices, dim) if kwargs else [] # extend lengths by empty tuples if necessary if len(inputs) < len(kwargs): inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) elif len(kwargs) < len(inputs): kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) inputs = tuple(inputs) kwargs = tuple(kwargs) return inputs, kwargs @staticmethod def _gather(predictions, dim, target_device): """ Re-Builds batches on the target device Parameters ---------- predictions : list list containing the predictions from all replicated models dim : int dimension to use for concatenating single predictions target_device : str or chainer.backend.Device the device, the re-built batch should lie on Returns ------- Any the rebuild batch (lying on ``target_device``) """ return _gather(predictions, target_device, dim) def cleargrads(self): for module in self.modules: module.cleargrads() def zerograds(self): for module in self.modules: module.zerograds() @property def closure(self): return self.modules[0].closure @property def 
prepare_batch(self): return self.modules[0].prepare_batch class ParallelOptimizerCumulateGradientsHook(object): """ A hook which sums up all replication's gradients in a DataParallel-Scenario """ name = "DataParallelCumulateGradients" call_for_each_param = False timing = 'pre' def __call__(self, optimizer: chainer.Optimizer): """ Summing up all parameters if the target is an instance of ``DataParallel`` Parameters ---------- optimizer : chainer.Optimizer the optimizer holding the target, whoose gradients should be summed across the replications """ if isinstance(optimizer.target, DataParallelChainerNetwork): for module in optimizer.target.modules[1:]: optimizer.target.modules[0].addgrads(module) class ParallelOptimizerUpdateModelParameters(object): """ A hook to replicate all parameters from the root model, to all model-replicas after the optimizer step """ name = "DataParallelUpdateModelParams" call_for_each_param = False timing = "post" def __call__(self, optimizer: chainer.Optimizer): if isinstance(optimizer.target, DataParallelChainerNetwork): for module in optimizer.target.modules[1:]: module.copyparams(optimizer.target.modules[0]) class DataParallelChainerOptimizer(chainer.Optimizer): """ An Optimizer-Wrapper to enable DataParallel. Basically this forwards all functions to the interal optimizer, but registers the additional hooks needed for DataParallel (namely :class:`ParallelOptimizerUpdateModelParameters` as a post-update hook and :class:`ParallelOptimizerCumulateGradientsHook` as a pre-update hook) """ def __init__(self, optimizer): """ Parameters ---------- optimizer : :class:`chainer.Optimizer` the optimizer to wrap """ if isinstance(optimizer, chainer.Optimizer): self._optimizer = optimizer else: raise RuntimeError("Invalid optimizer class given: Expected " "instance of chainer.Optimizer, but got %s" % optimizer.__class__.__name__) @classmethod def from_optimizer_class(cls, optim_cls, *args, **kwargs): """ Parameters ---------- optim_cls : subclass of :class:`chainer.Optimizer` the optimizer to use internally *args : arbitrary positional arguments (will be used for initialization of internally used optimizer) **kwargs : arbitrary keyword arguments (will be used for initialization of internally used optimizer) """ if optim_cls is not None and issubclass(optim_cls, chainer.Optimizer): _optim = optim_cls(*args, **kwargs) else: raise RuntimeError("Invalid optimizer class given: Expected " "Subclass of chainer.Optimizer, but got %s" % optim_cls.__name__) return cls(_optim) def setup(self, link): """ Calls the setup method of the internal optimizer and registers the necessary grads for data-parallel behavior Parameters ---------- link : :class:`DataParallel` the target, whoose parameters should be updated """ self._optimizer.setup(link) self._optimizer.add_hook(ParallelOptimizerCumulateGradientsHook()) self._optimizer.add_hook(ParallelOptimizerUpdateModelParameters()) @property def target(self): return self._optimizer.target @property def epoch(self): return self._optimizer.epoch @property def _pre_update_hooks(self): return self._optimizer._pre_update_hooks @property def _loss_scale(self): return self._optimizer._loss_scale @property def _loss_scale_max(self): return self._optimizer._loss_scale_max @property def _loss_scaling_is_dynamic(self): return self._optimizer._loss_scaling_is_dynamic @property def use_auto_new_epoch(self): return self._optimizer.use_auto_new_epoch @property def update(self): return self._optimizer.update @property def new_epoch(self): return 
self._optimizer.new_epoch @property def add_hook(self): return self._optimizer.add_hook @property def remove_hook(self): return self._optimizer.remove_hook @property def call_hooks(self): return self._optimizer.call_hooks @property def serialize(self): return self._optimizer.serialize @property def loss_scaling(self): return self._optimizer.loss_scaling @property def set_loss_scale(self): return self._optimizer.set_loss_scale @property def check_nan_in_grads(self): return self._optimizer.check_nan_in_grads @property def is_safe_to_update(self): return self._optimizer.is_safe_to_update @property def update_loss_scale(self): return self._optimizer.update_loss_scale ================================================ FILE: delira/models/backends/sklearn/__init__.py ================================================ from delira import get_backends as _get_backends if "SKLEARN" in _get_backends(): from delira.models.backends.sklearn.abstract_network import \ SklearnEstimator ================================================ FILE: delira/models/backends/sklearn/abstract_network.py ================================================ from inspect import signature as get_signature from sklearn.base import BaseEstimator from delira.models.abstract_network import AbstractNetwork class SklearnEstimator(AbstractNetwork): """ Wrapper Class to wrap all ``sklearn`` estimators and provide delira compatibility """ def __init__(self, module: BaseEstimator): """ Parameters ---------- module : :class:`sklearn.base.BaseEstimator` the module to wrap """ super().__init__() self.module = module # forwards methods to self.module if necessary for key in ["fit", "partial_fit", "predict"]: if hasattr(self.module, key): setattr(self, key, getattr(self.module, key)) # if estimator is build dynamically based on input, classes have to # be passed at least at first time (we pass it every time), because # not every class is present in every batch # variable is initialized here, but feeded during the training if (self.iterative_training and "classes" in get_signature( self.partial_fit).parameters): self.classes = None def __call__(self, *args, **kwargs): """ Calls ``self.predict`` with args and kwargs Parameters ---------- *args : positional arguments of arbitrary number and type **kwargs : keyword arguments of arbitrary number and type Returns ------- dict dictionary containing the predictions under key 'pred' """ return {"pred": self.predict(*args, **kwargs)} @property def iterative_training(self): """ Property indicating, whether a the current module can be trained iteratively (batchwise) Returns ------- bool True: if current module can be trained iteratively False: else """ return hasattr(self, "partial_fit") @staticmethod def prepare_batch(batch: dict, input_device, output_device): """ Helper Function to prepare Network Inputs and Labels (convert them to correct type and shape and push them to correct devices) Parameters ---------- batch : dict dictionary containing all the data input_device : Any device for module inputs (will be ignored here; just given for compatibility) output_device : Any device for module outputs (will be ignored here; just given for compatibility) Returns ------- dict dictionary containing data in correct type and shape and on correct device """ new_batch = {"X": batch["data"].reshape(batch["data"].shape[0], -1)} if "label" in batch: new_batch["y"] = batch["label"].ravel() return new_batch @staticmethod def closure(model, data_dict: dict, optimizers: dict, losses: dict, iter_num: int, fold=0, **kwargs): 
""" default closure method to do a single training step; Could be overwritten for more advanced models Parameters ---------- model : :class:`SkLearnEstimator` trainable model data_dict : dict dictionary containing the data optimizers : dict dictionary of optimizers to optimize model's parameters; ignored here, just passed for compatibility reasons losses : dict dict holding the losses to calculate errors; ignored here, just passed for compatibility reasons iter_num: int the number of of the current iteration in the current epoch; Will be restarted at zero at the beginning of every epoch fold : int Current Fold in Crossvalidation (default: 0) **kwargs: additional keyword arguments Returns ------- dict Loss values (with same keys as input dict losses; will always be empty here) dict dictionary containing all predictions """ if model.iterative_training: fit_fn = model.partial_fit else: fit_fn = model.fit if hasattr(model, "classes"): # classes must be specified here, because not all classes # must be present in each batch and some estimators are build # dynamically fit_fn(**data_dict, classes=model.classes) else: fit_fn(**data_dict) preds = model(data_dict["X"]) return {}, preds ================================================ FILE: delira/models/backends/tf_eager/__init__.py ================================================ from delira import get_backends as _get_backends if "TF" in _get_backends(): from delira.models.backends.tf_eager.abstract_network import \ AbstractTfEagerNetwork from delira.models.backends.tf_eager.data_parallel import \ DataParallelTfEagerNetwork ================================================ FILE: delira/models/backends/tf_eager/abstract_network.py ================================================ import abc import typing import tensorflow as tf import numpy as np from delira.models.abstract_network import AbstractNetwork class AbstractTfEagerNetwork(AbstractNetwork, tf.keras.layers.Layer): """ Abstract Network for TF eager execution backend. 
All models to use with this backend should be derived from this class """ def __init__(self, data_format="channels_first", trainable=True, name=None, dtype=None, **kwargs): """ Parameters ---------- data_format : str the accepted data format (default: 'channels_first') trainable : wheter or not the model is trainable (default: True) name : str the network's name dtype : the dtype to use for the model's parameters **kwargs : additional keyword arguments (will be registered as ``init_kwargs``) """ tf.keras.layers.Layer.__init__(self, trainable=trainable, name=name, dtype=dtype) AbstractNetwork.__init__(self, **kwargs) self.data_format = data_format self.device = "/cpu:0" @abc.abstractmethod def call(self, *args, **kwargs): """ Defines the model's forward pass Parameters ---------- *args : arbitrary positional arguments **kwargs : arbbitrary keyword arguments Raises ------ NotImplementedError If not overwritten by subclass """ raise NotImplementedError def __call__(self, *args, **kwargs): """ Executes the modules forward pass Parameters ---------- *args : arbitrary positional arguments **kwargs : arbitrary keyword arguments """ return self.call(*args, **kwargs) @staticmethod def prepare_batch(batch: dict, input_device, output_device): """ Helper Function to prepare Network Inputs and Labels (convert them to correct type and shape and push them to correct devices) Parameters ---------- batch : dict dictionary containing all the data input_device : str device for module inputs output_device : str device for module outputs Returns ------- dict dictionary containing data in correct type and shape and on correct device """ new_batch = {} with tf.device(output_device): new_batch["label"] = tf.convert_to_tensor( batch["label"].astype(np.float32)) with tf.device(input_device): for k, v in batch.items(): if k == "label": continue new_batch[k] = tf.convert_to_tensor(v.astype(np.float32)) return new_batch @staticmethod def closure(model, data_dict: dict, optimizers: typing.Dict[str, tf.train.Optimizer], losses: dict, iter_num: int, fold=0, **kwargs): """ default closure method to do a single training step; Could be overwritten for more advanced models Parameters ---------- model : :class:`SkLearnEstimator` trainable model data_dict : dict dictionary containing the data optimizers : dict dictionary of optimizers to optimize model's parameters; ignored here, just passed for compatibility reasons losses : dict dict holding the losses to calculate errors; ignored here, just passed for compatibility reasons iter_num: int the number of of the current iteration in the current epoch; Will be restarted at zero at the beginning of every epoch fold : int Current Fold in Crossvalidation (default: 0) **kwargs: additional keyword arguments Returns ------- dict Loss values (with same keys as input dict losses; will always be empty here) dict dictionary containing all predictions """ loss_vals = {} # calculate loss with graph created by gradient taping with tf.GradientTape() as tape: preds = model(data_dict["data"]) total_loss = None for k, loss_fn in losses.items(): _loss_val = loss_fn(preds["pred"], data_dict["label"]) loss_vals[k] = _loss_val.numpy() if total_loss is None: total_loss = _loss_val else: total_loss += _loss_val # calculate gradients grads = tape.gradient(total_loss, model.trainable_variables) # perform optimization step optimizers["default"].apply_gradients( zip(grads, model.trainable_variables)) return loss_vals, preds ================================================ FILE: 
delira/models/backends/tf_eager/data_parallel.py ================================================ import tensorflow as tf from delira.models.backends.tf_eager.abstract_network import \ AbstractTfEagerNetwork class DataParallelTfEagerNetwork(AbstractTfEagerNetwork): """ DataParallel Module for the TF eager execution backend Warnings -------- This Module is highly experimental and not guaranteed to work properly! """ def __init__(self, module, devices): """ Parameters ---------- module : :class:`AbstractTfEagerNetwork` the module to scatter across different devices devices : list list of ints specifying the GPU indices """ super().__init__() self._closure = module.closure self._prepare_batch = module.prepare_batch self.module = tf.keras.utils.multi_gpu_model(module, devices, True) def call(self, *args, **kwargs): """ Defines the forward pass of the module Parameters ---------- *args : arbitrary positional arguments **kwargs : arbitrary keyword arguments """ return self.module.call(*args, **kwargs) @property def closure(self): return self._closure @property def prepare_batch(self): return self._prepare_batch ================================================ FILE: delira/models/backends/tf_graph/__init__.py ================================================ from delira import get_backends as _get_backends if "TF" in _get_backends(): from delira.models.backends.tf_graph.abstract_network import \ AbstractTfGraphNetwork ================================================ FILE: delira/models/backends/tf_graph/abstract_network.py ================================================ import abc import logging import tensorflow as tf import numpy as np from delira.models.abstract_network import AbstractNetwork class AbstractTfGraphNetwork(AbstractNetwork, metaclass=abc.ABCMeta): """ Abstract Class for TF Graph Networks See Also -------- :class:`AbstractNetwork` """ @abc.abstractmethod def __init__(self, sess=tf.Session, **kwargs): """ Parameters ---------- sess : type the tensorflow session class used to create the internal session (default: ``tf.Session``) **kwargs : keyword arguments (passed to :class:`AbstractNetwork`'s ``__init__`` to register them as init kwargs) """ AbstractNetwork.__init__(self, **kwargs) self._sess = sess() self.inputs = {} self.outputs_train = {} self.outputs_eval = {} self._losses = None self._optims = None self.training = True def __call__(self, *args, **kwargs): """ Wrapper for calling self.run in eval setting Parameters ---------- *args : positional arguments (passed to `self.run`) **kwargs: keyword arguments (passed to `self.run`) Returns ------- Any result: module results of arbitrary type and number """ self.training = False return self.run(*args, **kwargs) def run(self, *args, **kwargs): """ Evaluates `self.outputs_train` or `self.outputs_eval` based on `self.training` Parameters ---------- *args : currently unused, exist for compatibility reasons **kwargs : kwargs used to feed as ``self.inputs``. Same keys as for ``self.inputs`` must be used Returns ------- dict same keys as outputs_train or outputs_eval, containing evaluated expressions as values """ _feed_dict = {} for feed_key, feed_value in kwargs.items(): assert feed_key in self.inputs.keys(), \ "{} not found in self.inputs".format(feed_key) _feed_dict[self.inputs[feed_key]] = feed_value if self.training: return self._sess.run(self.outputs_train, feed_dict=_feed_dict) return self._sess.run(self.outputs_eval, feed_dict=_feed_dict) def _add_losses(self, losses: dict): """ Adds losses to the model that are to be used by optimizers or during evaluation.
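For illustration (an added sketch, not from the original docstring): the individual loss expressions are averaged into an additional 'total' entry, i.e.

>>> import numpy as np
>>> float(np.mean([1.0, 3.0], axis=0))  # two individual losses -> 'total'
2.0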
Can be overwritten for more advanced loss behavior Parameters ---------- losses : dict dictionary containing all losses. Individual losses are averaged """ if self._losses is not None and losses: logging.warning('Change of losses is not yet supported') raise NotImplementedError() elif self._losses is not None and not losses: pass else: self._losses = {} for name, _loss in losses.items(): self._losses[name] = _loss(self.inputs["label"], self.outputs_train["pred"]) total_loss = tf.reduce_mean(list(self._losses.values()), axis=0) self._losses['total'] = total_loss self.outputs_train["losses"] = self._losses self.outputs_eval["losses"] = self._losses def _add_optims(self, optims: dict): """ Adds optims to model that are to be used by optimizers or during training. Can be overwritten for more advanced optimizers Parameters ---------- optim: dict dictionary containing all optimizers, optimizers should be of Type[tf.train.Optimizer] """ if self._optims is not None and optims: logging.warning('Change of optims is not yet supported') elif self._optims is not None and not optims: pass else: self._optims = optims['default'] grads = self._optims.compute_gradients(self._losses['total']) step = self._optims.apply_gradients(grads) self.outputs_train["default_step"] = step @staticmethod def prepare_batch(batch: dict, input_device, output_device): """ Helper Function to prepare Network Inputs and Labels (convert them to correct type and shape and push them to correct devices) Parameters ---------- batch : dict dictionary containing all the data input_device : Any device for module inputs (will be ignored here; just given for compatibility) output_device : Any device for module outputs (will be ignored here; just given for compatibility) Returns ------- dict dictionary containing data in correct type and shape and on correct device """ return {k: v.astype(np.float32) for k, v in batch.items()} @staticmethod def closure(model, data_dict: dict, optimizers: dict, losses: dict, iter_num: int, fold=0, **kwargs): """ default closure method to do a single training step; Could be overwritten for more advanced models Parameters ---------- model : :class:`SkLearnEstimator` trainable model data_dict : dict dictionary containing the data optimizers : dict dictionary of optimizers to optimize model's parameters; ignored here, just passed for compatibility reasons losses : dict dict holding the losses to calculate errors; ignored here, just passed for compatibility reasons iter_num: int the number of of the current iteration in the current epoch; Will be restarted at zero at the beginning of every epoch fold : int Current Fold in Crossvalidation (default: 0) **kwargs: additional keyword arguments Returns ------- dict Loss values (with same keys as input dict losses; will always be empty here) dict dictionary containing all predictions """ inputs = data_dict['data'] outputs = model.run(data=inputs, label=data_dict['label']) loss_vals = outputs['losses'] return loss_vals, outputs ================================================ FILE: delira/models/backends/torch/__init__.py ================================================ from delira import get_backends as _get_backends if "TORCH" in _get_backends(): from delira.models.backends.torch.abstract_network import \ AbstractPyTorchNetwork from delira.models.backends.torch.data_parallel import \ DataParallelPyTorchNetwork from delira.models.backends.torch.utils import scale_loss ================================================ FILE: delira/models/backends/torch/abstract_network.py 
================================================ import abc import torch from delira.models.abstract_network import AbstractNetwork from delira.models.backends.torch.utils import scale_loss class AbstractPyTorchNetwork(AbstractNetwork, torch.nn.Module): """ Abstract Class for PyTorch Networks See Also -------- `torch.nn.Module` :class:`AbstractNetwork` """ @abc.abstractmethod def __init__(self, **kwargs): """ Parameters ---------- **kwargs : keyword arguments (are passed to :class:`AbstractNetwork`'s ` __init__ to register them as init kwargs """ torch.nn.Module.__init__(self) AbstractNetwork.__init__(self, **kwargs) @abc.abstractmethod def forward(self, *inputs): """ Forward inputs through module (defines module behavior) Parameters ---------- inputs : list inputs of arbitrary type and number Returns ------- Any result: module results of arbitrary type and number """ raise NotImplementedError() def __call__(self, *args, **kwargs): """ Calls Forward method Parameters ---------- *args : positional arguments (passed to `forward`) **kwargs : keyword arguments (passed to `forward`) Returns ------- Any result: module results of arbitrary type and number """ return torch.jit.ScriptModule.__call__(self, *args, **kwargs) @staticmethod def prepare_batch(batch: dict, input_device, output_device): """ Helper Function to prepare Network Inputs and Labels (convert them to correct type and shape and push them to correct devices) Parameters ---------- batch : dict dictionary containing all the data input_device : torch.device device for network inputs output_device : torch.device device for network outputs Returns ------- dict dictionary containing data in correct type and shape and on correct device """ return_dict = {"data": torch.from_numpy(batch["data"]).to( input_device).to(torch.float)} for key, vals in batch.items(): if key == "data": continue return_dict[key] = torch.from_numpy(vals).to(output_device).to( torch.float) return return_dict @staticmethod def closure(model, data_dict: dict, optimizers: dict, losses: dict, iter_num: int, fold=0, **kwargs): """ closure method to do a single backpropagation step Parameters ---------- model : :class:`AbstractPyTorchNetwork` trainable model data_dict : dict dictionary containing the data optimizers : dict dictionary of optimizers to optimize model's parameters losses : dict dict holding the losses to calculate errors (gradients from different losses will be accumulated) iter_num: int the number of of the current iteration in the current epoch; Will be restarted at zero at the beginning of every epoch fold : int Current Fold in Crossvalidation (default: 0) **kwargs: additional keyword arguments Returns ------- dict Loss values (with same keys as input dict losses) dict Arbitrary number of predictions as numpy array """ loss_vals = {} total_loss = 0 with torch.enable_grad(): # predict inputs = data_dict["data"] preds = model(inputs) # calculate losses for key, crit_fn in losses.items(): _loss_val = crit_fn(preds["pred"], data_dict["label"]) loss_vals[key] = _loss_val.item() total_loss += _loss_val optimizers['default'].zero_grad() # perform loss scaling via apex if half precision is enabled with scale_loss(total_loss, optimizers["default"]) as scaled_loss: scaled_loss.backward() optimizers['default'].step() return loss_vals, {k: v.detach() for k, v in preds.items()} ================================================ FILE: delira/models/backends/torch/data_parallel.py ================================================ import torch from 
================================================
FILE: delira/models/backends/torch/data_parallel.py
================================================
import torch

from delira.models.backends.torch.abstract_network import \
    AbstractPyTorchNetwork


class DataParallelPyTorchNetwork(AbstractPyTorchNetwork,
                                 torch.nn.DataParallel):
    """
    A Wrapper around a :class:`AbstractPyTorchNetwork` instance to implement
    parallel training by splitting the batches
    """

    def __init__(self, module: AbstractPyTorchNetwork, device_ids=None,
                 output_device=None, dim=0):
        """

        Parameters
        ----------
        module : :class:`AbstractPyTorchNetwork`
            the module to wrap (will be replicated on all devices)
        device_ids : list
            a list containing the devices to use (either as indices or as
            :class:`torch.device`).
        output_device : int or :class:`torch.device`
            The output device
            Make sure, your labels are also on this device for loss
            calculation!
            If not specified, the first device of ``device_ids`` will be
            used for output gathering.
        dim : int
            the index of the batch dimension (usually 0, but can become
            e.g. 1 in NLP tasks)

        """
        AbstractPyTorchNetwork.__init__(self)
        torch.nn.DataParallel.__init__(self, module, device_ids,
                                       output_device, dim)

    def forward(self, *args, **kwargs):
        """
        Scatters the inputs (both positional and keyword arguments) across
        all devices, feeds them through model replicas and re-builds batches
        on output device

        Parameters
        ----------
        *args :
            positional arguments of arbitrary number and type
        **kwargs :
            keyword arguments of arbitrary number and type

        Returns
        -------
        Any
            combined output from all scattered models

        """
        return torch.nn.DataParallel.forward(self, *args, **kwargs)

    @property
    def closure(self):
        return self.module.closure

    @property
    def prepare_batch(self):
        return self.module.prepare_batch


================================================
FILE: delira/models/backends/torch/utils.py
================================================
import contextlib

try:
    # use apex loss scaling if possible
    # (and enabled, this is done internally by apex)
    from apex import amp
except ImportError:
    # use no loss scaling with same API if apex is unavailable
    amp = None


@contextlib.contextmanager
def scale_loss(loss, optimizers, loss_id=0, model=None,
               delay_unscale=False, **kwargs):
    """
    Context Manager which automatically switches between loss scaling via
    apex.amp (if apex is available) and no loss scaling

    Parameters
    ----------
    loss : :class:`torch.Tensor`
        a pytorch tensor containing the loss value
    optimizers : list
        a list of :class:`torch.optim.Optimizer` containing all optimizers,
        which are holding parameters affected by the backpropagation of the
        current loss value
    loss_id : int
        When used in conjunction with the ``num_losses`` argument to
        ``amp.initialize``, enables Amp to use a different loss scale per
        loss. ``loss_id`` must be an integer between 0 and ``num_losses``
        that tells Amp which loss is being used for the current backward
        pass. If ``loss_id`` is left unspecified, Amp will use the default
        global loss scaler for this backward pass.
    model : :class:`AbstractPyTorchNetwork` or None
        Currently unused, reserved to enable future optimizations.
    delay_unscale : bool
        ``delay_unscale`` is never necessary, and the default value of
        ``False`` is strongly recommended. If ``True``, Amp will not
        unscale the gradients or perform model->master gradient copies on
        context manager exit. ``delay_unscale=True`` is a minor ninja
        performance optimization and can result in weird gotchas
        (especially with multiple models/optimizers/losses), so only use it
        if you know what you're doing.
**kwargs : additional keyword arguments; currently unused, but provided for the case amp decides to extend the functionality here Yields ------ :class:`torch.Tensor` the new loss value (scaled if apex.amp is available and was configured to do so, unscaled in all other cases) """ if amp is None: yield loss else: with amp.scale_loss(loss=loss, optimizers=optimizers, loss_id=loss_id, model=model, delay_unscale=delay_unscale, **kwargs) as _loss: yield _loss ================================================ FILE: delira/models/backends/torchscript/__init__.py ================================================ from delira import get_backends as _get_backends if "TORCH" in _get_backends(): from .abstract_network import AbstractTorchScriptNetwork ================================================ FILE: delira/models/backends/torchscript/abstract_network.py ================================================ import abc import torch from delira.models.abstract_network import AbstractNetwork class AbstractTorchScriptNetwork(AbstractNetwork, torch.jit.ScriptModule): """ Abstract Interface Class for TorchScript Networks. For more information have a look at https://pytorch.org/docs/stable/jit.html#torchscript Warnings -------- In addition to the here defined API, a forward function must be implemented and decorated with ``@torch.jit.script_method`` """ @abc.abstractmethod def __init__(self, optimize=True, **kwargs): """ Parameters ---------- optimize : bool whether to optimize the network graph or not; default: True **kwargs : additional keyword arguments (passed to :class:`AbstractNetwork`) """ torch.jit.ScriptModule.__init__(self, optimize=optimize) AbstractNetwork.__init__(self, **kwargs) def __call__(self, *args, **kwargs): """ Calls Forward method Parameters ---------- *args : positional arguments (passed to `forward`) **kwargs : keyword arguments (passed to `forward`) Returns ------- Any result: module results of arbitrary type and number """ return torch.jit.ScriptModule.__call__(self, *args, **kwargs) @staticmethod def prepare_batch(batch: dict, input_device, output_device): """ Helper Function to prepare Network Inputs and Labels (convert them to correct type and shape and push them to correct devices) Parameters ---------- batch : dict dictionary containing all the data input_device : torch.device device for network inputs output_device : torch.device device for network outputs Returns ------- dict dictionary containing data in correct type and shape and on correct device """ return_dict = {"data": torch.from_numpy(batch["data"]).to( input_device).to(torch.float)} for key, vals in batch.items(): if key == "data": continue return_dict[key] = torch.from_numpy(vals).to(output_device).to( torch.float) return return_dict @staticmethod def closure(model, data_dict: dict, optimizers: dict, losses: dict, iter_num: int, fold=0, **kwargs): """ closure method to do a single backpropagation step Parameters ---------- model : :class:`AbstractTorchScriptNetwork` trainable model data_dict : dict dictionary containing the data optimizers : dict dictionary of optimizers to optimize model's parameters losses : dict dict holding the losses to calculate errors (gradients from different losses will be accumulated) iter_num: int the number of of the current iteration in the current epoch; Will be restarted at zero at the beginning of every epoch fold : int Current Fold in Crossvalidation (default: 0) **kwargs: additional keyword arguments Returns ------- dict Loss values (with same keys as input dict losses) dict Arbitrary 
number of predictions as numpy array """ loss_vals = {} total_loss = 0 with torch.enable_grad(): # predict inputs = data_dict["data"] preds = model(inputs) # calculate losses for key, crit_fn in losses.items(): _loss_val = crit_fn(preds["pred"], data_dict["label"]) loss_vals[key] = _loss_val.item() total_loss += _loss_val optimizers['default'].zero_grad() # apex does not yet support torchscript total_loss.backward() optimizers['default'].step() return loss_vals, {k: v.detach() for k, v in preds.items()} ================================================ FILE: delira/training/__init__.py ================================================ from delira.training.base_experiment import BaseExperiment from delira.training.base_trainer import BaseNetworkTrainer from delira.training.predictor import Predictor from delira.training.backends import * ================================================ FILE: delira/training/backends/__init__.py ================================================ from delira import get_backends as _get_backends if "CHAINER" in _get_backends(): from delira.training.backends.chainer import * if "SKLEARN" in _get_backends(): from delira.training.backends.sklearn import * if "TF" in _get_backends(): from delira.training.backends.tf_graph import * from delira.training.backends.tf_eager import * if "TORCH" in _get_backends(): from delira.training.backends.torch import * from delira.training.backends.torchscript import * ================================================ FILE: delira/training/backends/chainer/__init__.py ================================================ from delira import get_backends as _get_backends if "CHAINER" in _get_backends(): from delira.training.backends.chainer.trainer import ChainerNetworkTrainer from delira.training.backends.chainer.experiment import ChainerExperiment from delira.training.backends.chainer.utils import convert_to_numpy \ as convert_chainer_to_numpy from delira.training.backends.chainer.utils import create_optims_default \ as create_chainer_optims_default ================================================ FILE: delira/training/backends/chainer/experiment.py ================================================ import typing from functools import partial from delira.models.backends.chainer import AbstractChainerNetwork from delira.data_loading import DataManager from delira.training.base_experiment import BaseExperiment from delira.utils import DeliraConfig from delira.training.backends.chainer.utils import create_optims_default from delira.training.backends.chainer.utils import convert_to_numpy from delira.training.backends.chainer.trainer import ChainerNetworkTrainer class ChainerExperiment(BaseExperiment): def __init__(self, config: typing.Union[str, DeliraConfig], model_cls: AbstractChainerNetwork, n_epochs=None, name=None, save_path=None, key_mapping=None, val_score_key=None, optim_builder=create_optims_default, checkpoint_freq=1, trainer_cls=ChainerNetworkTrainer, **kwargs): """ Parameters ---------- config : :class:`DeliraConfig` or str the training config, if string is passed, it is treated as a path to a file, where the config is loaded from model_cls : Subclass of :class:`AbstractChainerNetwork` the class implementing the model to train n_epochs : int or None the number of epochs to train, if None: can be specified later during actual training name : str or None the Experiment's name save_path : str or None the path to save the results and checkpoints to. 
if None: Current working directory will be used key_mapping : dict mapping between data_dict and model inputs (necessary for prediction with :class:`Predictor`-API), if no keymapping is given, a default key_mapping of {"x": "data"} will be used here val_score_key : str or None key defining which metric to use for validation (determining best model and scheduling lr); if None: No validation-based operations will be done (model might still get validated, but validation metrics can only be logged and not used further) optim_builder : function Function returning a dict of backend-specific optimizers. defaults to :func:`create_optims_default_chainer` checkpoint_freq : int frequency of saving checkpoints (1 denotes saving every epoch, 2 denotes saving every second epoch etc.); default: 1 trainer_cls : subclass of :class:`ChainerNetworkTrainer` the trainer class to use for training the model, defaults to :class:`ChainerNetworkTrainer` **kwargs : additional keyword arguments """ if key_mapping is None: key_mapping = {"x": "data"} super().__init__(config=config, model_cls=model_cls, n_epochs=n_epochs, name=name, save_path=save_path, key_mapping=key_mapping, val_score_key=val_score_key, optim_builder=optim_builder, checkpoint_freq=checkpoint_freq, trainer_cls=trainer_cls, **kwargs) def test(self, network: AbstractChainerNetwork, test_data: DataManager, metrics: dict, metric_keys=None, verbose=False, prepare_batch=None, convert_fn=convert_to_numpy, **kwargs): """ Setup and run testing on a given network Parameters ---------- network : :class:`AbstractNetwork` the (trained) network to test test_data : :class:`DataManager` the data to use for testing metrics : dict the metrics to calculate metric_keys : dict of tuples the batch_dict keys to use for each metric to calculate. Should contain a value for each key in ``metrics``. If no values are given for a key, per default ``pred`` and ``label`` will be used for metric calculation verbose : bool verbosity of the test process prepare_batch : function function to convert a batch-dict to a format accepted by the model. This conversion typically includes dtype-conversion, reshaping, wrapping to backend-specific tensors and pushing to correct devices. 
If not further specified uses the ``network``'s ``prepare_batch`` with CPU devices convert_fn : function function to convert a batch of tensors to numpy if not specified defaults to :func:`convert_chainer_tensor_to_npy` **kwargs : additional keyword arguments Returns ------- dict all predictions obtained by feeding the ``test_data`` through the ``network`` dict all metrics calculated upon the ``test_data`` and the obtained predictions """ # use backend-specific and model-specific prepare_batch fn # (runs on same device as passed network per default) device = network.device if prepare_batch is None: prepare_batch = partial(network.prepare_batch, input_device=device, output_device=device) return super().test(network=network, test_data=test_data, metrics=metrics, metric_keys=metric_keys, verbose=verbose, prepare_batch=prepare_batch, convert_fn=convert_fn, **kwargs) ================================================ FILE: delira/training/backends/chainer/trainer.py ================================================ from delira.training.backends.chainer.utils import convert_to_numpy from delira.training.backends.chainer.utils import create_optims_default from delira.training.callbacks.logging_callback import DefaultLoggingCallback from delira.io.chainer import load_checkpoint, save_checkpoint from delira.models.backends.chainer import AbstractChainerNetwork, \ DataParallelChainerNetwork, \ DataParallelChainerOptimizer from delira.training.base_trainer import BaseNetworkTrainer import chainer from batchgenerators.dataloading import MultiThreadedAugmenter import os import logging from functools import partial logger = logging.getLogger(__name__) class ChainerNetworkTrainer(BaseNetworkTrainer): """ Train and Validate a Network See Also -------- :class:`AbstractNetwork` """ def __init__(self, network: AbstractChainerNetwork, save_path: str, key_mapping, losses=None, optimizer_cls=None, optimizer_params=None, metrics=None, lr_scheduler_cls=None, lr_scheduler_params=None, gpu_ids=None, save_freq=1, optim_fn=create_optims_default, logging_type="tensorboardx", logging_kwargs=None, logging_callback_cls=DefaultLoggingCallback, logging_frequencies=None, logging_reduce_types=None, fold=0, callbacks=None, start_epoch=1, metric_keys=None, convert_batch_to_npy_fn=convert_to_numpy, mixed_precision=False, val_freq=1, ** kwargs): """ Parameters ---------- network : :class:`AbstractChainerNetwork` the network to train save_path : str path to save networks to key_mapping : dict a dictionary containing the mapping from the ``data_dict`` to the actual model's inputs. E.g. if a model accepts one input named 'x' and the data_dict contains one entry named 'data' this argument would have to be ``{'x': 'data'}`` losses : dict dictionary containing the training losses optimizer_cls : subclass of chainer.Optimizer optimizer class implementing the optimization algorithm of choice optimizer_params : dict keyword arguments passed to optimizer during construction metrics : dict, optional metrics, which will be evaluated during train and validation phase (should work on numpy arrays) lr_scheduler_cls : Any learning rate schedule class: must implement step() method lr_scheduler_params : dict keyword arguments passed to lr scheduler during construction gpu_ids : list list containing ids of GPUs to use; if empty: use cpu instead save_freq : int integer specifying how often to save the current model's state. 
State is saved every state_freq epochs optim_fn : function creates a dictionary containing all necessary optimizers logging_type : str or callable the type of logging. If string: it must be one of ["visdom", "tensorboardx"] If callable: it must be a logging handler class logging_kwargs : dict dictionary containing all logging keyword arguments logging_callback_cls : class the callback class to create and register for logging logging_frequencies : int or dict specifies how often to log for each key. If int: integer will be applied to all valid keys if dict: should contain a frequency per valid key. Missing keys will be filled with a frequency of 1 (log every time) None is equal to empty dict here. logging_reduce_types : str of FunctionType or dict if str: specifies the reduction type to use. Valid types are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'. The given type will be mapped to all valid keys. if FunctionType: specifies the actual reduction function. Will be applied for all keys. if dict: should contain pairs of valid logging keys and either str or FunctionType. Specifies the logging value per key. Missing keys will be filles with a default value of 'last'. Valid types for strings are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'. fold : int current cross validation fold (0 per default) callbacks : list initial callbacks to register start_epoch : int epoch to start training at metric_keys : dict dict specifying which batch_dict entry to use for which metric as target; default: None, which will result in key "label" for all metrics convert_batch_to_npy_fn : type, optional function converting a batch-tensor to numpy, per default this is a function, which detaches the tensor, moves it to cpu and then calls ``.array`` on it mixed_precision : bool whether to use mixed precision or not (False per default) val_freq : int validation frequency specifying how often to validate the trained model (a value of 1 denotes validating every epoch, a value of 2 denotes validating every second epoch etc.); defaults to 1 **kwargs : additional keyword arguments """ # prevent mutable defaults if callbacks is None: callbacks = [] if logging_kwargs is None: logging_kwargs = {} if gpu_ids is None: gpu_ids = [] if lr_scheduler_params is None: lr_scheduler_params = {} if metrics is None: metrics = {} if optimizer_params is None: optimizer_params = {} super().__init__(network=network, save_path=save_path, losses=losses, optimizer_cls=optimizer_cls, optimizer_params=optimizer_params, metrics=metrics, lr_scheduler_cls=lr_scheduler_cls, lr_scheduler_params=lr_scheduler_params, gpu_ids=gpu_ids, save_freq=save_freq, optim_fn=optim_fn, key_mapping=key_mapping, logging_type=logging_type, logging_kwargs=logging_kwargs, logging_callback_cls=logging_callback_cls, logging_frequencies=logging_frequencies, logging_reduce_types=logging_reduce_types, fold=fold, callbacks=callbacks, start_epoch=start_epoch, metric_keys=metric_keys, convert_batch_to_npy_fn=convert_batch_to_npy_fn, val_freq=val_freq, **kwargs ) self._setup(network, optim_fn, optimizer_cls, optimizer_params, lr_scheduler_cls, lr_scheduler_params, gpu_ids, key_mapping, convert_batch_to_npy_fn, mixed_precision, callbacks) for key, val in kwargs.items(): setattr(self, key, val) def _setup(self, network, optim_fn, optimizer_cls, optimizer_params, lr_scheduler_cls, lr_scheduler_params, gpu_ids, key_mapping, convert_batch_to_npy_fn, mixed_precision, callbacks): """ Defines the Trainers Setup Parameters ---------- network : :class:`AbstractChainerNetwork` 
the network to train optim_fn : function creates a dictionary containing all necessary optimizers optimizer_cls : subclass of torch.optim.Optimizer optimizer class implementing the optimization algorithm of choice optimizer_params : dict lr_scheduler_cls : Any learning rate schedule class: must implement step() method lr_scheduler_params : dict keyword arguments passed to lr scheduler during construction gpu_ids : list list containing ids of GPUs to use; if empty: use cpu instead convert_batch_to_npy_fn : type function converting a batch-tensor to numpy mixed_precision : bool whether to use mixed precision or not (False per default) callbacks : list initial callbacks to register """ self.optimizers = optim_fn(network, optimizer_cls, **optimizer_params) super()._setup(network, None, lr_scheduler_params, gpu_ids, key_mapping, convert_batch_to_npy_fn, network.prepare_batch, callbacks) if mixed_precision: # enable chainer mixed precision globally chainer.global_config.dtype = chainer.mixed16 # Load latest epoch file if available if os.path.isdir(self.save_path): latest_state_path, latest_epoch = self._search_for_prev_state( self.save_path) if latest_state_path is not None: # if pth file does not exist, load pt file instead if not os.path.isfile(latest_state_path): latest_state_path = latest_state_path[:-1] logger.info("Attempting to load state from previous \ training from %s" % latest_state_path) self.update_state(latest_state_path) self.start_epoch = latest_epoch if chainer.chainerx.is_available(): gpu_device_prefix = "cuda:" cpu_device_prefix = "native" else: gpu_device_prefix = "@cupy:" cpu_device_prefix = "@numpy" if gpu_ids: try: if chainer.cuda.check_cuda_available(): self.use_gpu = True if len(gpu_ids) > 1: # use GPU 0 as default input GPU self.input_device = chainer.get_device( gpu_device_prefix + str(gpu_ids[0])) # Train on multiple GPUs and use GPU 0 as output # device self.module = DataParallelChainerNetwork( self.module.to_device("@numpy"), devices=[chainer.get_device( gpu_device_prefix + str(_id)) for _id in gpu_ids]) # ToDo: Creating Multiple DataParallelOptimizers is # kinda tricky right now, since we need to add the # class itself to the parameters and use # DataParallelOptimizer as optimizer class. 
# Should look for other possibility, # but currently I don't know any self.optimizers = optim_fn( DataParallelChainerOptimizer, {**optimizer_params, "optim_cls": optimizer_cls}) self.output_device = chainer.get_device( gpu_device_prefix + str(gpu_ids[0])) else: # use the only available GPU as input device self.input_device = chainer.get_device( cpu_device_prefix) self.module = self.module.to_device( self.input_device) # use GPU 0 as output device as output device self.output_device = chainer.get_device( cpu_device_prefix) else: # cuda unavailable -> no GPU support self.use_gpu = False self.input_device = chainer.get_device( cpu_device_prefix) self.output_device = chainer.get_device( cpu_device_prefix) self.module = self.module.to_device(self.input_device) # thrown if Cupy is unavailable -> no GPU support except RuntimeError as e: logging.exception(e) self.use_gpu = False self.input_device = chainer.get_device(cpu_device_prefix) self.output_device = chainer.get_device(cpu_device_prefix) self.module = self.module.to_device(self.input_device) # no gpu indices given else: self.use_gpu = False self.input_device = chainer.get_device(cpu_device_prefix) self.output_device = chainer.get_device(cpu_device_prefix) self.module = self.module.to_device(self.input_device) self._prepare_batch = partial( self._prepare_batch, input_device=self.input_device, output_device=self.output_device) def _at_training_begin(self, *args, **kwargs): """ Defines behaviour at beginning of training Parameters ---------- *args : positional arguments **kwargs : keyword arguments """ for cbck in self._callbacks: self._update_state(cbck.at_training_begin(self, *args, **kwargs)) self.save_state(os.path.join( self.save_path, "checkpoint_epoch_%d" % self.start_epoch), self.start_epoch) def _at_training_end(self, *args, **kwargs): """ Defines Behaviour at end of training: Loads best model if available Returns ------- :class:`AbstractPyTorchNetwork` best network """ if os.path.isfile(os.path.join(self.save_path, 'checkpoint_best.chain')): # load best model and return it self.update_state(os.path.join(self.save_path, 'checkpoint_best.chain')) return super()._at_training_end(*args, **kwargs) def _at_epoch_end(self, metrics_val, val_score_key, epoch, is_best, **kwargs): """ Defines behaviour at beginning of each epoch: Executes all callbacks's `at_epoch_end` method and saves current state if necessary Parameters ---------- metrics_val : dict validation metrics val_score_key : str validation score key epoch : int current epoch num_epochs : int total number of epochs is_best : bool whether current model is best one so far **kwargs : keyword arguments """ for cb in self._callbacks: self._update_state(cb.at_epoch_end(self, val_metrics=metrics_val, val_score_key=val_score_key, curr_epoch=epoch)) if epoch % self.save_freq == 0: self.save_state( os.path.join( self.save_path, "checkpoint_epoch_%d.chain" % epoch), epoch) if is_best: self.save_state(os.path.join(self.save_path, "checkpoint_best.chain"), epoch) def _train_single_epoch(self, batchgen: MultiThreadedAugmenter, epoch, verbose=False): """ Trains the network a single epoch Parameters ---------- batchgen : MultiThreadedAugmenter Generator yielding the training batches epoch : int current epoch """ chainer.global_config.train = True return super()._train_single_epoch(batchgen, epoch, verbose=verbose) def predict_data_mgr(self, datamgr, batchsize=None, metrics={}, metric_keys={}, verbose=False, **kwargs): """ Defines a routine to predict data obtained from a batchgenerator Parameters 
---------- datamgr : :class:`DataManager` Manager producing a generator holding the batches batchsize : int Artificial batchsize (sampling will be done with batchsize 1 and sampled data will be stacked to match the artificial batchsize)(default: None) metrics : dict the metrics to calculate metric_keys : dict the ``batch_dict`` items to use for metric calculation verbose : bool whether to show a progress-bar or not, default: False **kwargs : additional keyword arguments Returns ------- dict predictions dict calculated metrics """ chainer.global_config.train = False return super().predict_data_mgr(datamgr, batchsize, metrics, metric_keys, verbose, **kwargs) def save_state(self, file_name, epoch, **kwargs): """ saves the current state via :func:`delira.io.chainer.save_checkpoint` Parameters ---------- file_name : str filename to save the state to epoch : int current epoch (will be saved for mapping back) *args : positional arguments **kwargs : keyword arguments """ if not file_name.endswith(".chain"): file_name = file_name + ".chain" save_checkpoint(file_name, self.module, self.optimizers, **kwargs) @staticmethod def load_state(file_name, **kwargs): """ Loads the new state from file via :func:`delira.io.chainer.load_checkpoint` Parameters ---------- file_name : str the file to load the state from **kwargs : keyword arguments Returns ------- dict new state """ if not file_name.endswith(".chain"): file_name = file_name + ".chain" return load_checkpoint(file_name, **kwargs) def update_state(self, file_name, *args, **kwargs): """ Update internal state from a loaded state Parameters ---------- file_name : str file containing the new state to load *args : positional arguments **kwargs : keyword arguments Returns ------- :class:`BaseNetworkTrainer` the trainer with a modified state """ self._update_state(self.load_state(file_name, old_state={ "model": self.module, "optimizers": self.optimizers}, **kwargs)) def _update_state(self, new_state): """ Update the state from a given new state Parameters ---------- new_state : dict new state to update internal state from Returns ------- :class:`ChainerNetworkTrainer` the trainer with a modified state """ if "model" in new_state: self.module = new_state.pop("model") if "optimizers" in new_state and new_state["optimizers"]: self.optimizers = new_state.pop("optimizers") if "epoch" in new_state: self.start_epoch = new_state.pop("epoch") return super()._update_state(new_state) @staticmethod def _search_for_prev_state(path, extensions=None): """ Helper function to search in a given path for previous epoch states (indicated by extensions) Parameters ---------- path : str the path to search in extensions : list list of strings containing valid file extensions for checkpoint files Returns ------- str the file containing the latest checkpoint (if available) None if no latest checkpoint was found int the latest epoch (1 if no checkpoint was found) """ if extensions is None: extensions = [".chain"] return BaseNetworkTrainer._search_for_prev_state(path, extensions) ================================================ FILE: delira/training/backends/chainer/utils.py ================================================ import chainer from delira.models.backends.chainer import DataParallelChainerOptimizer from delira.training.utils import convert_to_numpy_identity, \ recursively_convert_elements def _single_element_tensor_conversion(element): element.to_cpu() return element.array def convert_to_numpy(*args, **kwargs): """ Converts all chainer variables in args and kwargs to numpy 
array Parameters ---------- *args : positional arguments of arbitrary number and type **kwargs : keyword arguments of arbitrary number and type Returns ------- list converted positional arguments dict converted keyboard arguments """ args = recursively_convert_elements(args, chainer.Variable, _single_element_tensor_conversion) kwargs = recursively_convert_elements(kwargs, chainer.Variable, _single_element_tensor_conversion) return convert_to_numpy_identity(*args, **kwargs) def create_optims_default(model, optim_cls, **optimizer_params): """ Default function to create a single optimizer for chainer (also supports Data-Parallel) Parameters ---------- model : :class:`chainer.Link` the model, which should be updated by the optimizer optim_cls : type the optimizer class implementing the actual parameter update optimizer_params : dict the params used for initializing an instance of ``optim_cls`` Returns ------- dict dictionary containing the created optimizer (key: "default") """ if issubclass(optim_cls, DataParallelChainerOptimizer): optim = optim_cls.from_optimizer_class(**optimizer_params) else: optim = optim_cls(**optimizer_params) optim = optim.setup(model) return {"default": optim} ================================================ FILE: delira/training/backends/sklearn/__init__.py ================================================ from delira import get_backends as _get_backends if "SKLEARN" in _get_backends(): from delira.training.backends.sklearn.trainer import \ SklearnEstimatorTrainer from delira.training.backends.sklearn.experiment import SklearnExperiment from delira.training.backends.sklearn.utils import create_optims_default \ as create_sklearn_optims_default ================================================ FILE: delira/training/backends/sklearn/experiment.py ================================================ from functools import partial import typing import os from sklearn.base import BaseEstimator from delira.models.backends.sklearn import SklearnEstimator from delira.training.base_experiment import BaseExperiment from delira.utils import DeliraConfig from delira.training.backends.sklearn.trainer import SklearnEstimatorTrainer class SklearnExperiment(BaseExperiment): def __init__(self, config: typing.Union[str, DeliraConfig], model_cls: BaseEstimator, n_epochs=None, name=None, save_path=None, key_mapping=None, val_score_key=None, checkpoint_freq=1, trainer_cls=SklearnEstimatorTrainer, model_wrapper_cls=SklearnEstimator, **kwargs): """ Parameters ---------- config : :class:`DeliraConfig` or str the training config, if string is passed, it is treated as a path to a file, where the config is loaded from model_cls : Subclass of :class:`sklearn.base.BaseEstimator` the class implementing the model to train (will be wrapped by :class:`SkLearnEstimator`) n_epochs : int or None the number of epochs to train, if None: can be specified later during actual training name : str or None the Experiment's name save_path : str or None the path to save the results and checkpoints to. 
if None: Current working directory will be used key_mapping : dict mapping between data_dict and model inputs (necessary for prediction with :class:`Predictor`-API), if no keymapping is given, a default key_mapping of {"X": "X"} will be used here checkpoint_freq : int frequency of saving checkpoints (1 denotes saving every epoch, 2 denotes saving every second epoch etc.); default: 1 trainer_cls : subclass of :class:`SkLearnEstimatorTrainer` the trainer class to use for training the model, defaults to :class:`PyTorchNetworkTrainer` model_wrapper_cls : subclass of :class:`SkLearnEstimator` class wrapping the actual sklearn model to provide delira compatibility **kwargs : additional keyword arguments """ if key_mapping is None: key_mapping = {"X": "X"} super().__init__(config=config, model_cls=model_cls, n_epochs=n_epochs, name=name, save_path=save_path, key_mapping=key_mapping, val_score_key=val_score_key, checkpoint_freq=checkpoint_freq, trainer_cls=trainer_cls, **kwargs) self._model_wrapper_cls = model_wrapper_cls def _setup_training(self, config, **kwargs): """ Handles the setup for training case Parameters ---------- config : :class:`DeliraConfig` the config containing the model and training kwargs **kwargs : additional keyword arguments Returns ------- :class:`BaseNetworkTrainer` the created trainer """ model_kwargs = config.model_params model_kwargs = {**model_kwargs["variable"], **model_kwargs["fixed"]} _model = self.model_cls(**model_kwargs) model = self._model_wrapper_cls(_model) training_params = config.training_params metrics = training_params.nested_get("metrics") # necessary for resuming training from a given path save_path = kwargs.pop("save_path", os.path.join( self.save_path, "checkpoints", "run_%02d" % self._run)) return self.trainer_cls( estimator=model, save_path=save_path, key_mapping=self.key_mapping, metrics=metrics, save_freq=self.checkpoint_freq, **kwargs ) def _setup_test(self, config, model, convert_batch_to_npy_fn, prepare_batch_fn, **kwargs): """ Parameters ---------- config : :class:`DeliraConfig` the config containing the model and training kwargs (ignored here, just passed for subclassing and unified API) model : :class:`sklearn.base.BaseEstimator` the model to test convert_batch_to_npy_fn : function function to convert a batch of tensors to numpy prepare_batch_fn : function function to convert a batch-dict to a format accepted by the model. 
This conversion typically includes dtype-conversion, reshaping, wrapping to backend-specific tensors and pushing to correct devices **kwargs : additional keyword arguments Returns ------- :class:`Predictor` the created predictor """ if not isinstance(model, SklearnEstimator): model = SklearnEstimator(model) if prepare_batch_fn is None: prepare_batch_fn = partial(model.prepare_batch, input_device="cpu", output_device="cpu") return super()._setup_test(config, model, convert_batch_to_npy_fn, prepare_batch_fn, **kwargs) ================================================ FILE: delira/training/backends/sklearn/trainer.py ================================================ from delira.training.backends.sklearn.utils import create_optims_default from delira.training.utils import convert_to_numpy_identity as \ convert_to_numpy from delira.training.base_trainer import BaseNetworkTrainer from delira.io.sklearn import save_checkpoint, load_checkpoint from delira.models.backends.sklearn import SklearnEstimator from delira.data_loading import DataManager from delira.data_loading.sampler import RandomSamplerWithReplacement, \ RandomSamplerNoReplacement from delira.training.callbacks.logging_callback import DefaultLoggingCallback import os import logging import numpy as np from tqdm.auto import tqdm from functools import partial logger = logging.getLogger(__name__) class SklearnEstimatorTrainer(BaseNetworkTrainer): """ Train and Validate a ``sklearn`` estimator See Also -------- :class:`SkLearnEstimator` """ def __init__(self, estimator: SklearnEstimator, save_path: str, key_mapping, metrics=None, save_freq=1, logging_type="tensorboardx", logging_kwargs=None, logging_callback_cls=DefaultLoggingCallback, logging_frequencies=None, logging_reduce_types=None, fold=0, callbacks=None, start_epoch=1, metric_keys=None, convert_batch_to_npy_fn=convert_to_numpy, val_freq=1, ** kwargs): """ Parameters ---------- estimator : :class:`SklearnEstimator` the estimator to train save_path : str path to save networks to key_mapping : dict a dictionary containing the mapping from the ``data_dict`` to the actual model's inputs. E.g. if a model accepts one input named 'x' and the data_dict contains one entry named 'data' this argument would have to be ``{'x': 'data'}`` metrics : dict, optional metrics, which will be evaluated during train and validation phase (should work on numpy arrays) save_freq : int integer specifying how often to save the current model's state. State is saved every state_freq epochs logging_type : str or callable the type of logging. If string: it must be one of ["visdom", "tensorboardx"] If callable: it must be a logging handler class logging_kwargs : dict dictionary containing all logging keyword arguments logging_callback_cls : class the callback class to create and register for logging logging_frequencies : int or dict specifies how often to log for each key. If int: integer will be applied to all valid keys if dict: should contain a frequency per valid key. Missing keys will be filled with a frequency of 1 (log every time) None is equal to empty dict here. logging_reduce_types : str of FunctionType or dict if str: specifies the reduction type to use. Valid types are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'. The given type will be mapped to all valid keys. if FunctionType: specifies the actual reduction function. Will be applied for all keys. if dict: should contain pairs of valid logging keys and either str or FunctionType. Specifies the logging value per key. 
Missing keys will be filles with a default value of 'last'. Valid types for strings are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'. fold : int current cross validation fold (0 per default) callbacks : list initial callbacks to register start_epoch : int epoch to start training at metric_keys : dict dict specifying which batch_dict entry to use for which metric as target; default: None, which will result in key "label" for all metrics convert_batch_to_npy_fn : type, optional function converting a batch-tensor to numpy, per default this is a function, returning the inputs without changing anything val_freq : int validation frequency specifying how often to validate the trained model (a value of 1 denotes validating every epoch, a value of 2 denotes validating every second epoch etc.); defaults to 1 **kwargs : additional keyword arguments """ # prevent mutable defaults if callbacks is None: callbacks = [] if logging_kwargs is None: logging_kwargs = {} if metrics is None: metrics = {} super().__init__(network=estimator, save_path=save_path, losses={}, optimizer_cls=None, optimizer_params={}, metrics=metrics, lr_scheduler_cls=None, lr_scheduler_params={}, gpu_ids=[], save_freq=save_freq, optim_fn=create_optims_default, key_mapping=key_mapping, logging_type=logging_type, logging_kwargs=logging_kwargs, logging_callback_cls=logging_callback_cls, logging_frequencies=logging_frequencies, logging_reduce_types=logging_reduce_types, fold=fold, callbacks=callbacks, start_epoch=start_epoch, metric_keys=metric_keys, convert_batch_to_npy_fn=convert_batch_to_npy_fn, val_freq=val_freq, **kwargs ) self._setup(estimator, key_mapping, convert_batch_to_npy_fn, callbacks) for key, val in kwargs.items(): setattr(self, key, val) def _setup(self, estimator, key_mapping, convert_batch_to_npy_fn, callbacks): """ Defines the Trainers Setup Parameters ---------- estimator : :class:`SkLearnEstimator` the network to train key_mapping : dict a dictionary containing the mapping from the ``data_dict`` to the actual model's inputs. E.g. 
if a model accepts one input named 'x' and the data_dict contains one entry named 'data' this argument would have to be ``{'x': 'data'}`` convert_batch_to_npy_fn : type function converting a batch-tensor to numpy callbacks : list initial callbacks to register """ self.optimizers = create_optims_default() super()._setup(estimator, None, {}, [], key_mapping, convert_batch_to_npy_fn, estimator.prepare_batch, callbacks) # Load latest epoch file if available if os.path.isdir(self.save_path): # check all files in directory starting with "checkpoint" and # not ending with "_best.pth" latest_state_path, latest_epoch = self._search_for_prev_state( self.save_path) # if list is not empty: load previous state if latest_state_path is not None: self.update_state(latest_state_path) self.start_epoch = latest_epoch self.use_gpu = False self.input_device = "cpu" self.output_device = "cpu" self._prepare_batch = partial( self._prepare_batch, input_device=self.input_device, output_device=self.output_device) def _at_training_begin(self, *args, **kwargs): """ Defines behaviour at beginning of training Parameters ---------- *args : positional arguments **kwargs : keyword arguments """ for cbck in self._callbacks: self._update_state(cbck.at_training_begin(self, *args, **kwargs)) self.save_state(os.path.join( self.save_path, "checkpoint_epoch_%d" % self.start_epoch), self.start_epoch) def _at_training_end(self, *args, **kwargs): """ Defines Behaviour at end of training: Loads best model if available Returns ------- :class:`SkLearnEstimator` best network """ if os.path.isfile(os.path.join(self.save_path, 'checkpoint_best.pkl')): # load best model and return it self.update_state(os.path.join(self.save_path, 'checkpoint_best.pkl')) return super()._at_training_end(*args, **kwargs) def _at_epoch_end(self, metrics_val, val_score_key, epoch, is_best, **kwargs): """ Defines behaviour at beginning of each epoch: Executes all callbacks's `at_epoch_end` method and saves current state if necessary Parameters ---------- metrics_val : dict validation metrics val_score_key : str validation score key epoch : int current epoch num_epochs : int total number of epochs is_best : bool whether current model is best one so far **kwargs : keyword arguments """ for cb in self._callbacks: self._update_state(cb.at_epoch_end(self, val_metrics=metrics_val, val_score_key=val_score_key, curr_epoch=epoch)) if epoch % self.save_freq == 0: self.save_state(os.path.join(self.save_path, "checkpoint_epoch_%d.pkl" % epoch), epoch) if is_best: self.save_state(os.path.join(self.save_path, "checkpoint_best.pkl"), epoch) def _get_classes_if_necessary(self, dmgr: DataManager, verbose, label_key=None): """ Checks if available classes have to be collected before starting the training to dynamically build the estimator (not all batches contain all classes) and collects them if necessary Parameters ---------- dmgr : :class:`DataManager` the datamanager to collect the classes from verbose : bool verbosity label_key : str or None the key corresponding to the target value inside the data dict """ if label_key is None or not hasattr(self.module, "classes"): return dset = dmgr.dataset if verbose: iterable = tqdm(enumerate(dset), unit=' sample', total=len( dset), desc="Creating unique targets to estimate " "classes") else: iterable = enumerate(dset) unique_targets = [] # iterate over dataset for sample_idx, sample in iterable: item = sample[label_key] if item not in unique_targets: # convert item if necessary if np.isscalar(item): item = np.array([item]) 
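# scalar targets were wrapped as 1-element arrays above, so the sorted() / np.concatenate() call further below works for scalar and array-valued labels alike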
unique_targets.append(item) # sort and concatenate items and feed variable inside the module unique_targets = np.concatenate(list(sorted(unique_targets))) self.module.classes = unique_targets def train(self, num_epochs, datamgr_train, datamgr_valid=None, val_score_key=None, val_score_mode='highest', reduce_mode='mean', verbose=True, label_key="label"): """ Defines a routine to train a specified number of epochs Parameters ---------- num_epochs : int number of epochs to train datamgr_train : DataManager the datamanager holding the train data datamgr_valid : DataManager the datamanager holding the validation data (default: None) val_score_key : str the key specifying which metric to use for validation (default: None) val_score_mode : str key specifying what kind of validation score is best reduce_mode : str 'mean','sum','first_only' verbose : bool whether to show progress bars or not label_key : str or None key specifiying the value inside the batch dict to use for class collection if necessary Raises ------ NotImplementedError If not overwritten by subclass """ if self.module.iterative_training: # estimate classes from validation data if datamgr_valid is not None: self._get_classes_if_necessary(datamgr_valid, verbose, label_key) else: self._get_classes_if_necessary(datamgr_train, verbose, label_key) else: # Setting batchsize to length of dataset and replacing random # sampler_old with replacement by random sampler_old without # replacement ensures, that each sample is present in each # batch and only one batch is sampled per epoch datamgr_train.batchsize = len(datamgr_train.dataset) if issubclass(datamgr_train.sampler_cls, RandomSamplerWithReplacement): datamgr_train.sampler_cls = RandomSamplerNoReplacement # additionally setting the number of epochs to train ensures, # that only one epoch consisting of one batch (which holds the # whole dataset) is used for training if num_epochs > 1: logging.info( "An epoch number greater than 1 is given, " "but the current module does not support " "iterative training. Falling back to usual " "dataset fitting. 
For huge datasets, this " "might easily result in out of memory errors!") num_epochs = 1 return super().train(num_epochs, datamgr_train, datamgr_valid, val_score_key, val_score_mode, reduce_mode, verbose) def save_state(self, file_name, epoch, **kwargs): """ saves the current state via :func:`delira.io.sklearn.save_checkpoint` Parameters ---------- file_name : str filename to save the state to epoch : int current epoch (will be saved for mapping back) *args : positional arguments **kwargs : keyword arguments """ if not file_name.endswith(".pkl"): file_name = file_name + ".pkl" save_checkpoint(file_name, self.module, epoch, **kwargs) @staticmethod def load_state(file_name, *args, **kwargs): """ Loads the new state from file via :func:`delira.io.sklearn.load_checkpoint` Parameters ---------- file_name : str the file to load the state from **kwargs : keyword arguments Returns ------- dict new state """ if not file_name.endswith(".pkl"): file_name = file_name + ".pkl" return load_checkpoint(file_name, **kwargs) def _update_state(self, new_state): """ Update the state from a given new state Parameters ---------- new_state : dict new state to update internal state from Returns ------- :class:`SkLearnEstimatorTrainer` the trainer with a modified state """ if "model" in new_state: self.module = new_state.pop("model") if "epoch" in new_state: self.start_epoch = new_state.pop("epoch") return super()._update_state(new_state) @staticmethod def _search_for_prev_state(path, extensions=None): """ Helper function to search in a given path for previous epoch states (indicated by extensions) Parameters ---------- path : str the path to search in extensions : list list of strings containing valid file extensions for checkpoint files Returns ------- str the file containing the latest checkpoint (if available) None if no latst checkpoint was found int the latest epoch (1 if no checkpoint was found) """ if extensions is None: extensions = [".pkl"] return BaseNetworkTrainer._search_for_prev_state(path, extensions) @staticmethod def calc_metrics(batch, metrics: dict = None, metric_keys=None): if metrics is None: metrics = {} if metric_keys is None: metric_keys = {k: ("pred", "y") for k in metrics.keys()} return BaseNetworkTrainer.calc_metrics(batch, metrics, metric_keys) ================================================ FILE: delira/training/backends/sklearn/utils.py ================================================ def create_optims_default(*args, **kwargs): """ Function returning an empty optimizer dict Parameters ---------- *args : arbitrary positional arguments (ignored; only provided for api conformity) **kwargs : arbitrary keyword arguments (ignored; only provided for api conformity) Returns ------- dict empty dictionary """ return {} ================================================ FILE: delira/training/backends/tf_eager/__init__.py ================================================ from delira import get_backends as _get_backends if "TF" in _get_backends(): from delira.training.backends.tf_eager.experiment import TfEagerExperiment from delira.training.backends.tf_eager.trainer import TfEagerNetworkTrainer from delira.training.backends.tf_eager.utils import convert_to_numpy \ as convert_tfeager_to_numpy from delira.training.backends.tf_eager.utils import create_optims_default \ as create_tfeager_optims_default ================================================ FILE: delira/training/backends/tf_eager/experiment.py ================================================ import typing from functools import partial import 
tensorflow as tf from delira.data_loading import DataManager from delira.models.backends.tf_eager import AbstractTfEagerNetwork from delira.training.base_experiment import BaseExperiment from delira.utils import DeliraConfig from delira.training.backends.tf_eager.trainer import TfEagerNetworkTrainer from delira.training.backends.tf_eager.utils import create_optims_default from delira.training.backends.tf_eager.utils import convert_to_numpy class TfEagerExperiment(BaseExperiment): def __init__(self, config: typing.Union[str, DeliraConfig], model_cls: AbstractTfEagerNetwork, n_epochs=None, name=None, save_path=None, key_mapping=None, val_score_key=None, optim_builder=create_optims_default, checkpoint_freq=1, trainer_cls=TfEagerNetworkTrainer, **kwargs): """ Parameters ---------- config : :class:`DeliraConfig` or str the training config, if string is passed, it is treated as a path to a file, where the config is loaded from model_cls : Subclass of :class:`AbstractTfEagerNetwork` the class implementing the model to train n_epochs : int or None the number of epochs to train, if None: can be specified later during actual training name : str or None the Experiment's name save_path : str or None the path to save the results and checkpoints to. if None: Current working directory will be used key_mapping : dict mapping between data_dict and model inputs (necessary for prediction with :class:`Predictor`-API), if no keymapping is given, a default key_mapping of {"x": "data"} will be used here val_score_key : str or None key defining which metric to use for validation (determining best model and scheduling lr); if None: No validation-based operations will be done (model might still get validated, but validation metrics can only be logged and not used further) optim_builder : function Function returning a dict of backend-specific optimizers. defaults to :func:`create_optims_default` checkpoint_freq : int frequency of saving checkpoints (1 denotes saving every epoch, 2 denotes saving every second epoch etc.); default: 1 trainer_cls : subclass of :class:`TfEagerNetworkTrainer` the trainer class to use for training the model, defaults to :class:`TfNetworkTrainer` **kwargs : additional keyword arguments """ if key_mapping is None: key_mapping = {"x": "data"} super().__init__(config=config, model_cls=model_cls, n_epochs=n_epochs, name=name, save_path=save_path, key_mapping=key_mapping, val_score_key=val_score_key, optim_builder=optim_builder, checkpoint_freq=checkpoint_freq, trainer_cls=trainer_cls, **kwargs) def kfold(self, data: DataManager, metrics: dict, num_epochs=None, num_splits=None, shuffle=False, random_seed=None, split_type="random", val_split=0.2, label_key="label", train_kwargs: dict = None, test_kwargs: dict = None, metric_keys: dict = None, config=None, verbose=False, **kwargs): """ Performs a k-Fold cross-validation Parameters ---------- data : :class:`DataManager` the data to use for training(, validation) and testing. Will be split based on ``split_type`` and ``val_split`` metrics : dict dictionary containing the metrics to evaluate during k-fold num_epochs : int or None number of epochs to train (if not given, will either be extracted from ``config``, ``self.config`` or ``self.n_epochs``) num_splits : int or None the number of splits to extract from ``data``. 
If None: uses a default of 10 shuffle : bool whether to shuffle the data before splitting or not (implemented by index-shuffling rather than actual data-shuffling to retain potentially lazy-behavior of datasets) random_seed : None seed to seed numpy, the splitting functions and the used backend-framework split_type : str must be one of ['random', 'stratified'] if 'random': uses random data splitting if 'stratified': uses stratified data splitting. Stratification will be based on ``label_key`` val_split : float or None the fraction of the train data to use as validation set. If None: No validation will be done during training; only testing for each fold after the training is complete label_key : str the label to use for stratification. Will be ignored unless ``split_type`` is 'stratified'. Default: 'label' train_kwargs : dict or None kwargs to update the behavior of the :class:`DataManager` containing the train data. If None: empty dict will be passed metric_keys : dict of tuples the batch_dict keys to use for each metric to calculate. Should contain a value for each key in ``metrics``. If no values are given for a key, per default ``pred`` and ``label`` will be used for metric calculation test_kwargs : dict or None kwargs to update the behavior of the :class:`DataManager` containing the test and validation data. If None: empty dict will be passed config : :class:`DeliraConfig` or None the training and model parameters (will be merged with ``self.config``) verbose : bool verbosity **kwargs : additional keyword arguments Returns ------- dict all predictions from all folds dict all metric values from all folds Raises ------ ValueError if ``split_type`` is neither 'random', nor 'stratified' See Also -------- * :class:`sklearn.model_selection.KFold` and :class:`sklearn.model_selection.ShuffleSplit` for random data-splitting * :class:`sklearn.model_selection.StratifiedKFold` and :class:`sklearn.model_selection.StratifiedShuffleSplit` for stratified data-splitting * :meth:`DataManager.update_from_state_dict` for updating the data managers by kwargs * :meth:`BaseExperiment.run` for the training * :meth:`BaseExperiment.test` for the testing Notes ----- using stratified splits may be slow during split-calculation, since each item must be loaded once to obtain the labels necessary for stratification. """ # seed tf backend if random_seed is not None: tf.set_random_seed(random_seed) return super().kfold( data=data, metrics=metrics, num_epochs=num_epochs, num_splits=num_splits, shuffle=shuffle, random_seed=random_seed, split_type=split_type, val_split=val_split, label_key=label_key, train_kwargs=train_kwargs, test_kwargs=test_kwargs, metric_keys=metric_keys, config=config, verbose=verbose, **kwargs) def test(self, network, test_data: DataManager, metrics: dict, metric_keys=None, verbose=False, prepare_batch=lambda x: x, convert_fn=None, **kwargs): """ Setup and run testing on a given network Parameters ---------- network : :class:`AbstractNetwork` the (trained) network to test test_data : :class:`DataManager` the data to use for testing metrics : dict the metrics to calculate metric_keys : dict of tuples the batch_dict keys to use for each metric to calculate. Should contain a value for each key in ``metrics``. If no values are given for a key, per default ``pred`` and ``label`` will be used for metric calculation verbose : bool verbosity of the test process prepare_batch : function function to convert a batch-dict to a format accepted by the model. 
This conversion typically includes dtype-conversion, reshaping, wrapping to backend-specific tensors and pushing to correct devices. If not further specified uses the ``network``'s ``prepare_batch`` with CPU devices convert_fn : function function to convert a batch of tensors to numpy if not specified defaults to :func:`convert_torch_tensor_to_npy` **kwargs : additional keyword arguments Returns ------- dict all predictions obtained by feeding the ``test_data`` through the ``network`` dict all metrics calculated upon the ``test_data`` and the obtained predictions """ # specify convert_fn to correct backend function if convert_fn is None: convert_fn = convert_to_numpy if prepare_batch is None: prepare_batch = partial( network.prepare_batch, input_device="/cpu:0", output_device="/cpu:0") return super().test(network=network, test_data=test_data, metrics=metrics, metric_keys=metric_keys, verbose=verbose, prepare_batch=prepare_batch, convert_fn=convert_fn, **kwargs) def setup(self, config, training=True, **kwargs): """ Defines the setup behavior (model, trainer etc.) for training and testing case Parameters ---------- config : :class:`DeliraConfig` the parameters to use for setup training : bool whether to setup for training case or for testing case **kwargs : additional keyword arguments Returns ------- :class:`BaseNetworkTrainer` the created trainer (if ``training=True``) :class:`Predictor` the created predictor (if ``training=False``) See Also -------- * :meth:`BaseExperiment._setup_training` for training setup * :meth:`BaseExperiment._setup_test` for test setup """ tf.reset_default_graph() return super().setup(config=config, training=training, **kwargs) ================================================ FILE: delira/training/backends/tf_eager/trainer.py ================================================ from delira.training.backends.tf_eager.utils import create_optims_default from delira.training.backends.tf_eager.utils import convert_to_numpy from delira.training.base_trainer import BaseNetworkTrainer from delira.io.tf import save_checkpoint_eager, load_checkpoint_eager from delira.models.backends.tf_eager import AbstractTfEagerNetwork, \ DataParallelTfEagerNetwork from delira.training.callbacks.logging_callback import DefaultLoggingCallback import logging import os from functools import partial import tensorflow as tf logger = logging.getLogger(__name__) class TfEagerNetworkTrainer(BaseNetworkTrainer): def __init__(self, network: AbstractTfEagerNetwork, save_path: str, key_mapping: dict, losses: dict, optimizer_cls, optimizer_params=None, metrics=None, lr_scheduler_cls=None, lr_scheduler_params=None, gpu_ids=None, save_freq=1, optim_fn=create_optims_default, logging_type="tensorboardx", logging_kwargs=None, logging_callback_cls=DefaultLoggingCallback, logging_frequencies=None, logging_reduce_types=None, fold=0, callbacks=None, start_epoch=1, metric_keys=None, convert_batch_to_npy_fn=convert_to_numpy, val_freq=1, **kwargs): """ Parameters ---------- network : :class:`AbstractTfEagerNetwork` the network to train save_path : str path to save networks to key_mapping : dict a dictionary containing the mapping from the ``data_dict`` to the actual model's inputs. E.g. 
if a model accepts one input named 'x' and the data_dict contains one entry named 'data' this argument would have to be ``{'x': 'data'}`` losses : dict dictionary containing the training losses optimizer_cls : subclass of tf.train.Optimizer optimizer class implementing the optimization algorithm of choice optimizer_params : dict keyword arguments passed to optimizer during construction metrics : dict, optional metrics, which will be evaluated during train and validation phase (should work on numpy arrays) lr_scheduler_cls : Any learning rate schedule class: must implement step() method lr_scheduler_params : dict keyword arguments passed to lr scheduler during construction gpu_ids : list list containing ids of GPUs to use; if empty: use cpu instead save_freq : int integer specifying how often to save the current model's state. State is saved every save_freq epochs optim_fn : function creates a dictionary containing all necessary optimizers logging_type : str or callable the type of logging. If string: it must be one of ["visdom", "tensorboardx"] If callable: it must be a logging handler class logging_kwargs : dict dictionary containing all logging keyword arguments logging_callback_cls : class the callback class to create and register for logging logging_frequencies : int or dict specifies how often to log for each key. If int: integer will be applied to all valid keys if dict: should contain a frequency per valid key. Missing keys will be filled with a frequency of 1 (log every time) None is equal to empty dict here. logging_reduce_types : str, FunctionType or dict if str: specifies the reduction type to use. Valid types are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'. The given type will be mapped to all valid keys. if FunctionType: specifies the actual reduction function. Will be applied for all keys. if dict: should contain pairs of valid logging keys and either str or FunctionType. Specifies the logging value per key. Missing keys will be filled with a default value of 'last'. Valid types for strings are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'.
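As a purely illustrative example (the key name 'train_loss' is hypothetical and not defined by this class): passing ``logging_reduce_types='mean'`` would map the mean-reduction to every logged key, ``logging_reduce_types={'train_loss': 'min'}`` would reduce only 'train_loss' by its minimum and leave all remaining keys at the default 'last', and a plain function such as ``lambda values: values[-1]`` could be passed instead, assuming the reduction function receives the collected values for a key.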
fold : int current cross validation fold (0 per default) callbacks : list initial callbacks to register start_epoch : int epoch to start training at metric_keys : dict dict specifying which batch_dict entry to use for which metric as target; default: None, which will result in key "label" for all metrics convert_batch_to_npy_fn : type, optional function converting a batch-tensor to numpy, per default this is the identity function val_freq : int validation frequency specifying how often to validate the trained model (a value of 1 denotes validating every epoch, a value of 2 denotes validating every second epoch etc.); defaults to 1 **kwargs : Additional keyword arguments """ # prevent mutable default arguments if logging_kwargs is None: logging_kwargs = {} if callbacks is None: callbacks = [] if gpu_ids is None: gpu_ids = [] if lr_scheduler_params is None: lr_scheduler_params = {} if metrics is None: metrics = {} if optimizer_params is None: optimizer_params = {} # check if eager execution is enabled assert tf.executing_eagerly() super().__init__(network=network, save_path=save_path, losses=losses, optimizer_cls=optimizer_cls, optimizer_params=optimizer_params, metrics=metrics, lr_scheduler_cls=lr_scheduler_cls, lr_scheduler_params=lr_scheduler_params, gpu_ids=gpu_ids, save_freq=save_freq, optim_fn=optim_fn, key_mapping=key_mapping, logging_type=logging_type, logging_kwargs=logging_kwargs, fold=fold, callbacks=callbacks, start_epoch=start_epoch, metric_keys=metric_keys, convert_batch_to_npy_fn=convert_batch_to_npy_fn, val_freq=val_freq, logging_callback_cls=logging_callback_cls, logging_frequencies=logging_frequencies, logging_reduce_types=logging_reduce_types, **kwargs ) self._setup(network, optim_fn, optimizer_cls, optimizer_params, lr_scheduler_cls, lr_scheduler_params, key_mapping, convert_batch_to_npy_fn, gpu_ids, callbacks) for key, val in kwargs.items(): setattr(self, key, val) def _setup(self, network, optim_fn, optimizer_cls, optimizer_params, lr_scheduler_cls, lr_scheduler_params, key_mapping, convert_batch_to_npy_fn, gpu_ids, callbacks): """ Defines the Trainers Setup Parameters ---------- network : instance of :class: `AbstractTfNetwork` the network to train optim_fn : function creates a dictionary containing all necessary optimizers optimizer_cls : subclass of tf.train.Optimizer optimizer class implementing the optimization algorithm of choice optimizer_params : dict lr_scheduler_cls : Any learning rate schedule class: must implement step() method lr_scheduler_params : dict keyword arguments passed to lr scheduler during construction convert_batch_to_npy_fn : type, optional function converting a batch-tensor to numpy, per default this is the identity function gpu_ids : list list containing ids of GPUs to use; if empty: use cpu instead callbacks : list initial callbacks to register Raises ------ RuntimeError if multiple GPU ids passed """ if gpu_ids and tf.test.is_gpu_available(): self.use_gpu = True if len(gpu_ids) > 1: raise RuntimeError("Multiple GPUs not yet supported") # logger.warning( # "multi-GPU training not yet tested!") # network = DataParallelTfEagerNetwork(network, gpu_ids) # # self.input_device = "/cpu:0" # self.output_device = "/cpu:0" else: self.input_device = "/gpu:%d" % gpu_ids[0] self.output_device = "/gpu:%d" % gpu_ids[0] else: self.use_gpu = False self.input_device = "/cpu:0" self.output_device = "/cpu:0" self.optimizers = optim_fn(optimizer_cls, **optimizer_params) super()._setup(network, lr_scheduler_cls, lr_scheduler_params, gpu_ids, key_mapping, 
convert_batch_to_npy_fn, network.prepare_batch, callbacks) self._prepare_batch = partial(self._prepare_batch, input_device=self.input_device, output_device=self.output_device) # Load latest epoch file if available if os.path.isdir(self.save_path): # check all files in directory starting with "checkpoint" and # not ending with "_best.meta" latest_state_path, latest_epoch = self._search_for_prev_state( self.save_path ) if latest_state_path is not None: logger.info("Attempting to load state from previous \ training from %s" % latest_state_path) self.update_state(latest_state_path) self.start_epoch = latest_epoch def _at_training_end(self, *args, **kwargs): """ Defines Behaviour at end of training: Loads best model if available Returns ------- :class:`AbstractTfNetwork` best network """ if os.path.isfile(os.path.join(self.save_path, 'checkpoint_best.meta')): # load best model and return it. self.update_state(os.path.join(self.save_path, 'checkpoint_best') ) return super()._at_training_end(*args, **kwargs) def _train_single_epoch(self, batchgen, epoch, verbose=False): """ Trains the network a single epoch Parameters ---------- batchgen : MultiThreadedAugmenter Generator yielding the training batches epoch : int current epoch """ self.module.trainable = True return super()._train_single_epoch(batchgen, epoch, verbose=verbose) def predict_data_mgr(self, datamgr, batchsize=None, metrics=None, metric_keys=None, verbose=False, **kwargs): """ Defines a routine to predict data obtained from a batchgenerator Parameters ---------- datamgr : :class:`DataManager` Manager producing a generator holding the batches batchsize : int Artificial batchsize (sampling will be done with batchsize 1 and sampled data will be stacked to match the artificial batchsize)(default: None) metrics : dict the metrics to calculate metric_keys : dict the ``batch_dict`` items to use for metric calculation verbose : bool whether to show a progress-bar or not, default: False **kwargs : additional keyword arguments """ if metrics is None: metrics = {} self.module.trainable = False return super().predict_data_mgr(datamgr, batchsize, metrics, metric_keys, verbose=verbose, **kwargs) def save_state(self, file_name, *args, **kwargs): """ saves the current state via :func:`delira.io.tf.save_checkpoint_eager` Parameters ---------- file_name : str filename to save the state to """ save_checkpoint_eager(file_name, self.module, self.optimizers, *args, **kwargs) def load_state(self, file_name, *args, **kwargs): """ Loads the new state from file via :func:`delira.io.tf.load_checkpoint_eager` Parameters ---------- file_name : str the file to load the state from Returns ------- """ return load_checkpoint_eager( file_name, self.module, self.optimizers) @staticmethod def _search_for_prev_state(path, extensions=None): """ Helper function to search in a given path for previous epoch states (indicated by extensions) Parameters ---------- path : str the path to search in extensions : list list of strings containing valid file extensions for checkpoint files Returns ------- str the file containing the latest checkpoint (if available) None if no latst checkpoint was found int the latest epoch (1 if no checkpoint was found) """ if extensions is None: extensions = [".meta"] return BaseNetworkTrainer._search_for_prev_state(path, extensions) ================================================ FILE: delira/training/backends/tf_eager/utils.py ================================================ import tensorflow as tf from delira.training.utils import 
convert_to_numpy_identity, \ recursively_convert_elements def _single_element_tensor_conversion(element): return element.numpy() def convert_to_numpy(*args, **kwargs): """ Converts all tf tensors in args and kwargs to numpy arrays Parameters ---------- *args : positional arguments of arbitrary number and type **kwargs : keyword arguments of arbitrary number and type Returns ------- list converted positional arguments dict converted keyword arguments """ args = recursively_convert_elements(args, tf.Tensor, _single_element_tensor_conversion) kwargs = recursively_convert_elements(kwargs, tf.Tensor, _single_element_tensor_conversion) return convert_to_numpy_identity(*args, **kwargs) def create_optims_default(optim_cls, **optim_params): """ Function to create an optimizer dictionary (in this case only one optimizer) Parameters ---------- optim_cls : Class implementing an optimization algorithm **optim_params : Additional keyword arguments (passed to the optimizer class) Returns ------- dict dictionary containing all created optimizers """ return {"default": optim_cls(**optim_params)} ================================================ FILE: delira/training/backends/tf_graph/__init__.py ================================================ from delira import get_backends as _get_backends if "TF" in _get_backends(): from delira.training.backends.tf_graph.experiment import TfGraphExperiment from delira.training.backends.tf_graph.trainer import TfGraphNetworkTrainer from delira.training.backends.tf_graph.utils import \ initialize_uninitialized ================================================ FILE: delira/training/backends/tf_graph/experiment.py ================================================ import typing from functools import partial import tensorflow as tf from delira.models.backends.tf_graph import AbstractTfGraphNetwork from delira.data_loading import DataManager from delira.utils import DeliraConfig from delira.training.backends.tf_eager.experiment import TfEagerExperiment from delira.training.backends.tf_eager.utils import create_optims_default from delira.training.backends.tf_graph.trainer import TfGraphNetworkTrainer from delira.training.backends.tf_graph.utils import initialize_uninitialized class TfGraphExperiment(TfEagerExperiment): def __init__(self, config: typing.Union[str, DeliraConfig], model_cls: AbstractTfGraphNetwork, n_epochs=None, name=None, save_path=None, key_mapping=None, val_score_key=None, optim_builder=create_optims_default, checkpoint_freq=1, trainer_cls=TfGraphNetworkTrainer, **kwargs): """ Parameters ---------- config : :class:`DeliraConfig` or str the training config, if string is passed, it is treated as a path to a file, where the config is loaded from model_cls : Subclass of :class:`AbstractTfGraphNetwork` the class implementing the model to train n_epochs : int or None the number of epochs to train, if None: can be specified later during actual training name : str or None the Experiment's name save_path : str or None the path to save the results and checkpoints to.
if None: Current working directory will be used key_mapping : dict mapping between data_dict and model inputs (necessary for prediction with :class:`Predictor`-API), if no keymapping is given, a default key_mapping of {"data": "data"} will be used here val_score_key : str or None key defining which metric to use for validation (determining best model and scheduling lr); if None: No validation-based operations will be done (model might still get validated, but validation metrics can only be logged and not used further) optim_builder : function Function returning a dict of backend-specific optimizers. defaults to :func:`create_optims_default_tf` checkpoint_freq : int frequency of saving checkpoints (1 denotes saving every epoch, 2 denotes saving every second epoch etc.); default: 1 trainer_cls : subclass of :class:`TfEagerNetworkTrainer` the trainer class to use for training the model, defaults to :class:`TfEagerNetworkTrainer` **kwargs : additional keyword arguments """ if key_mapping is None: key_mapping = {"data": "data"} super().__init__( config=config, model_cls=model_cls, n_epochs=n_epochs, name=name, save_path=save_path, key_mapping=key_mapping, val_score_key=val_score_key, optim_builder=optim_builder, checkpoint_freq=checkpoint_freq, trainer_cls=trainer_cls, **kwargs) def test(self, network, test_data: DataManager, metrics: dict, metric_keys=None, verbose=False, prepare_batch=lambda x: x, convert_fn=None, **kwargs): """ Setup and run testing on a given network Parameters ---------- network : :class:`AbstractNetwork` the (trained) network to test test_data : :class:`DataManager` the data to use for testing metrics : dict the metrics to calculate metric_keys : dict of tuples the batch_dict keys to use for each metric to calculate. Should contain a value for each key in ``metrics``. If no values are given for a key, per default ``pred`` and ``label`` will be used for metric calculation verbose : bool verbosity of the test process prepare_batch : function function to convert a batch-dict to a format accepted by the model. This conversion typically includes dtype-conversion, reshaping, wrapping to backend-specific tensors and pushing to correct devices. 
If not further specified uses the ``network``'s ``prepare_batch`` with CPU devices convert_fn : function function to convert a batch of tensors to numpy if not specified defaults to :func:`convert_torch_tensor_to_npy` **kwargs : additional keyword arguments Returns ------- dict all predictions obtained by feeding the ``test_data`` through the ``network`` dict all metrics calculated upon the ``test_data`` and the obtained predictions """ initialize_uninitialized(network._sess) if prepare_batch is None: prepare_batch = partial(network.prepare_batch, input_device=None, output_device=None) return super().test(network=network, test_data=test_data, metrics=metrics, metric_keys=metric_keys, verbose=verbose, prepare_batch=prepare_batch, convert_fn=convert_fn, **kwargs) ================================================ FILE: delira/training/backends/tf_graph/trainer.py ================================================ from delira.training.backends.tf_graph.utils import initialize_uninitialized from delira.training.backends.tf_eager.utils import create_optims_default from delira.training.utils import convert_to_numpy_identity \ as convert_to_numpy from delira.training.base_trainer import BaseNetworkTrainer from delira.training.callbacks.logging_callback import DefaultLoggingCallback from delira.io.tf import load_checkpoint, save_checkpoint from delira.models.backends.tf_graph import AbstractTfGraphNetwork from delira.data_loading import DataManager import os import logging from tensorflow import executing_eagerly from batchgenerators.dataloading import MultiThreadedAugmenter logger = logging.getLogger(__name__) class TfGraphNetworkTrainer(BaseNetworkTrainer): """ Train and Validate a Network See Also -------- :class:`AbstractNetwork` """ def __init__(self, network: AbstractTfGraphNetwork, save_path: str, key_mapping: dict, losses: dict, optimizer_cls, optimizer_params=None, metrics=None, lr_scheduler_cls=None, lr_scheduler_params=None, gpu_ids=None, save_freq=1, optim_fn=create_optims_default, logging_type="tensorboardx", logging_kwargs=None, logging_callback_cls=DefaultLoggingCallback, logging_frequencies=None, logging_reduce_types=None, fold=0, callbacks=None, start_epoch=1, metric_keys=None, convert_batch_to_npy_fn=convert_to_numpy, val_freq=1, **kwargs ): """ Parameters ---------- network : :class:`AbstractTfGraphNetwork` the network to train save_path : str path to save networks to key_mapping : dict a dictionary containing the mapping from the ``data_dict`` to the actual model's inputs. E.g. if a model accepts one input named 'x' and the data_dict contains one entry named 'data' this argument would have to be ``{'x': 'data'}`` losses : dict dictionary containing the training losses optimizer_cls : subclass of tf.train.Optimizer optimizer class implementing the optimization algorithm of choice optimizer_params : dict keyword arguments passed to optimizer during construction metrics : dict, optional metrics, which will be evaluated during train and validation phase (should work on numpy arrays) lr_scheduler_cls : Any learning rate schedule class: must implement step() method lr_scheduler_params : dict keyword arguments passed to lr scheduler during construction gpu_ids : list list containing ids of GPUs to use; if empty: use cpu instead save_freq : int integer specifying how often to save the current model's state. State is saved every state_freq epochs optim_fn : function creates a dictionary containing all necessary optimizers logging_type : str or callable the type of logging. 
If string: it must be one of ["visdom", "tensorboardx"] If callable: it must be a logging handler class logging_kwargs : dict dictionary containing all logging keyword arguments logging_callback_cls : class the callback class to create and register for logging logging_frequencies : int or dict specifies how often to log for each key. If int: integer will be applied to all valid keys if dict: should contain a frequency per valid key. Missing keys will be filled with a frequency of 1 (log every time) None is equal to empty dict here. logging_reduce_types : str of FunctionType or dict if str: specifies the reduction type to use. Valid types are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'. The given type will be mapped to all valid keys. if FunctionType: specifies the actual reduction function. Will be applied for all keys. if dict: should contain pairs of valid logging keys and either str or FunctionType. Specifies the logging value per key. Missing keys will be filles with a default value of 'last'. Valid types for strings are 'last' | 'first' | 'mean' 'median' | 'max' | 'min'. fold : int current cross validation fold (0 per default) callbacks : list initial callbacks to register start_epoch : int epoch to start training at metric_keys : dict dict specifying which batch_dict entry to use for which metric as target; default: None, which will result in key "label" for all metrics convert_batch_to_npy_fn : type, optional function converting a batch-tensor to numpy, per default this is the identity function val_freq : int validation frequency specifying how often to validate the trained model (a value of 1 denotes validating every epoch, a value of 2 denotes validating every second epoch etc.); defaults to 1 **kwargs : Additional keyword arguments """ assert not executing_eagerly() if optimizer_params is None: optimizer_params = {} if metrics is None: metrics = {} if lr_scheduler_params is None: lr_scheduler_params = {} if gpu_ids is None: gpu_ids = [] if logging_kwargs is None: logging_kwargs = {} if callbacks is None: callbacks = [] super().__init__(network=network, save_path=save_path, losses=losses, optimizer_cls=optimizer_cls, optimizer_params=optimizer_params, metrics=metrics, lr_scheduler_cls=lr_scheduler_cls, lr_scheduler_params=lr_scheduler_params, gpu_ids=gpu_ids, save_freq=save_freq, optim_fn=optim_fn, key_mapping=key_mapping, logging_type=logging_type, logging_kwargs=logging_kwargs, logging_callback_cls=logging_callback_cls, logging_frequencies=logging_frequencies, logging_reduce_types=logging_reduce_types, fold=fold, callbacks=callbacks, start_epoch=start_epoch, metric_keys=metric_keys, convert_batch_to_npy_fn=convert_batch_to_npy_fn, val_freq=val_freq, **kwargs ) self._setup(network, optim_fn, optimizer_cls, optimizer_params, lr_scheduler_cls, lr_scheduler_params, key_mapping, convert_batch_to_npy_fn, gpu_ids, callbacks) for key, val in kwargs.items(): setattr(self, key, val) def _setup(self, network, optim_fn, optimizer_cls, optimizer_params, lr_scheduler_cls, lr_scheduler_params, key_mapping, convert_batch_to_npy_fn, gpu_ids, callbacks): """ Defines the Trainers Setup Parameters ---------- network : instance of :class: `AbstractTfNetwork` the network to train optim_fn : function creates a dictionary containing all necessary optimizers optimizer_cls : subclass of tf.train.Optimizer optimizer class implementing the optimization algorithm of choice optimizer_params : dict lr_scheduler_cls : Any learning rate schedule class: must implement step() method lr_scheduler_params : 
dict keyword arguments passed to lr scheduler during construction convert_batch_to_npy_fn : type, optional function converting a batch-tensor to numpy, per default this is the identity function gpu_ids : list list containing ids of GPUs to use; if empty: use cpu instead callbacks : list initial callbacks to register Raises ------ RuntimeError if multiple GPU ids passed """ # TODO: implement multi-GPU and single GPU training with help of # keras multi-gpu model # note: might be bugged in combination with sess.run # https://github.com/tensorflow/tensorflow/issues/21788 # if gpu_ids and tf.test.is_gpu_available(): # assert len(gpu_ids) <= len(get_available_gpus()), "more GPUs # specified than available" # self.use_gpu = True # if len(gpu_ids) > 1: # logger.warning( # "multi-GPU training not yet tested!") # # network.model = tf.keras.utils.multi_gpu_model( # network.model, # len(gpu_ids), # cpu_merge=True, # cpu_relocation=False) # else: # network.models = tf.keras.models.clone_model(network.model) # else: # self.use_gpu = False # if len(gpu_ids) > 1: raise RuntimeError("Multiple GPUs not yet supported") self.optimizers = optim_fn(optimizer_cls, **optimizer_params) super()._setup(network, lr_scheduler_cls, lr_scheduler_params, gpu_ids, key_mapping, convert_batch_to_npy_fn, lambda x: x, callbacks) self.use_gpu = True self.module._add_losses(self.losses) self.module._add_optims(self.optimizers) # check for unitialized variables initialize_uninitialized(self.module._sess) # Load latest epoch file if available if os.path.isdir(self.save_path): latest_state_path, latest_epoch = self._search_for_prev_state( self.save_path) if latest_state_path is not None: # if pth file does not exist, load pt file instead if not os.path.isfile(latest_state_path): latest_state_path = latest_state_path[:-1] logger.info("Attempting to load state from previous \ training from %s" % latest_state_path) self.update_state(latest_state_path) self.start_epoch = latest_epoch def _at_training_end(self, *args, **kwargs): """ Defines Behaviour at end of training: Loads best model if available Returns ------- :class:`AbstractTfNetwork` best network """ if os.path.isfile(os.path.join(self.save_path, 'checkpoint_best.meta')): # load best model and return it. 
self.update_state(os.path.join(self.save_path, 'checkpoint_best') ) return super()._at_training_end(*args, **kwargs) def _train_single_epoch(self, dmgr_train: DataManager, epoch, verbose=False): """ Trains the network a single epoch Parameters ---------- dmgr_train : :class:`DataManager` Datamanager to create the data generator epoch : int current epoch """ self.module.training = True return super()._train_single_epoch(dmgr_train, epoch, verbose=verbose) def predict_data_mgr(self, datamgr, batch_size=None, metrics=None, metric_keys=None, verbose=False, **kwargs): """ Defines a routine to predict data obtained from a batchgenerator Parameters ---------- datamgr : :class:`DataManager` Manager producing a generator holding the batches batch_size : int Artificial batchsize (sampling will be done with batchsize 1 and sampled data will be stacked to match the artificial batchsize)(default: None) metrics : dict the metrics to calculate metric_keys : dict the ``batch_dict`` items to use for metric calculation verbose : bool whether to show a progress-bar or not, default: False **kwargs : additional keyword arguments """ if metrics is None: metrics = {} self.module.training = False return super().predict_data_mgr(datamgr, batch_size, metrics, metric_keys, verbose=verbose) def save_state(self, file_name, *args, **kwargs): """ saves the current state via :func:`delira.io.tf.save_checkpoint` Parameters ---------- file_name : str filename to save the state to """ save_checkpoint(file_name, self.module) def load_state(self, file_name, *args, **kwargs): """ Loads the new state from file via :func:`delira.io.tf.load_checkpoint` Parameters ---------- file_name : str the file to load the state from Returns ------- """ return load_checkpoint(file_name, self.module) @staticmethod def _search_for_prev_state(path, extensions=None): """ Helper function to search in a given path for previous epoch states (indicated by extensions) Parameters ---------- path : str the path to search in extensions : list list of strings containing valid file extensions for checkpoint files Returns ------- str the file containing the latest checkpoint (if available) None if no latest checkpoint was found int the latest epoch (1 if no checkpoint was found) """ if extensions is None: extensions = [".meta"] return BaseNetworkTrainer._search_for_prev_state(path, extensions) ================================================ FILE: delira/training/backends/tf_graph/utils.py ================================================ import tensorflow as tf def initialize_uninitialized(sess): """ Function to initialize only uninitialized variables in a session graph Parameters ---------- sess : tf.Session() """ global_vars = tf.global_variables() is_not_initialized = sess.run( [tf.is_variable_initialized(var) for var in global_vars]) not_initialized_vars = [v for (v, f) in zip( global_vars, is_not_initialized) if not f] if not_initialized_vars: sess.run(tf.variables_initializer(not_initialized_vars)) ================================================ FILE: delira/training/backends/torch/__init__.py ================================================ from delira import get_backends as _get_backends if "TORCH" in _get_backends(): from delira.training.backends.torch.trainer import PyTorchNetworkTrainer from delira.training.backends.torch.experiment import PyTorchExperiment from delira.training.backends.torch.utils import create_optims_default \ as create_pytorch_optims_default from delira.training.backends.torch.utils import convert_to_numpy \ as 
convert_torch_to_numpy ================================================ FILE: delira/training/backends/torch/experiment.py ================================================ from functools import partial import typing import torch from delira.models.backends.torch import AbstractPyTorchNetwork from delira.data_loading import DataManager from delira.training.base_experiment import BaseExperiment from delira.utils import DeliraConfig from delira.training.backends.torch.trainer import PyTorchNetworkTrainer from delira.training.backends.torch.utils import create_optims_default from delira.training.backends.torch.utils import convert_to_numpy class PyTorchExperiment(BaseExperiment): def __init__(self, config: typing.Union[str, DeliraConfig], model_cls: AbstractPyTorchNetwork, n_epochs=None, name=None, save_path=None, key_mapping=None, val_score_key=None, optim_builder=create_optims_default, checkpoint_freq=1, trainer_cls=PyTorchNetworkTrainer, **kwargs): """ Parameters ---------- config : :class:`DeliraConfig` or str the training config, if string is passed, it is treated as a path to a file, where the config is loaded from model_cls : Subclass of :class:`AbstractPyTorchNetwork` the class implementing the model to train n_epochs : int or None the number of epochs to train, if None: can be specified later during actual training name : str or None the Experiment's name save_path : str or None the path to save the results and checkpoints to. if None: Current working directory will be used key_mapping : dict mapping between data_dict and model inputs (necessary for prediction with :class:`Predictor`-API), if no keymapping is given, a default key_mapping of {"x": "data"} will be used here val_score_key : str or None key defining which metric to use for validation (determining best model and scheduling lr); if None: No validation-based operations will be done (model might still get validated, but validation metrics can only be logged and not used further) optim_builder : function Function returning a dict of backend-specific optimizers. defaults to :func:`create_optims_default_pytorch` checkpoint_freq : int frequency of saving checkpoints (1 denotes saving every epoch, 2 denotes saving every second epoch etc.); default: 1 trainer_cls : subclass of :class:`PyTorchNetworkTrainer` the trainer class to use for training the model, defaults to :class:`PyTorchNetworkTrainer` **kwargs : additional keyword arguments """ if key_mapping is None: key_mapping = {"x": "data"} super().__init__(config=config, model_cls=model_cls, n_epochs=n_epochs, name=name, save_path=save_path, key_mapping=key_mapping, val_score_key=val_score_key, optim_builder=optim_builder, checkpoint_freq=checkpoint_freq, trainer_cls=trainer_cls, **kwargs) def kfold(self, data: DataManager, metrics: dict, num_epochs=None, num_splits=None, shuffle=False, random_seed=None, split_type="random", val_split=0.2, label_key="label", train_kwargs: dict = None, test_kwargs: dict = None, metric_keys: dict = None, config=None, verbose=False, **kwargs): """ Performs a k-Fold cross-validation Parameters ---------- data : :class:`DataManager` the data to use for training(, validation) and testing. Will be split based on ``split_type`` and ``val_split`` metrics : dict dictionary containing the metrics to evaluate during k-fold num_epochs : int or None number of epochs to train (if not given, will either be extracted from ``config``, ``self.config`` or ``self.n_epochs``) num_splits : int or None the number of splits to extract from ``data``. 
If None: uses a default of 10 shuffle : bool whether to shuffle the data before splitting or not (implemented by index-shuffling rather than actual data-shuffling to retain potentially lazy-behavior of datasets) random_seed : None seed to seed numpy, the splitting functions and the used backend-framework split_type : str must be one of ['random', 'stratified'] if 'random': uses random data splitting if 'stratified': uses stratified data splitting. Stratification will be based on ``label_key`` val_split : float or None the fraction of the train data to use as validation set. If None: No validation will be done during training; only testing for each fold after the training is complete label_key : str the label to use for stratification. Will be ignored unless ``split_type`` is 'stratified'. Default: 'label' train_kwargs : dict or None kwargs to update the behavior of the :class:`DataManager` containing the train data. If None: empty dict will be passed metric_keys : dict of tuples the batch_dict keys to use for each metric to calculate. Should contain a value for each key in ``metrics``. If no values are given for a key, per default ``pred`` and ``label`` will be used for metric calculation test_kwargs : dict or None kwargs to update the behavior of the :class:`DataManager` containing the test and validation data. If None: empty dict will be passed config : :class:`Parameters`or None the training and model parameters (will be merged with ``self.config``) verbose : bool verbosity **kwargs : additional keyword arguments Returns ------- dict all predictions from all folds dict all metric values from all folds Raises ------ ValueError if ``split_type`` is neither 'random', nor 'stratified' See Also -------- * :class:`sklearn.model_selection.KFold` and :class:`sklearn.model_selection.ShuffleSplit` for random data-splitting * :class:`sklearn.model_selection.StratifiedKFold` and :class:`sklearn.model_selection.StratifiedShuffleSplit` for stratified data-splitting * :meth:`DataManager.update_from_state_dict` for updating the data managers by kwargs * :meth:`BaseExperiment.run` for the training * :meth:`BaseExperiment.test` for the testing Notes ----- using stratified splits may be slow during split-calculation, since each item must be loaded once to obtain the labels necessary for stratification. """ # seed torch backend if random_seed is not None: torch.manual_seed(random_seed) return super().kfold( data=data, metrics=metrics, num_epochs=num_epochs, num_splits=num_splits, shuffle=shuffle, random_seed=random_seed, split_type=split_type, val_split=val_split, label_key=label_key, train_kwargs=train_kwargs, test_kwargs=test_kwargs, metric_keys=metric_keys, config=config, verbose=verbose, **kwargs) def test(self, network, test_data: DataManager, metrics: dict, metric_keys=None, verbose=False, prepare_batch=None, convert_fn=None, **kwargs): """ Setup and run testing on a given network Parameters ---------- network : :class:`AbstractNetwork` the (trained) network to test test_data : :class:`DataManager` the data to use for testing metrics : dict the metrics to calculate metric_keys : dict of tuples the batch_dict keys to use for each metric to calculate. Should contain a value for each key in ``metrics``. If no values are given for a key, per default ``pred`` and ``label`` will be used for metric calculation verbose : bool verbosity of the test process prepare_batch : function function to convert a batch-dict to a format accepted by the model. 
This conversion typically includes dtype-conversion, reshaping, wrapping to backend-specific tensors and pushing to correct devices. If not further specified uses the ``network``'s ``prepare_batch`` with CPU devices convert_fn : function function to convert a batch of tensors to numpy if not specified defaults to :func:`convert_torch_tensor_to_npy` **kwargs : additional keyword arguments Returns ------- dict all predictions obtained by feeding the ``test_data`` through the ``network`` dict all metrics calculated upon the ``test_data`` and the obtained predictions """ # use backend-specific and model-specific prepare_batch fn # (runs on same device as passed network per default) device = next(network.parameters()).device if prepare_batch is None: prepare_batch = partial(network.prepare_batch, input_device=device, output_device=device) # switch to backend-specific convert function if convert_fn is None: convert_fn = convert_to_numpy return super().test(network=network, test_data=test_data, metrics=metrics, metric_keys=metric_keys, verbose=verbose, prepare_batch=prepare_batch, convert_fn=convert_fn, **kwargs) ================================================ FILE: delira/training/backends/torch/trainer.py ================================================ import logging import os from functools import partial import warnings import torch from batchgenerators.dataloading import MultiThreadedAugmenter from delira.io.torch import load_checkpoint_torch, save_checkpoint_torch from delira.models.backends.torch import AbstractPyTorchNetwork, \ DataParallelPyTorchNetwork from delira.training.base_trainer import BaseNetworkTrainer from delira.training.backends.torch.utils import create_optims_default from delira.training.backends.torch.utils import convert_to_numpy from delira.training.callbacks.logging_callback import DefaultLoggingCallback logger = logging.getLogger(__name__) class PyTorchNetworkTrainer(BaseNetworkTrainer): """ Train and Validate a Network See Also -------- :class:`AbstractNetwork` """ def __init__(self, network: AbstractPyTorchNetwork, save_path: str, key_mapping, losses=None, optimizer_cls=None, optimizer_params=None, metrics=None, lr_scheduler_cls=None, lr_scheduler_params=None, gpu_ids=None, save_freq=1, optim_fn=create_optims_default, logging_type="tensorboardx", logging_kwargs=None, logging_callback_cls=DefaultLoggingCallback, logging_frequencies=None, logging_reduce_types=None, fold=0, callbacks=None, start_epoch=1, metric_keys=None, convert_batch_to_npy_fn=convert_to_numpy, mixed_precision=False, mixed_precision_kwargs={"opt_level": "O1", "cast_model_type": None, "patch_torch_functions": None, "master_weights": None, "loss_scale": None, "cast_model_outputs": None, "num_losses": 1, "verbosity": 1}, val_freq=1, ** kwargs): """ Parameters ---------- network : :class:`AbstractPyTorchNetwork` the network to train save_path : str path to save networks to key_mapping : dict a dictionary containing the mapping from the ``data_dict`` to the actual model's inputs. E.g. 
if a model accepts one input named 'x' and the data_dict contains one entry named 'data' this argument would have to be ``{'x': 'data'}`` losses : dict dictionary containing the training losses optimizer_cls : subclass of tf.train.Optimizer optimizer class implementing the optimization algorithm of choice optimizer_params : dict keyword arguments passed to optimizer during construction metrics : dict, optional metrics, which will be evaluated during train and validation phase (should work on numpy arrays) lr_scheduler_cls : Any learning rate schedule class: must implement step() method lr_scheduler_params : dict keyword arguments passed to lr scheduler during construction gpu_ids : list list containing ids of GPUs to use; if empty: use cpu instead save_freq : int integer specifying how often to save the current model's state. State is saved every state_freq epochs optim_fn : function creates a dictionary containing all necessary optimizers logging_type : str or callable the type of logging. If string: it must be one of ["visdom", "tensorboardx"] If callable: it must be a logging handler class logging_kwargs : dict dictionary containing all logging keyword arguments logging_callback_cls : class the callback class to create and register for logging logging_frequencies : int or dict specifies how often to log for each key. If int: integer will be applied to all valid keys if dict: should contain a frequency per valid key. Missing keys will be filled with a frequency of 1 (log every time) None is equal to empty dict here. logging_reduce_types : str of FunctionType or dict if str: specifies the reduction type to use. Valid types are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'. The given type will be mapped to all valid keys. if FunctionType: specifies the actual reduction function. Will be applied for all keys. if dict: should contain pairs of valid logging keys and either str or FunctionType. Specifies the logging value per key. Missing keys will be filles with a default value of 'last'. Valid types for strings are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'. fold : int current cross validation fold (0 per default) callbacks : list initial callbacks to register start_epoch : int epoch to start training at metric_keys : dict dict specifying which batch_dict entry to use for which metric as target; default: None, which will result in key "label" for all metrics convert_batch_to_npy_fn : type, optional function converting a batch-tensor to numpy, per default this is a function, which detaches the tensor, moves it to cpu and then calls ``.numpy()`` on it mixed_precision : bool whether to use mixed precision or not (False per default) mixed_precision_kwargs : dict additional keyword arguments for mixed precision from apex.amp.frontend: opt_level : str Pure or mixed precision optimization level. Accepted values are "O0", "O1", "O2", and "O3": O0: Pure FP32 training. O1: Insert automatic casts around Pytorch functions and Tensor methods. O2: FP16 training with FP32 batchnorm and FP32 master weights O3: Pure FP16 training. cast_model_type : :class:`torch.dtype` Optional property override for model dtype; default: None patch_torch_functions : bool Optional property override. keep_batchnorm_fp32 : bool or str Optional property override. If passed as a string, must be the string "True" or "False". master_weights : bool Optional property override; whether to create master weights or not loss_scale : float or str Optional property override. 
If passed as a string, must be a string representing a number, e.g., "128.0", or the string "dynamic". cast_model_outputs : :class:`torch.dtype` Option to ensure that the outputs of your model(s) are always cast to a particular type regardless of ``opt_level``. num_losses : int Option to tell Amp in advance how many losses/backward passes you plan to use. When used in conjunction with the ``loss_id`` argument to ``amp.scale_loss``, enables Amp to use a different loss scale per loss/backward pass, which can improve stability. See "Multiple models/optimizers/losses" under "Advanced Amp Usage" for examples. If ``num_losses`` is left to 1, Amp will still support multiple losses/backward passes, but use a single global loss scale for all of them; default: 1 verbosity : int Set to 0 to suppress Amp-related output; default: 1 val_freq : int validation frequency specifying how often to validate the trained model (a value of 1 denotes validating every epoch, a value of 2 denotes validating every second epoch etc.); defaults to 1 **kwargs : additional keyword arguments """ if callbacks is None: callbacks = [] if logging_kwargs is None: logging_kwargs = {} if gpu_ids is None: gpu_ids = [] if lr_scheduler_params is None: lr_scheduler_params = {} if metrics is None: metrics = {} if optimizer_params is None: optimizer_params = {} super().__init__(network=network, save_path=save_path, losses=losses, optimizer_cls=optimizer_cls, optimizer_params=optimizer_params, metrics=metrics, lr_scheduler_cls=lr_scheduler_cls, lr_scheduler_params=lr_scheduler_params, gpu_ids=gpu_ids, save_freq=save_freq, optim_fn=optim_fn, key_mapping=key_mapping, logging_type=logging_type, logging_kwargs=logging_kwargs, logging_callback_cls=logging_callback_cls, logging_frequencies=logging_frequencies, logging_reduce_types=logging_reduce_types, fold=fold, callbacks=callbacks, start_epoch=start_epoch, metric_keys=metric_keys, convert_batch_to_npy_fn=convert_batch_to_npy_fn, val_freq=val_freq, **kwargs ) self._setup(network, optim_fn, optimizer_cls, optimizer_params, lr_scheduler_cls, lr_scheduler_params, gpu_ids, key_mapping, convert_batch_to_npy_fn, mixed_precision, mixed_precision_kwargs, callbacks) for key, val in kwargs.items(): setattr(self, key, val) def _setup(self, network, optim_fn, optimizer_cls, optimizer_params, lr_scheduler_cls, lr_scheduler_params, gpu_ids, key_mapping, convert_batch_to_npy_fn, mixed_precision, mixed_precision_kwargs, callbacks): """ Defines the Trainers Setup Parameters ---------- network : :class:`AbstractPyTorchNetwork` the network to train optim_fn : function creates a dictionary containing all necessary optimizers optimizer_cls : subclass of torch.optim.Optimizer optimizer class implementing the optimization algorithm of choice optimizer_params : dict lr_scheduler_cls : Any learning rate schedule class: must implement step() method lr_scheduler_params : dict keyword arguments passed to lr scheduler during construction gpu_ids : list list containing ids of GPUs to use; if empty: use cpu instead convert_batch_to_npy_fn : type function converting a batch-tensor to numpy mixed_precision : bool whether to use mixed precision or not (False per default) mixed_precision_kwargs : dict additional keyword arguments for mixed precision callbacks : list initial callbacks to register """ self.optimizers = optim_fn(network, optimizer_cls, **optimizer_params) super()._setup(network, lr_scheduler_cls, lr_scheduler_params, gpu_ids, key_mapping, convert_batch_to_npy_fn, network.prepare_batch, callbacks) # Load latest 
epoch file if available if os.path.isdir(self.save_path): latest_state_path, latest_epoch = self._search_for_prev_state( self.save_path) if latest_state_path is not None: # if pth file does not exist, load pt file instead if not os.path.isfile(latest_state_path): latest_state_path = latest_state_path[:-1] logger.info("Attempting to load state from previous \ training from %s" % latest_state_path) try: self.update_state(latest_state_path) except KeyError: logger.warning("Previous State could not be loaded, \ although it exists. Training will be \ restarted") self.start_epoch = latest_epoch if gpu_ids and torch.cuda.is_available(): self.use_gpu = True if (len(gpu_ids) > 1) and (torch.cuda.device_count() > 1): # use GPU 0 as default input GPU self.input_device = torch.device("cuda:%d" % gpu_ids[0]) # Train on multiple GPUs and use GPU 1 as output device self.module = DataParallelPyTorchNetwork(self.module.to( self.input_device), device_ids=gpu_ids, output_device=gpu_ids[1]) # use GPU 1 as default output GPU for balanced GPU usage self.output_device = torch.device("cuda:%d" % gpu_ids[1]) else: # use the only available GPU as input device self.input_device = torch.device("cuda:%d" % gpu_ids[0]) self.module = self.module.to(self.input_device) # use GPU 0 as output device self.output_device = torch.device("cuda:%d" % gpu_ids[0]) else: self.use_gpu = False self.input_device = torch.device("cpu") self.output_device = torch.device("cpu") self.module = self.module.to(self.input_device) self._prepare_batch = partial( self._prepare_batch, input_device=self.input_device, output_device=self.output_device) try: # use apex for mixed precision if installed from apex import amp # extract optimizers and corresponding keys # (in case dict is not ordered) _optim_keys = list(self.optimizers.keys()) _optims = list(self.optimizers[k] for k in _optim_keys) # wrap model and register optimizers for mixed precision self.module, _optims = amp.initialize(self.module, _optims, mixed_precision, **mixed_precision_kwargs) for k, v in zip(_optim_keys, _optims): self.optimizers[k] = v except (ImportError, RuntimeError) as e: warnings.warn( "Either APEX can't be imported correctly or a value " "mismatch occurred. Switching to default FP32 " "training instead. 
The following Exception occurred:" "\n%s" % str(e)) def _at_training_begin(self, *args, **kwargs): """ Defines the behaviour at the beginning of the training Parameters ---------- *args : positional arguments **kwargs : keyword arguments """ for cbck in self._callbacks: self._update_state(cbck.at_training_begin(self, *args, **kwargs)) self.save_state(os.path.join(self.save_path, "checkpoint_epoch_%d" % self.start_epoch), self.start_epoch) def _at_training_end(self, *args, **kwargs): """ Defines Behaviour at end of training: Loads best model if available Returns ------- :class:`AbstractPyTorchNetwork` best network """ if os.path.isfile(os.path.join(self.save_path, 'checkpoint_best.pt')): # load best model and return it self.update_state(os.path.join(self.save_path, 'checkpoint_best.pt')) return super()._at_training_end(*args, **kwargs) def _at_epoch_end(self, metrics_val, val_score_key, epoch, is_best, **kwargs): """ Defines behaviour at the end of each epoch: Executes each callback's `at_epoch_end` method and saves the current state if necessary Parameters ---------- metrics_val : dict validation metrics val_score_key : str validation score key epoch : int current epoch num_epochs : int total number of epochs is_best : bool whether current model is best one so far **kwargs : keyword arguments """ for cb in self._callbacks: self._update_state( cb.at_epoch_end( self, val_metrics=metrics_val, val_score_key=val_score_key, curr_epoch=epoch)) if epoch % self.save_freq == 0: self.save_state(os.path.join(self.save_path, "checkpoint_epoch_%d.pt" % epoch), epoch) if is_best: self.save_state(os.path.join(self.save_path, "checkpoint_best.pt"), epoch) def _train_single_epoch(self, batchgen: MultiThreadedAugmenter, epoch, verbose=False): """ Trains the network a single epoch Parameters ---------- batchgen : MultiThreadedAugmenter Generator yielding the training batches epoch : int current epoch """ self.module.train() return super()._train_single_epoch(batchgen, epoch, verbose=verbose) def predict_data_mgr(self, datamgr, batchsize=None, metrics=None, metric_keys=None, verbose=False, **kwargs): """ Defines a routine to predict data obtained from a batchgenerator Parameters ---------- datamgr : :class:`DataManager` Manager producing a generator holding the batches batchsize : int Artificial batchsize (sampling will be done with batchsize 1 and sampled data will be stacked to match the artificial batchsize) (default: None) metrics : dict the metrics to calculate metric_keys : dict the ``batch_dict`` items to use for metric calculation verbose : bool whether to show a progress-bar or not, default: False **kwargs : additional keyword arguments Returns ------- dict predictions dict calculated metrics """ self.module.eval() if metrics is None: metrics = {} return super().predict_data_mgr(datamgr, batchsize, metrics, metric_keys, verbose, **kwargs) def save_state(self, file_name, epoch, **kwargs): """ saves the current state via :func:`delira.io.torch.save_checkpoint_torch` Parameters ---------- file_name : str filename to save the state to epoch : int current epoch (will be saved for mapping back) **kwargs : keyword arguments """ if not (file_name.endswith(".pth") or file_name.endswith(".pt")): file_name = file_name + ".pt" save_checkpoint_torch(file_name, self.module, self.optimizers, epoch, **kwargs) @staticmethod def load_state(file_name, **kwargs): """ Loads the new state from file via :func:`delira.io.torch.load_checkpoint_torch` Parameters ---------- file_name : str the file to load the state from **kwargs : keyword 
arguments Returns ------- dict new state """ if not (file_name.endswith(".pth") or file_name.endswith(".pt")): file_name = file_name + ".pt" return load_checkpoint_torch(file_name, **kwargs) def _update_state(self, new_state): """ Update the state from a given new state Parameters ---------- new_state : dict new state to update internal state from Returns ------- :class:`PyTorchNetworkTrainer` the trainer with a modified state """ if "model" in new_state: self.module.load_state_dict(new_state.pop("model")) if "optimizer" in new_state and new_state["optimizer"]: optim_state = new_state.pop("optimizer") for key in self.optimizers.keys(): self.optimizers[key].load_state_dict( optim_state[key]) if "epoch" in new_state: self.start_epoch = new_state.pop("epoch") return super()._update_state(new_state) @staticmethod def _search_for_prev_state(path, extensions=None): """ Helper function to search in a given path for previous epoch states (indicated by extensions) Parameters ---------- path : str the path to search in extensions : list list of strings containing valid file extensions for checkpoint files Returns ------- str the file containing the latest checkpoint (if available) None if no latest checkpoint was found int the latest epoch (1 if no checkpoint was found) """ if extensions is None: extensions = [".pt", ".pth"] return BaseNetworkTrainer._search_for_prev_state(path, extensions) ================================================ FILE: delira/training/backends/torch/utils.py ================================================ import torch from delira.utils.decorators import dtype_func from delira.training.utils import convert_to_numpy_identity from delira.training.utils import recursively_convert_elements @dtype_func(torch.nn.Module) def create_optims_default(model, optim_cls, **optim_params): """ Function to create an optimizer dictionary (in this case only one optimizer for the whole network) Parameters ---------- model : :class:`AbstractPyTorchNetwork` model whose parameters should be updated by the optimizer optim_cls : Class implementing an optimization algorithm **optim_params : Additional keyword arguments (passed to the optimizer class) Returns ------- dict dictionary containing all created optimizers """ return {"default": optim_cls(model.parameters(), **optim_params)} def _single_element_tensor_conversion(element): return element.cpu().detach().numpy() def convert_to_numpy(*args, **kwargs): """ Converts all :class:`torch.Tensor` in args and kwargs to numpy arrays Parameters ---------- *args : positional arguments of arbitrary number and type **kwargs : keyword arguments of arbitrary number and type Returns ------- list converted positional arguments dict converted keyword arguments """ args = recursively_convert_elements(args, torch.Tensor, _single_element_tensor_conversion) kwargs = recursively_convert_elements(kwargs, torch.Tensor, _single_element_tensor_conversion) return convert_to_numpy_identity(*args, **kwargs) ================================================ FILE: delira/training/backends/torchscript/__init__.py ================================================ from delira import get_backends as _get_backends if "TORCH" in _get_backends(): from delira.training.backends.torchscript.experiment import \ TorchScriptExperiment from delira.training.backends.torchscript.trainer import \ TorchScriptNetworkTrainer ================================================ FILE: delira/training/backends/torchscript/experiment.py ================================================ import typing from 
delira.models.backends.torchscript import AbstractTorchScriptNetwork from delira.utils import DeliraConfig from delira.training.backends.torch.experiment import PyTorchExperiment from delira.training.backends.torch.utils import create_optims_default from delira.training.backends.torchscript.trainer import \ TorchScriptNetworkTrainer class TorchScriptExperiment(PyTorchExperiment): def __init__(self, config: typing.Union[str, DeliraConfig], model_cls: AbstractTorchScriptNetwork, n_epochs=None, name=None, save_path=None, key_mapping=None, val_score_key=None, optim_builder=create_optims_default, checkpoint_freq=1, trainer_cls=TorchScriptNetworkTrainer, **kwargs): """ Parameters ---------- config : :class:`DeliraConfig` or str the training config, if string is passed, it is treated as a path to a file, where the config is loaded from model_cls : Subclass of :class:`AbstractTorchScriptNetwork` the class implementing the model to train n_epochs : int or None the number of epochs to train, if None: can be specified later during actual training name : str or None the Experiment's name save_path : str or None the path to save the results and checkpoints to. if None: Current working directory will be used key_mapping : dict mapping between data_dict and model inputs (necessary for prediction with :class:`Predictor`-API), if no keymapping is given, a default key_mapping of {"x": "data"} will be used here val_score_key : str or None key defining which metric to use for validation (determining best model and scheduling lr); if None: No validation-based operations will be done (model might still get validated, but validation metrics can only be logged and not used further) optim_builder : function Function returning a dict of backend-specific optimizers. defaults to :func:`create_optims_default_pytorch` checkpoint_freq : int frequency of saving checkpoints (1 denotes saving every epoch, 2 denotes saving every second epoch etc.); default: 1 trainer_cls : subclass of :class:`TorchScriptNetworkTrainer` the trainer class to use for training the model, defaults to :class:`TorchScriptNetworkTrainer` **kwargs : additional keyword arguments """ super().__init__(config=config, model_cls=model_cls, n_epochs=n_epochs, name=name, save_path=save_path, key_mapping=key_mapping, val_score_key=val_score_key, optim_builder=optim_builder, checkpoint_freq=checkpoint_freq, trainer_cls=trainer_cls, **kwargs) ================================================ FILE: delira/training/backends/torchscript/trainer.py ================================================ import logging from delira.io.torch import load_checkpoint_torchscript, \ save_checkpoint_torchscript from delira.models.backends.torchscript import AbstractTorchScriptNetwork from delira.training.base_trainer import BaseNetworkTrainer from delira.training.backends.torch.trainer import PyTorchNetworkTrainer from delira.training.backends.torch.utils import convert_to_numpy from delira.training.backends.torch.utils import create_optims_default from delira.training.callbacks.logging_callback import DefaultLoggingCallback logger = logging.getLogger(__name__) class TorchScriptNetworkTrainer(PyTorchNetworkTrainer): def __init__(self, network: AbstractTorchScriptNetwork, save_path: str, key_mapping, losses=None, optimizer_cls=None, optimizer_params=None, metrics=None, lr_scheduler_cls=None, lr_scheduler_params=None, gpu_ids=None, save_freq=1, optim_fn=create_optims_default, logging_type="tensorboardx", logging_kwargs=None, fold=0, callbacks=None, start_epoch=1, metric_keys=None, 
convert_batch_to_npy_fn=convert_to_numpy, criterions=None, val_freq=1, logging_callback_cls=DefaultLoggingCallback, logging_frequencies=None, logging_reduce_types=None, **kwargs): """ Parameters ---------- network : :class:`AbstractTorchScriptNetwork` the network to train save_path : str path to save networks to key_mapping : dict a dictionary containing the mapping from the ``data_dict`` to the actual model's inputs. E.g. if a model accepts one input named 'x' and the data_dict contains one entry named 'data' this argument would have to be ``{'x': 'data'}`` losses : dict dictionary containing the training losses optimizer_cls : subclass of torch.optim.Optimizer optimizer class implementing the optimization algorithm of choice optimizer_params : dict keyword arguments passed to optimizer during construction metrics : dict, optional metrics, which will be evaluated during train and validation phase (should work on numpy arrays) lr_scheduler_cls : Any learning rate schedule class: must implement step() method lr_scheduler_params : dict keyword arguments passed to lr scheduler during construction gpu_ids : list list containing ids of GPUs to use; if empty: use cpu instead Currently ``torch.jit`` only supports single GPU-Training, thus only the first GPU will be used if multiple GPUs are passed save_freq : int integer specifying how often to save the current model's state. State is saved every ``save_freq`` epochs optim_fn : function creates a dictionary containing all necessary optimizers logging_type : str or callable the type of logging. If string: it must be one of ["visdom", "tensorboardx"] If callable: it must be a logging handler class logging_kwargs : dict dictionary containing all logging keyword arguments logging_callback_cls : class the callback class to create and register for logging logging_frequencies : int or dict specifies how often to log for each key. If int: integer will be applied to all valid keys if dict: should contain a frequency per valid key. Missing keys will be filled with a frequency of 1 (log every time) None is equal to empty dict here. logging_reduce_types : str, FunctionType or dict if str: specifies the reduction type to use. Valid types are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'. The given type will be mapped to all valid keys. if FunctionType: specifies the actual reduction function. Will be applied for all keys. if dict: should contain pairs of valid logging keys and either str or FunctionType. Specifies the logging value per key. Missing keys will be filled with a default value of 'last'. Valid types for strings are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'.
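For illustration, a minimal sketch of how the two logging arguments above might be combined (``net`` stands for an already constructed network, the metric name 'mse' is an assumption made only for this example, and losses/optimizer arguments are omitted for brevity):

    # log the metric 'mse' only every 10th iteration and average the
    # buffered values before they are written out
    trainer = TorchScriptNetworkTrainer(
        network=net, save_path="./checkpoints", key_mapping={"x": "data"},
        logging_frequencies={"mse": 10},
        logging_reduce_types={"mse": "mean"})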
fold : int current cross validation fold (0 per default) callbacks : list initial callbacks to register start_epoch : int epoch to start training at metric_keys : dict dict specifying which batch_dict entry to use for which metric as target; default: None, which will result in key "label" for all metrics convert_batch_to_npy_fn : type, optional function converting a batch-tensor to numpy, per default this is a function, which detaches the tensor, moves it to cpu and then calls ``.numpy()`` on it mixed_precision : bool whether to use mixed precision or not; ignored here, since this trainer always passes ``False`` to the parent class mixed_precision_kwargs : dict additional keyword arguments for mixed precision; ignored here as well val_freq : int validation frequency specifying how often to validate the trained model (a value of 1 denotes validating every epoch, a value of 2 denotes validating every second epoch etc.); defaults to 1 **kwargs : additional keyword arguments """ if callbacks is None: callbacks = [] if logging_kwargs is None: logging_kwargs = {} if gpu_ids is None: gpu_ids = [] if lr_scheduler_params is None: lr_scheduler_params = {} if metrics is None: metrics = {} if optimizer_params is None: optimizer_params = {} if len(gpu_ids) > 1: # only use first GPU due to # https://github.com/pytorch/pytorch/issues/15421 gpu_ids = [gpu_ids[0]] logger.warning("Multiple GPUs specified. Torch JIT currently " "supports only single-GPU training. " "Switching to use only the first GPU " "for now...") super().__init__(network=network, save_path=save_path, losses=losses, optimizer_cls=optimizer_cls, optimizer_params=optimizer_params, metrics=metrics, lr_scheduler_cls=lr_scheduler_cls, lr_scheduler_params=lr_scheduler_params, gpu_ids=gpu_ids, save_freq=save_freq, optim_fn=optim_fn, key_mapping=key_mapping, logging_type=logging_type, logging_kwargs=logging_kwargs, logging_callback_cls=logging_callback_cls, logging_frequencies=logging_frequencies, logging_reduce_types=logging_reduce_types, fold=fold, callbacks=callbacks, start_epoch=start_epoch, metric_keys=metric_keys, convert_batch_to_npy_fn=convert_batch_to_npy_fn, val_freq=val_freq, mixed_precision=False, mixed_precision_kwargs={}, **kwargs ) def save_state(self, file_name, epoch, **kwargs): """ saves the current state via :func:`delira.io.torch.save_checkpoint_torchscript` Parameters ---------- file_name : str filename to save the state to epoch : int current epoch (will be saved for mapping back) **kwargs : keyword arguments """ if file_name.endswith(".ptj"): file_name = file_name.rsplit(".", 1)[0] save_checkpoint_torchscript(file_name, self.module, self.optimizers, **kwargs) @staticmethod def load_state(file_name, **kwargs): """ Loads the new state from file via :func:`delira.io.torch.load_checkpoint_torchscript` Parameters ---------- file_name : str the file to load the state from **kwargs : keyword arguments Returns ------- dict new state """ return load_checkpoint_torchscript(file_name, **kwargs) def _update_state(self, new_state): """ Update the state from a given new state Parameters ---------- new_state : dict new state to update internal state from Returns ------- :class:`TorchScriptNetworkTrainer` the trainer with a modified state """ if "model" in new_state: self.module = new_state.pop("model").to(self.input_device) return super()._update_state(new_state) @staticmethod def _search_for_prev_state(path, extensions=None): """ Helper function to search in a given path for previous epoch states (indicated by extensions) Parameters ---------- path : str the path to search in extensions : list list of strings containing valid file
extensions for checkpoint files Returns ------- str the file containing the latest checkpoint (if available) None if no latst checkpoint was found int the latest epoch (1 if no checkpoint was found) """ if extensions is None: extensions = [".model.ptj"] return BaseNetworkTrainer._search_for_prev_state(path, extensions) ================================================ FILE: delira/training/base_experiment.py ================================================ import typing import logging import pickle import os from datetime import datetime import warnings import copy import numpy as np from sklearn.model_selection import KFold, StratifiedKFold, \ StratifiedShuffleSplit, ShuffleSplit from delira import get_backends from delira.data_loading import DataManager from delira.models import AbstractNetwork from delira.utils import DeliraConfig from delira.training.base_trainer import BaseNetworkTrainer from delira.training.predictor import Predictor logger = logging.getLogger(__name__) class BaseExperiment(object): """ Baseclass for Experiments. Implements: * Setup-Behavior for Models, Trainers and Predictors (depending on train and test case) * The K-Fold logic (including stratified and random splitting) * Argument Handling """ def __init__(self, config: typing.Union[str, DeliraConfig], model_cls: AbstractNetwork, n_epochs=None, name=None, save_path=None, key_mapping=None, val_score_key=None, optim_builder=None, checkpoint_freq=1, trainer_cls=BaseNetworkTrainer, predictor_cls=Predictor, **kwargs): """ Parameters ---------- config : :class:`DeliraConfig` or str the training parameters, if string is passed, it is treated as a path to a file, where the config is loaded from model_cls : Subclass of :class:`AbstractNetwork` the class implementing the model to train n_epochs : int or None the number of epochs to train, if None: can be specified later during actual training name : str or None the Experiment's name save_path : str or None the path to save the results and checkpoints to. 
if None: Current working directory will be used key_mapping : dict mapping between data_dict and model inputs (necessary for prediction with :class:`Predictor`-API) val_score_key : str or None key defining which metric to use for validation (determining best model and scheduling lr); if None: No validation-based operations will be done (model might still get validated, but validation metrics can only be logged and not used further) optim_builder : function Function returning a dict of backend-specific optimizers checkpoint_freq : int frequency of saving checkpoints (1 denotes saving every epoch, 2 denotes saving every second epoch etc.); default: 1 trainer_cls : subclass of :class:`BaseNetworkTrainer` the trainer class to use for training the model predictor_cls : subclass of :class:`Predictor` the predictor class to use for testing the model **kwargs : additional keyword arguments """ # config could also be a path to a file containing the config information if isinstance(config, str): config = DeliraConfig.create_from_file(config) if n_epochs is None: n_epochs = config.nested_get("n_epochs", config.nested_get("num_epochs")) self.n_epochs = n_epochs if name is None: name = "UnnamedExperiment" self.name = name if save_path is None: save_path = os.path.abspath(".") self.save_path = os.path.join(save_path, name, str(datetime.now().strftime( "%y-%m-%d_%H-%M-%S"))) if os.path.isdir(self.save_path): logger.warning("Save Path %s already exists", self.save_path) os.makedirs(self.save_path, exist_ok=True) self.trainer_cls = trainer_cls self.predictor_cls = predictor_cls if val_score_key is None: warnings.warn("No 'val_score_key' is given. This disables the " "automatic selection of the best model", UserWarning) self.val_score_key = val_score_key assert key_mapping is not None self.key_mapping = key_mapping self.config = config self.model_cls = model_cls self._optim_builder = optim_builder self.checkpoint_freq = checkpoint_freq self._run = 0 self.kwargs = kwargs def setup(self, config, training=True, **kwargs): """ Defines the setup behavior (model, trainer etc.)
for training and testing case Parameters ---------- config : :class:`DeliraConfig` the config to use for setup training : bool whether to setup for training case or for testing case **kwargs : additional keyword arguments Returns ------- :class:`BaseNetworkTrainer` the created trainer (if ``training=True``) :class:`Predictor` the created predictor (if ``training=False``) See Also -------- * :meth:`BaseExperiment._setup_training` for training setup * :meth:`BaseExperiment._setup_test` for test setup """ if training: return self._setup_training(config, **kwargs) return self._setup_test(config, **kwargs) def _setup_training(self, config, **kwargs): """ Handles the setup for training case Parameters ---------- config : :class:`DeliraConfig` the config containing the model and training kwargs **kwargs : additional keyword arguments Returns ------- :class:`BaseNetworkTrainer` the created trainer """ model_kwargs = config.model_params model_kwargs = {**model_kwargs["variable"], **model_kwargs["fixed"]} model = self.model_cls(**model_kwargs) training_params = config.training_params losses = training_params.nested_get("losses") optimizer_cls = training_params.nested_get("optimizer_cls") optimizer_params = training_params.nested_get("optimizer_params") train_metrics = training_params.nested_get("train_metrics", {}) lr_scheduler_cls = training_params.nested_get("lr_sched_cls", None) lr_scheduler_params = training_params.nested_get("lr_sched_params", {}) metrics = training_params.nested_get("metrics", {}) # ToDo: remove after next release val_metrics = config.nested_get("val_metrics", {}) train_metrics = config.nested_get("train_metrics", {}) if val_metrics or train_metrics: warnings.warn("'val_metrics' and 'train_metrics' are deprecated. " "Please use the combined 'metrics' instead!", DeprecationWarning) metrics.update(val_metrics) metrics.update(train_metrics) # necessary for resuming training from a given path save_path = kwargs.pop("save_path", os.path.join( self.save_path, "checkpoints", "run_%02d" % self._run)) return self.trainer_cls( network=model, save_path=save_path, losses=losses, key_mapping=self.key_mapping, optimizer_cls=optimizer_cls, optimizer_params=optimizer_params, train_metrics=train_metrics, metrics=metrics, lr_scheduler_cls=lr_scheduler_cls, lr_scheduler_params=lr_scheduler_params, optim_fn=self._optim_builder, save_freq=self.checkpoint_freq, **kwargs ) def _setup_test(self, config, model, convert_batch_to_npy_fn, prepare_batch_fn, **kwargs): """ Parameters ---------- config : :class:`DeliraConfig` the parameters containing the model and training kwargs (ignored here, just passed for subclassing and unified API) model : :class:`AbstractNetwork` the model to test convert_batch_to_npy_fn : function function to convert a batch of tensors to numpy prepare_batch_fn : function function to convert a batch-dict to a format accepted by the model. 
This conversion typically includes dtype-conversion, reshaping, wrapping to backend-specific tensors and pushing to correct devices **kwargs : additional keyword arguments Returns ------- :class:`Predictor` the created predictor """ predictor = self.predictor_cls( model=model, key_mapping=self.key_mapping, convert_batch_to_npy_fn=convert_batch_to_npy_fn, prepare_batch_fn=prepare_batch_fn, **kwargs) return predictor def run(self, train_data: DataManager, val_data: DataManager = None, config: DeliraConfig = None, **kwargs): """ Setup and run training Parameters ---------- train_data : :class:`DataManager` the data to use for training val_data : :class:`DataManager` or None the data to use for validation (no validation is done if passing None); default: None config : :class:`DeliraConfig` or None the config to use for training and model instantiation (will be merged with ``self.config``) **kwargs : additional keyword arguments Returns ------- :class:`AbstractNetwork` The trained network returned by the trainer (usually best network) See Also -------- :class:`BaseNetworkTrainer` for training itself """ config = self._resolve_params(config) kwargs = self._resolve_kwargs(kwargs) training_params = config.training_params trainer = self.setup(config, training=True, **kwargs) self._run += 1 num_epochs = kwargs.get("num_epochs", training_params.nested_get( "num_epochs", self.n_epochs)) if num_epochs is None: num_epochs = self.n_epochs return trainer.train(num_epochs, train_data, val_data, self.val_score_key, kwargs.get("val_score_mode", "lowest")) def resume(self, save_path: str, train_data: DataManager, val_data: DataManager = None, config: DeliraConfig = None, **kwargs): """ Resumes a previous training by passing an explicit ``save_path`` instead of generating a new one Parameters ---------- save_path : str path to previous training train_data : :class:`DataManager` the data to use for training val_data : :class:`DataManager` or None the data to use for validation (no validation is done if passing None); default: None config : :class:`DeliraConfig` or None the config to use for training and model instantiation (will be merged with ``self.config``) **kwargs : additional keyword arguments Returns ------- :class:`AbstractNetwork` The trained network returned by the trainer (usually best network) See Also -------- :class:`BaseNetworkTrainer` for training itself """ return self.run( train_data=train_data, val_data=val_data, config=config, save_path=save_path, **kwargs) def test(self, network, test_data: DataManager, metrics: dict, metric_keys=None, verbose=False, prepare_batch=None, convert_fn=lambda *x, **y: (x, y), **kwargs): """ Setup and run testing on a given network Parameters ---------- network : :class:`AbstractNetwork` the (trained) network to test test_data : :class:`DataManager` the data to use for testing metrics : dict the metrics to calculate metric_keys : dict of tuples the batch_dict keys to use for each metric to calculate. Should contain a value for each key in ``metrics``. If no values are given for a key, per default ``pred`` and ``label`` will be used for metric calculation verbose : bool verbosity of the test process prepare_batch : function function to convert a batch-dict to a format accepted by the model. 
This conversion typically includes dtype-conversion, reshaping, wrapping to backend-specific tensors and pushing to correct devices convert_fn : function function to convert a batch of tensors to numpy **kwargs : additional keyword arguments Returns ------- dict all predictions obtained by feeding the ``test_data`` through the ``network`` dict all metrics calculated upon the ``test_data`` and the obtained predictions """ kwargs = self._resolve_kwargs(kwargs) predictor = self.setup(None, training=False, model=network, convert_batch_to_npy_fn=convert_fn, prepare_batch_fn=prepare_batch, **kwargs) # return first item of generator return next(predictor.predict_data_mgr_cache_all(test_data, 1, metrics, metric_keys, verbose)) def kfold(self, data: DataManager, metrics: dict, num_epochs=None, num_splits=None, shuffle=False, random_seed=None, split_type="random", val_split=0.2, label_key="label", train_kwargs: dict = None, metric_keys: dict = None, test_kwargs: dict = None, config=None, verbose=False, **kwargs): """ Performs a k-Fold cross-validation Parameters ---------- data : :class:`DataManager` the data to use for training(, validation) and testing. Will be split based on ``split_type`` and ``val_split`` metrics : dict dictionary containing the metrics to evaluate during k-fold num_epochs : int or None number of epochs to train (if not given, will either be extracted from ``config``, ``self.config`` or ``self.n_epochs``) num_splits : int or None the number of splits to extract from ``data``. If None: uses a default of 10 shuffle : bool whether to shuffle the data before splitting or not (implemented by index-shuffling rather than actual data-shuffling to retain potentially lazy-behavior of datasets) random_seed : None seed to seed numpy, the splitting functions and the used backend-framework split_type : str must be one of ['random', 'stratified'] if 'random': uses random data splitting if 'stratified': uses stratified data splitting. Stratification will be based on ``label_key`` val_split : float or None the fraction of the train data to use as validation set. If None: No validation will be done during training; only testing for each fold after the training is complete label_key : str the label to use for stratification. Will be ignored unless ``split_type`` is 'stratified'. Default: 'label' train_kwargs : dict or None kwargs to update the behavior of the :class:`DataManager` containing the train data. If None: empty dict will be passed metric_keys : dict of tuples the batch_dict keys to use for each metric to calculate. Should contain a value for each key in ``metrics``. If no values are given for a key, per default ``pred`` and ``label`` will be used for metric calculation test_kwargs : dict or None kwargs to update the behavior of the :class:`DataManager` containing the test and validation data. 
If None: empty dict will be passed config : :class:`DeliraConfig`or None the training and model parameters (will be merged with ``self.config``) verbose : bool verbosity **kwargs : additional keyword arguments Returns ------- dict all predictions from all folds dict all metric values from all folds Raises ------ ValueError if ``split_type`` is neither 'random', nor 'stratified' See Also -------- * :class:`sklearn.model_selection.KFold` and :class:`sklearn.model_selection.ShuffleSplit` for random data-splitting * :class:`sklearn.model_selection.StratifiedKFold` and :class:`sklearn.model_selection.StratifiedShuffleSplit` for stratified data-splitting * :meth:`DataManager.update_from_state_dict` for updating the data managers by kwargs * :meth:`BaseExperiment.run` for the training * :meth:`BaseExperiment.test` for the testing Notes ----- using stratified splits may be slow during split-calculation, since each item must be loaded once to obtain the labels necessary for stratification. """ # set number of splits if not specified if num_splits is None: num_splits = 10 logger.warning("num_splits not defined, using default value of \ 10 splits instead ") metrics_test, outputs = {}, {} split_idxs = list(range(len(data.dataset))) if train_kwargs is None: train_kwargs = {} if test_kwargs is None: test_kwargs = {} # switch between differnt kfold types if split_type == "random": split_cls = KFold val_split_cls = ShuffleSplit # split_labels are ignored for random splitting, set them to # split_idxs just ensures same length split_labels = split_idxs elif split_type == "stratified": split_cls = StratifiedKFold val_split_cls = StratifiedShuffleSplit # iterate over dataset to get labels for stratified splitting split_labels = [data.dataset[_idx][label_key] for _idx in split_idxs] else: raise ValueError("split_type must be one of " "['random', 'stratified'], but got: %s" % str(split_type)) fold = split_cls(n_splits=num_splits, shuffle=shuffle, random_state=random_seed) if random_seed is not None: np.random.seed(random_seed) # iterate over folds for idx, (train_idxs, test_idxs) in enumerate( fold.split(split_idxs, split_labels)): # extract data from single manager train_data = data.get_subset(train_idxs) test_data = data.get_subset(test_idxs) train_data.update_state_from_dict(copy.deepcopy(train_kwargs)) test_data.update_state_from_dict(copy.deepcopy(test_kwargs)) val_data = None if val_split is not None: if split_type == "random": # split_labels are ignored for random splitting, set them # to split_idxs just ensures same length train_labels = train_idxs elif split_type == "stratified": # iterate over dataset to get labels for stratified # splitting train_labels = [train_data.dataset[_idx][label_key] for _idx in train_idxs] else: raise ValueError("split_type must be one of " "['random', 'stratified'], but got: %s" % str(split_type)) _val_split = val_split_cls(n_splits=1, test_size=val_split, random_state=random_seed) for _train_idxs, _val_idxs in _val_split.split(train_idxs, train_labels): val_data = train_data.get_subset(_val_idxs) val_data.update_state_from_dict(copy.deepcopy(test_kwargs)) train_data = train_data.get_subset(_train_idxs) model = self.run(train_data=train_data, val_data=val_data, config=config, num_epochs=num_epochs, fold=idx, **kwargs) _outputs, _metrics_test = self.test(model, test_data, metrics=metrics, metric_keys=metric_keys, verbose=verbose) outputs[str(idx)] = _outputs metrics_test[str(idx)] = _metrics_test return outputs, metrics_test def __str__(self): """ Converts 
:class:`BaseExperiment` to string representation Returns ------- str representation of class """ s = "Experiment:\n" for k, v in vars(self).items(): s += "\t{} = {}\n".format(k, v) return s def __call__(self, *args, **kwargs): """ Call :meth:`BaseExperiment.run` Parameters ---------- *args : positional arguments **kwargs : keyword arguments Returns ------- :class:`BaseNetworkTrainer` trainer of trained network """ return self.run(*args, **kwargs) def save(self): """ Saves the Whole experiments """ with open(os.path.join(self.save_path, "experiment.delira.pkl"), "wb") as f: pickle.dump(self, f) self.config.dump(os.path.join(self.save_path, "parameters")) @staticmethod def load(file_name): """ Loads whole experiment Parameters ---------- file_name : str file_name to load the experiment from """ with open(file_name, "rb") as f: return pickle.load(f) def _resolve_params(self, config: typing.Union[DeliraConfig, None]): """ Merges the given config with ``self.config``. If the same argument is given in both configs, the one from the currently given config is used here Parameters ---------- config : :class:`DeliraConfig` or None the parameters to merge with ``self.config`` Returns ------- :class:`Parameters` the merged parameter instance """ if config is None: config = DeliraConfig() if hasattr(self, "config") and isinstance(self.config, DeliraConfig): _config = copy.deepcopy(config) config = self.config config.update(_config, overwrite=True) return config def _resolve_kwargs(self, kwargs: typing.Union[dict, None]): """ Merges given kwargs with ``self.kwargs`` If same argument is present in both kwargs, the one from the given kwargs will be used here Parameters ---------- kwargs : dict the given kwargs to merge with self.kwargs Returns ------- dict merged kwargs """ if kwargs is None: kwargs = {} if hasattr(self, "kwargs") and isinstance(self.kwargs, dict): _kwargs = kwargs kwargs = self.kwargs kwargs.update(_kwargs) return kwargs def __getstate__(self): return vars(self) def __setstate__(self, state): vars(self).update(state) ================================================ FILE: delira/training/base_trainer.py ================================================ import logging import os import pickle import typing import warnings from delira.utils.config import LookupConfig import numpy as np from tqdm import tqdm from .callbacks import AbstractCallback, DefaultLoggingCallback from .predictor import Predictor from ..data_loading import Augmenter, DataManager from ..models import AbstractNetwork from ..logging import register_logger, make_logger logger = logging.getLogger(__name__) class BaseNetworkTrainer(Predictor): """ Defines a Base API and basic functions for Network Trainers See Also -------- :class:`PyTorchNetworkTrainer` :class:`TfNetworkTrainer` """ __KEYS_TO_GUARD = ["use_gpu", "input_device", "output_device", "_callbacks"] def __init__(self, network: AbstractNetwork, save_path: str, losses: dict, optimizer_cls: type, optimizer_params: dict, metrics: dict, lr_scheduler_cls: type, lr_scheduler_params: dict, gpu_ids: typing.List[int], save_freq: int, optim_fn, key_mapping: dict, logging_type: str, logging_kwargs: dict, logging_callback_cls=DefaultLoggingCallback, logging_frequencies=None, logging_reduce_types=None, fold: int = 0, callbacks: typing.List[AbstractCallback] = None, start_epoch=1, metric_keys=None, convert_batch_to_npy_fn=lambda x: x, val_freq=1, **kwargs ): """ Parameters ---------- network : :class:`AbstractTfNetwork` the network to train save_path : str path to save networks to 
losses : dict dictionary containing the training losses optimizer_cls : type backend-specific optimizer class implementing the optimization algorithm of choice optimizer_params : dict keyword arguments passed to optimizer during construction metrics : dict, optional metrics, which will be evaluated during train and validation phase (should work on numpy arrays) lr_scheduler_cls : Any learning rate schedule class: must implement step() method lr_scheduler_params : dict keyword arguments passed to lr scheduler during construction gpu_ids : list list containing ids of GPUs to use; if empty: use cpu instead save_freq : int integer specifying how often to save the current model's state. State is saved every ``save_freq`` epochs optim_fn : function creates a dictionary containing all necessary optimizers key_mapping : dict a dictionary containing the mapping from the ``data_dict`` to the actual model's inputs. E.g. if a model accepts one input named 'x' and the data_dict contains one entry named 'data' this argument would have to be ``{'x': 'data'}`` logging_type : str or callable the type of logging. If string: it must be one of ["visdom", "tensorboardx"] If callable: it must be a logging handler backend class logging_kwargs : dict dictionary containing all logging keyword arguments logging_callback_cls : class the callback class to create and register for logging logging_frequencies : int or dict specifies how often to log for each key. If int: integer will be applied to all valid keys if dict: should contain a frequency per valid key. Missing keys will be filled with a frequency of 1 (log every time) None is equal to empty dict here. logging_reduce_types : str, FunctionType or dict if str: specifies the reduction type to use. Valid types are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'. The given type will be mapped to all valid keys. if FunctionType: specifies the actual reduction function. Will be applied for all keys. if dict: should contain pairs of valid logging keys and either str or FunctionType. Specifies the logging value per key. Missing keys will be filled with a default value of 'last'. Valid types for strings are 'last' | 'first' | 'mean' | 'median' | 'max' | 'min'. fold : int current cross validation fold (0 per default) callbacks : list initial callbacks to register start_epoch : int epoch to start training at metric_keys : dict the batch_dict keys to use for each metric to calculate. Should contain a value for each key in ``metrics``. If no values are given for a key, per default ``pred`` and ``label`` will be used for metric calculation convert_batch_to_npy_fn : type, optional function converting a batch-tensor to numpy, per default this is the identity function val_freq : int validation frequency specifying how often to validate the trained model (a value of 1 denotes validating every epoch, a value of 2 denotes validating every second epoch etc.); defaults to 1 **kwargs : Additional keyword arguments """ # explicitly do not call self._setup here to reuse the __init__ of the # abstract class.
self._setup has to be called in subclass if callbacks is None: callbacks = [] # check argument types for arg_name, instance, cls_type in zip( ["network", "save_path", "losses", "optimizer_params", "metrics", "lr_scheduler_params", "gpu_ids"], [network, save_path, losses, optimizer_params, metrics, lr_scheduler_params, gpu_ids], [AbstractNetwork, str, dict, dict, dict, dict, list]): if not isinstance(instance, cls_type): raise TypeError("%s should be of type %s, but is of type %s" % (arg_name, cls_type.__name__, type(instance).__name__)) if os.path.isdir(save_path): logger.warning( "Save Path already exists. Saved Models may be overwritten") else: os.makedirs(save_path) self._fold = fold self.start_epoch = start_epoch self.save_path = save_path self.losses = losses self.metrics = metrics self.stop_training = False self.save_freq = save_freq self.metric_keys = metric_keys self._tqdm_desc = "Validate" self.val_freq = val_freq self._global_iter_num = 1 self._logging_setup_kwargs = { "logging_type": logging_type, "logging_kwargs": logging_kwargs, "logging_callback_cls": logging_callback_cls, "logging_frequencies": logging_frequencies, "reduce_types": logging_reduce_types} def _setup(self, network, lr_scheduler_cls, lr_scheduler_params, gpu_ids, key_mapping, convert_batch_to_npy_fn, prepare_batch_fn, callbacks): super()._setup(network, key_mapping, convert_batch_to_npy_fn, prepare_batch_fn, callbacks) self._reinitialize_logging(**self._logging_setup_kwargs) self.closure_fn = network.closure # optimizers must exist before calling _setup() if lr_scheduler_cls is not None: for key, optim in self.optimizers.items(): if not issubclass(lr_scheduler_cls, AbstractCallback): logger.warning("lr_scheduler_cls is not a callback.") self.register_callback(lr_scheduler_cls(optim, **lr_scheduler_params)) if gpu_ids: self.use_gpu = True else: self.use_gpu = False def _at_training_begin(self, *args, **kwargs): """ Defines the behaviour at the beginning of the training Parameters ---------- *args : positional arguments **kwargs : keyword arguments """ for cbck in self._callbacks: self._update_state(cbck.at_training_begin(self, *args, **kwargs)) self.save_state(os.path.join(self.save_path, "checkpoint_epoch_%d" % self.start_epoch)) def _at_training_end(self, *args, **kwargs): """ Defines the behaviour at the end of the training Parameters ---------- *args : positional arguments **kwargs : keyword arguments Returns ------- :class:`AbstractNetwork` the network with the loaded state """ for cbck in self._callbacks: self._update_state(cbck.at_training_end(self, *args, **kwargs)) return self.module def _at_epoch_begin(self, val_score_key, epoch, num_epochs, **kwargs): """ Defines behaviour at beginning of each epoch: Executes all callbacks' `at_epoch_begin` method Parameters ---------- val_score_key : str validation score key epoch : int current epoch num_epochs : int total number of epochs **kwargs : keyword arguments """ # execute all callbacks for cb in self._callbacks: self._update_state(cb.at_epoch_begin(self, val_metrics={}, val_score_key=val_score_key, curr_epoch=epoch)) def _at_epoch_end(self, metrics_val, val_score_key, epoch, is_best, **kwargs): """ Defines behaviour at the end of each epoch: Executes all callbacks' `at_epoch_end` method and saves current state if necessary Parameters ---------- metrics_val : dict validation metrics val_score_key : str validation score key epoch : int current epoch is_best : bool whether the current state is the best one seen so far **kwargs : keyword arguments """ for cb in self._callbacks: self._update_state(cb.at_epoch_end(self, val_metrics=metrics_val, val_score_key=val_score_key, curr_epoch=epoch)) if
epoch % self.save_freq == 0: self.save_state(os.path.join(self.save_path, "checkpoint_epoch_%d" % epoch)) if is_best: self.save_state(os.path.join(self.save_path, "checkpoint_best")) def _at_iter_begin(self, iter_num, epoch=0, **kwargs): """ Defines the behavior executed at an iteration's begin Parameters ---------- iter_num : int number of current iter epoch : int number of current epoch **kwargs : additional keyword arguments (forwarded to callback calls) """ for cb in self._callbacks: self._update_state(cb.at_iter_begin( self, iter_num=iter_num, curr_epoch=epoch, global_iter_num=self._global_iter_num, train=True, **kwargs, )) def _at_iter_end(self, iter_num, data_dict, metrics, epoch=0, **kwargs): """ Defines the behavior executed at an iteration's end Parameters ---------- iter_num : int number of current iter data_dict : dict dictionary holding input data and predictions metrics: dict calculated metrics epoch : int number of current epoch **kwargs : additional keyword arguments (forwarded to callback calls) """ for cb in self._callbacks: self._update_state(cb.at_iter_end( self, iter_num=iter_num, data_dict=data_dict, metrics=metrics, curr_epoch=epoch, global_iter_num=self._global_iter_num, train=True, **kwargs, )) self._global_iter_num += 1 def _train_single_epoch(self, dmgr_train: DataManager, epoch, verbose=False): """ Trains the network a single epoch Parameters ---------- dmgr_train : :class:`DataManager` Datamanager to create the data generator epoch : int current epoch """ metrics, losses = [], [] batchgen = dmgr_train.get_batchgen(seed=epoch) n_batches = dmgr_train.n_batches if verbose: iterable = tqdm( enumerate(batchgen), unit=' batch', total=n_batches, desc='Epoch %d' % epoch) else: iterable = enumerate(batchgen) for iter_num, batch in iterable: self._at_iter_begin(epoch=epoch, iter_num=iter_num) data_dict = self._prepare_batch(batch) _losses, _preds = self.closure_fn(self.module, data_dict, optimizers=self.optimizers, losses=self.losses, fold=self.fold, iter_num=iter_num) data_dict = self._convert_to_npy_fn(**data_dict)[1] _preds = self._convert_to_npy_fn(**_preds)[1] _metrics = self.calc_metrics( LookupConfig(**data_dict, **_preds), self.metrics, self.metric_keys) metrics.append(_metrics) losses.append(_losses) self._at_iter_end(epoch=epoch, iter_num=iter_num, data_dict={**batch, **_preds}, metrics={**_metrics, **_losses}, ) total_losses, total_metrics = {}, {} for _metrics in metrics: for key, val in _metrics.items(): if key in total_metrics: total_metrics[key].append(val) else: total_metrics[key] = [val] for _losses in losses: for key, val in _losses.items(): if key in total_losses: total_losses[key].append(val) else: total_losses[key] = [val] return total_metrics, total_losses def train(self, num_epochs, datamgr_train, datamgr_valid=None, val_score_key=None, val_score_mode='highest', reduce_mode='mean', verbose=True): """ Defines a routine to train a specified number of epochs Parameters ---------- num_epochs : int number of epochs to train datamgr_train : DataManager the datamanager holding the train data datamgr_valid : DataManager the datamanager holding the validation data (default: None) val_score_key : str the key specifying which metric to use for validation (default: None) val_score_mode : str key specifying what kind of validation score is best reduce_mode : str 'mean','sum','first_only' verbose : bool whether to show progress bars or not """ self._at_training_begin() if val_score_mode == 'highest': best_val_score = 0 elif val_score_mode == 'lowest': 
best_val_score = float('inf') else: best_val_score = None is_best = False new_val_score = best_val_score if reduce_mode == 'mean': def reduce_fn(batch): return np.mean(batch) elif reduce_mode == 'sum': def reduce_fn(batch): return np.sum(batch) elif reduce_mode == 'first_only': def reduce_fn(batch): return batch[0] elif reduce_mode == 'last_only': def reduce_fn(batch): return batch[-1] else: raise ValueError("No valid reduce mode given") for epoch in range(self.start_epoch, num_epochs + 1): self._at_epoch_begin(val_score_key, epoch, num_epochs) # train single network epoch train_metrics, train_losses = self._train_single_epoch( datamgr_train, epoch, verbose=verbose) total_metrics = { **train_metrics, **train_losses} # validate network if datamgr_valid is not None and (epoch % self.val_freq == 0): # next must be called here because self.predict_data_mgr # returns a generator (of size 1) and we want to get the # first (and only) item val_metrics = next( self.predict_data_mgr_cache_metrics_only( datamgr_valid, datamgr_valid.batch_size, metrics=self.metrics, metric_keys=self.metric_keys, verbose=verbose)) val_metrics = {"val_" + k: v for k, v in val_metrics.items()} total_metrics.update(val_metrics) _, total_metrics = self._convert_to_npy_fn(**total_metrics) for k, v in total_metrics.items(): total_metrics[k] = reduce_fn(v) # check if metric became better if val_score_key is not None: if val_score_key not in total_metrics: if "val_" + val_score_key not in total_metrics: warnings.warn("val_score_key '%s' not a valid key " "for validation metrics" % str(val_score_key), UserWarning) new_val_score = best_val_score else: new_val_score = \ total_metrics["val_" + val_score_key] val_score_key = "val_" + val_score_key else: new_val_score = total_metrics.get(val_score_key) if new_val_score != best_val_score: is_best = self._is_better_val_scores( best_val_score, new_val_score, val_score_mode) # set best_val_score to new_val_score if is_best if is_best: best_val_score = new_val_score if is_best and verbose: logging.info("New Best Value at Epoch %03d : %03.3f" % (epoch, best_val_score)) self._at_epoch_end(total_metrics, val_score_key, epoch, is_best) is_best = False # stop training (might be caused by early stopping) if self.stop_training: break return self._at_training_end() @property def fold(self): """ Get current fold Returns ------- int current fold """ return self._fold @fold.setter def fold(self, fold): """ Set the current fold Parameters ---------- fold : int new fold Raises ------ ValueError if `fold` is not covertable to :obj:`int` """ try: self._fold = int(fold) except ValueError as e: logger.error(e) raise e def register_callback(self, callback: AbstractCallback): """ Register Callback to Trainer Parameters ---------- callback : :class:`AbstractCallback` the callback to register Raises ------ AssertionError `callback` is not an instance of :class:`AbstractCallback` and has not both methods ['at_epoch_begin', 'at_epoch_end'] """ assertion_str = "Given callback is not valid; Must be instance of " \ "AbstractCallback or provide functions " \ "'at_training_begin' and 'at_training_end'" instance_check = isinstance(callback, AbstractCallback) attr_check_begin_train = hasattr(callback, "at_training_begin") attr_check_end_train = hasattr(callback, "at_training_end") attr_check_both_train = attr_check_begin_train and attr_check_end_train assert instance_check or attr_check_both_train, assertion_str super().register_callback(callback) def save_state(self, file_name, *args, **kwargs): """ saves the 
current state Parameters ---------- file_name : str filename to save the state to *args : positional arguments **kwargs : keyword arguments """ with open(file_name, "wb") as f: pickle.dump(vars(self), f, *args, **kwargs) @staticmethod def load_state(file_name, *args, **kwargs): """ Loads the new state from file Parameters ---------- file_name : str the file to load the state from *args : positional arguments **kwargs : keyword arguments Returns ------- dict new state """ with open(file_name, "rb") as f: new_state = pickle.load(f, *args, **kwargs) return new_state def _update_state(self, new_state): """ Update the state from a given new state Parameters ---------- new_state : dict new state to update internal state from Returns ------- :class:`BaseNetworkTrainer` the trainer with a modified state """ for key, val in new_state.items(): if key.startswith("__") and key.endswith("__"): continue try: setattr(self, key, val) except PermissionError: logger.error("Trying to overwrite attribute %s of " "NetworkTrainer, which is not allowed!" % key) return self def update_state(self, file_name, *args, **kwargs): """ Update internal state from a loaded state Parameters ---------- file_name : str file containing the new state to load *args : positional arguments **kwargs : keyword arguments Returns ------- :class:`BaseNetworkTrainer` the trainer with a modified state """ self._update_state(self.load_state(file_name, *args, **kwargs)) @staticmethod def _is_better_val_scores(old_val_score, new_val_score, mode='highest'): """ Check whether the new val score is better than the old one with respect to the optimization goal Parameters ---------- old_val_score : old validation score new_val_score : new validation score mode: str String to specify whether a higher or lower validation score is optimal; must be in ['highest', 'lowest'] Returns ------- bool True if new score is better, False otherwise """ assert mode in ['highest', 'lowest'], "Invalid Comparison Mode" if mode == 'highest': return new_val_score > old_val_score elif mode == 'lowest': return new_val_score < old_val_score @property def name(self): return os.path.basename(os.path.dirname(os.path.dirname( os.path.dirname(self.save_path)))) def _reinitialize_logging(self, logging_type, logging_kwargs: dict, logging_callback_cls, logging_frequencies, reduce_types): """ Parameters ---------- logging_type : str or callable the type of logging. If string: it must be one of ["visdom", "tensorboardx"] If callable: it must be a logging handler backend class logging_kwargs : dict dictionary containing all logging keyword arguments logging_callback_cls : class the callback class to create and register for logging logging_frequencies : int or dict specifies how often to log for each key. If int: integer will be applied to all valid keys if dict: should contain a frequency per valid key. Missing keys will be filled with a frequency of 1 (log every time) None is equal to empty dict here. reduce_types : str of FunctionType or dict Values are logged in each iteration. This argument specifies, how to reduce them to a single value if a logging_frequency besides 1 is passed if str: specifies the reduction type to use. Valid types are 'last' | 'first' | 'mean' | 'max' | 'min'. The given type will be mapped to all valid keys. if FunctionType: specifies the actual reduction function. Will be applied for all keys. if dict: should contain pairs of valid logging keys and either str or FunctionType. Specifies the logging value per key. 
Missing keys will be filles with a default value of 'last'. Valid types for strings are 'last' | 'first' | 'mean' | 'max' | 'min'. """ from delira.logging import TensorboardBackend, VisdomBackend, \ BaseBackend if isinstance(logging_type, str): if logging_type.lower() == "visdom": backend_cls = VisdomBackend elif logging_type.lower() == "tensorboardx": backend_cls = TensorboardBackend else: raise ValueError("Invalid Logging Type") elif issubclass(logging_type, BaseBackend): backend_cls = logging_type else: raise ValueError("Invalid logging_type passed") _logging_kwargs = {} if backend_cls == VisdomBackend: _logging_kwargs.update({"exp_name": "main", "level": 0}) elif backend_cls == TensorboardBackend: _logging_kwargs.update( { "logdir": os.path.join(os.path.dirname( os.path.dirname(self.save_path)), "logs", "run_%02d" % self.fold), "level": 0}) _logging_kwargs.update(logging_kwargs) if "exp_name" in _logging_kwargs.keys(): _logging_kwargs["exp_name"] = _logging_kwargs["exp_name"] + \ "_%02d" % self.fold # remove prior Trixihandlers and reinitialize it with given logging # type # This facilitates visualization of multiple splits/fold inside one # tensorboard-instance by means of # different tf.Summary.FileWriters() level = _logging_kwargs.pop("level") logger = backend_cls(_logging_kwargs) self.register_callback( logging_callback_cls( logger, level=level, logging_frequencies=logging_frequencies, reduce_types=reduce_types)) register_logger(self._callbacks[-1]._logger, self.name) @staticmethod def _search_for_prev_state(path, extensions=None): """ Helper function to search in a given path for previous epoch states (indicated by extensions) Parameters ---------- path : str the path to search in extensions : list list of strings containing valid file extensions for checkpoint files Returns ------- str the file containing the latest checkpoint (if available) None if no latst checkpoint was found int the latest epoch (1 if no checkpoint was found) """ if extensions is None: extensions = [] files = [] for file in os.listdir(path): for ext in extensions: if not ext.startswith("."): ext = "." 
+ ext if not file.endswith(ext): continue if not file.startswith("checkpoint"): continue if file.endswith("_best" + ext): continue files.append(file) break if files: latest_epoch = max([ int(x.rsplit("_", 1)[-1].split(".", 1)[0]) for x in files]) latest_state_filename = [x for x in files if x.startswith("checkpoint_epoch_%d" % latest_epoch)][0] latest_state_path = os.path.join(path, latest_state_filename) return latest_state_path, latest_epoch return None, 1 def register_callback(self, callback: AbstractCallback): """ Registers the passed callback to the trainer, after checking it is really a valid callback Parameters ---------- callback : AbstractCallback the potential callback to register Raises ------ AssertionError :param:`callback` is not an instance of :class:`AbstractCallback` and does not provide the methods `at_iter_begin`, `at_iter_end`, `at_epoch_begin` and `at_epoch_end` """ has_all_attrs = True for attr in ("epoch",): has_all_attrs = has_all_attrs and hasattr(callback, "at_%s_begin" % attr) has_all_attrs = has_all_attrs and hasattr(callback, "at_%s_end" % attr) assert has_all_attrs, "Given callback is not valid; Must be " \ "instance of AbstractCallback or provide " \ "functions 'at_epoch_begin' and 'at_epoch_end'" super().register_callback(callback) ================================================ FILE: delira/training/callbacks/__init__.py ================================================ from delira import get_backends from delira.training.callbacks.logging_callback import DefaultLoggingCallback from delira.training.callbacks.abstract_callback import AbstractCallback from delira.training.callbacks.early_stopping import EarlyStopping if "TORCH" in get_backends(): from delira.training.callbacks.pytorch_schedulers import \ DefaultPyTorchSchedulerCallback from delira.training.callbacks.pytorch_schedulers import \ CosineAnnealingLRCallback as CosineAnnealingLRCallbackPyTorch from delira.training.callbacks.pytorch_schedulers import \ ExponentialLRCallback as ExponentialLRCallbackPyTorch from delira.training.callbacks.pytorch_schedulers import \ LambdaLRCallback as LambdaLRCallbackPyTorch from delira.training.callbacks.pytorch_schedulers import \ MultiStepLRCallback as MultiStepLRCallbackPyTorch from delira.training.callbacks.pytorch_schedulers import \ ReduceLROnPlateauCallback as ReduceLROnPlateauCallbackPyTorch from delira.training.callbacks.pytorch_schedulers import StepLRCallback \ as StepLRCallbackPyTorch from delira.training.callbacks.pytorch_schedulers import \ OneCycleLRCallback as OneCycleLRCallbackPyTorch ================================================ FILE: delira/training/callbacks/abstract_callback.py ================================================ class AbstractCallback(object): """ Implements abstract callback interface. 
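A minimal sketch of this callback contract, assuming a trainer that exposes a ``stop_training`` attribute (as the :class:`BaseNetworkTrainer` above does): every hook returns a dict whose keys name trainer attributes to update.

    class StopAfterTenEpochs(AbstractCallback):
        # illustrative subclass, not part of the original file
        def at_epoch_end(self, trainer, curr_epoch=0, **kwargs):
            # returning {"stop_training": True} makes the trainer leave
            # its training loop after the current epoch
            return {"stop_training": curr_epoch >= 10}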
All callbacks should be derived from this class See Also -------- :class:`AbstractNetworkTrainer` """ def __init__(self, *args, **kwargs): """ Parameters ---------- *args : positional arguments **kwargs : keyword arguments """ super().__init__(*args, **kwargs) def at_epoch_begin(self, trainer, *args, **kwargs): """ Function which will be executed at begin of each epoch Parameters ---------- trainer : :class:`AbstractNetworkTrainer` **kwargs : additional keyword arguments Returns ------- dict modified trainer attributes, where the name must correspond to the trainer's attribute name Notes ----- The basetrainer calls the callbacks with the following additional arguments: `val_metrics`(dict), `val_score_key`(str), `curr_epoch`(int) """ return {} def at_epoch_end(self, trainer, *args, **kwargs): """ Function which will be executed at end of each epoch Parameters ---------- trainer : :class:`AbstractNetworkTrainer` **kwargs : additional keyword arguments Returns ------- dict modified trainer attributes, where the name must correspond to the trainer's attribute name Notes ----- The basetrainer calls the callbacks with the following additional arguments: `val_metrics`(dict), `val_score_key`(str), `curr_epoch`(int) """ return {} def at_training_begin(self, trainer, *args, **kwargs): """ Function which will be executed at begin of training Parameters ---------- trainer : :class:`AbstractNetworkTrainer` **kwargs : additional keyword arguments Returns ------- dict modified trainer attributes, where the name must correspond to the trainer's attribute name """ return {} def at_training_end(self, trainer, *args, **kwargs): """ Function which will be executed at end of training Parameters ---------- trainer : :class:`AbstractNetworkTrainer` **kwargs : additional keyword arguments Returns ------- dict modified trainer attributes, where the name must correspond to the trainer's attribute name """ return {} def at_iter_begin(self, trainer, *args, **kwargs): """ Function which will be executed at begin of each iteration Parameters ---------- trainer : :class:`AbstractNetworkTrainer` **kwargs : additional keyword arguments Returns ------- dict modified trainer attributes, where the name must correspond to the trainer's attribute name Notes ----- The predictor calls the callbacks with the following additional arguments: `iter_num`(int), `train`(bool) The basetrainer adds following arguments (wrt the predictor): `curr_epoch`(int), `global_iter_num`(int) """ return {} def at_iter_end(self, trainer, *args, **kwargs): """ Function which will be executed at end of each iteration Parameters ---------- trainer : :class:`AbstractNetworkTrainer` **kwargs : additional keyword arguments Returns ------- dict modified trainer attributes, where the name must correspond to the trainer's attribute name Notes ----- The predictor calls the callbacks with the following additional arguments: `iter_num`(int), `metrics`(dict), `data_dict`(dict, contains prediction and input data), `train`(bool) The basetrainer adds following arguments (wrt the predictor): `curr_epoch`(int), `global_iter_num`(int) """ return {} ================================================ FILE: delira/training/callbacks/early_stopping.py ================================================ from delira.training.callbacks.abstract_callback import AbstractCallback class EarlyStopping(AbstractCallback): """ Implements Early Stopping as callback See Also -------- :class:`AbstractCallback` """ def __init__(self, monitor_key, min_delta=0, patience=0, mode='min'): """ 
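A minimal usage sketch, assuming an already constructed trainer whose validation metrics contain a key named 'val_CE' (both the trainer instance and the key name are assumptions made only for this example):

    # stop training once 'val_CE' has not improved for 5 consecutive epochs
    trainer.register_callback(
        EarlyStopping(monitor_key="val_CE", patience=5, mode="min"))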
Parameters ---------- monitor_key : str the validation key to monitor min_delta : float or int the minimum difference between the best metric value so far and the current one patience : int number of epochs to wait before stopping training mode : str (default: 'min') defines the optimum for the monitored value """ super().__init__() self.monitor_key = monitor_key self.min_delta = min_delta self.patience = patience self.mode = mode if 'min' == mode: self.best_metric = float('inf') elif 'max' == mode: self.best_metric = - float('inf') else: raise ValueError("Unknown compare mode: Got %s, but expected one " "of ['min', 'max']" % mode) self.epochs_waited = 0 def _is_better(self, metric): """ Helper function to decide whether the current metric is better than the best metric so far Parameters ---------- metric : current metric value Returns ------- bool whether this metric is the new best metric or not """ if 'min' == self.mode: return metric < (self.best_metric - self.min_delta) else: return metric > (self.best_metric + self.min_delta) def at_epoch_end(self, trainer, **kwargs): """ Actual early stopping: Checks at end of each epoch if monitored metric is new best and if it hasn't improved over `self.patience` epochs, the training will be stopped Parameters ---------- trainer : :class:`AbstractNetworkTrainer` the trainer whose arguments can be modified **kwargs : additional keyword arguments Returns ------- :class:`AbstractNetworkTrainer` trainer with modified attributes """ metric = kwargs.get("val_metrics", {})[self.monitor_key] if self._is_better(metric): self.best_metric = metric self.epochs_waited = 0 else: self.epochs_waited += 1 if self.epochs_waited >= self.patience: stop_training = True else: stop_training = False return {"stop_training": stop_training} ================================================ FILE: delira/training/callbacks/logging_callback.py ================================================ from delira.training.callbacks.abstract_callback import AbstractCallback from delira.logging import make_logger, BaseBackend import logging class DefaultLoggingCallback(AbstractCallback): """ A default logging callback which logs only the metrics; should be subclassed for additional logging """ def __init__(self, backend: BaseBackend, max_queue_size: int = None, logging_frequencies=None, reduce_types=None, level=logging.NOTSET): """ Parameters ---------- backend : :class:`delira.logging.base_backend.BaseBackend` the logging backend max_queue_size : int the maximum queue size logging_frequencies : int or dict specifies how often to log for each key. If int: integer will be applied to all valid keys if dict: should contain a frequency per valid key. Missing keys will be filled with a frequency of 1 (log every time) None is equal to empty dict here. reduce_types : str, FunctionType or dict if str: specifies the reduction type to use. Valid types are 'last' | 'first' | 'mean' | 'max' | 'min'. The given type will be mapped to all valid keys. if FunctionType: specifies the actual reduction function. Will be applied for all keys. if dict: should contain pairs of valid logging keys and either str or FunctionType. Specifies the logging value per key. Missing keys will be filled with a default value of 'last'. Valid types for strings are 'last' | 'first' | 'mean' | 'max' | 'min'.
level : int the logging level for python's internal logging module """ super().__init__() self._logger = make_logger(backend=backend, max_queue_size=max_queue_size, logging_frequencies=logging_frequencies, reduce_types=reduce_types, level=level) def at_iter_end(self, trainer, iter_num=None, data_dict=None, train=False, **kwargs): """ Function logging the metrics at the end of each iteration Parameters ---------- trainer : :class:`BaseNetworkTrainer` the current trainer object (unused in this callback) iter_num : int number of the current iteration inside the current epoch (unused in this callback) data_dict : dict the current data dict (including predictions) train: bool signals if callback is called by trainer or predictor **kwargs : additional keyword arguments Returns ------- dict empty dict, because no state should be updated """ metrics = kwargs.get("metrics", {}) for k, v in metrics.items(): self._logger.log({"scalar": {"tag": self.create_tag(k, train), "scalar_value": v}}) return {} @staticmethod def create_tag(tag: str, train: bool): if train: tag = tag + "_val" return tag ================================================ FILE: delira/training/callbacks/pytorch_schedulers.py ================================================ from delira import get_backends from delira.training.callbacks.abstract_callback import AbstractCallback if 'TORCH' in get_backends(): from torch.optim.lr_scheduler import ReduceLROnPlateau, \ CosineAnnealingLR, ExponentialLR, LambdaLR, MultiStepLR, StepLR, \ OneCycleLR class DefaultPyTorchSchedulerCallback(AbstractCallback): """ Implements a Callback, which `at_epoch_end` function is suitable for most schedulers """ def __init__(self, *args, **kwargs): """ Parameters ---------- *args : Arbitrary Positional Arguments **kwargs : Arbitrary Keyword Arguments """ super().__init__() self.scheduler = None def at_epoch_end(self, trainer, **kwargs): """ Executes a single scheduling step Parameters ---------- trainer : :class:`PyTorchNetworkTrainer` the trainer class, which can be changed **kwargs : additional keyword arguments Returns ------- :class:`PyTorchNetworkTrainer` modified trainer """ self.scheduler.step(epoch=kwargs.get("curr_epoch", None)) return {} class OneCycleLRCallback(DefaultPyTorchSchedulerCallback): """ Wraps PyTorch's `OneCycleLR` Scheduler as Callback """ def __init__( self, optimizer, max_lr, total_steps=None, epochs=None, steps_per_epoch=None, pct_start=0.3, anneal_strategy='cos', cycle_momentum=True, base_momentum=0.85, max_momentum=0.95, div_factor=25.0, final_div_factor=10000.0, last_epoch=-1): """ Parameters ---------- optimizer (Optimizer): Wrapped optimizer. max_lr (float or list): Upper learning rate boundaries in the cycle for each parameter group. total_steps (int): The total number of steps in the cycle. Note that if a value is provided here, then it must be inferred by providing a value for epochs and steps_per_epoch. Default: None epochs (int): The number of epochs to train for. This is used along with steps_per_epoch in order to infer the total number of steps in the cycle if a value for total_steps is not provided. Default: None steps_per_epoch (int): The number of steps per epoch to train for. This is used along with epochs in order to infer the total number of steps in the cycle if a value for total_steps is not provided. Default: None pct_start (float): The percentage of the cycle (in number of steps) spent increasing the learning rate. Default: 0.3 anneal_strategy (str): {'cos', 'linear'} Specifies the annealing strategy. 
Default: 'cos' cycle_momentum (bool): If ``True``, momentum is cycled inversely to learning rate between 'base_momentum' and 'max_momentum'. Default: True base_momentum (float or list): Lower momentum boundaries in the cycle for each parameter group. Note that momentum is cycled inversely to learning rate; at the peak of a cycle, momentum is 'base_momentum' and learning rate is 'max_lr'. Default: 0.85 max_momentum (float or list): Upper momentum boundaries in the cycle for each parameter group. Functionally, it defines the cycle amplitude (max_momentum - base_momentum). Note that momentum is cycled inversely to learning rate; at the start of a cycle, momentum is 'max_momentum' and learning rate is 'base_lr' Default: 0.95 div_factor (float): Determines the initial learning rate via initial_lr = max_lr/div_factor Default: 25 final_div_factor (float): Determines the minimum learning rate via min_lr = initial_lr/final_div_factor Default: 1e4 last_epoch (int): The index of the last batch. This parameter is used when resuming a training job. Since `step()` should be invoked after each batch instead of after each epoch, this number represents the total number of *batches* computed, not the total number of epochs computed. When last_epoch=-1, the schedule is started from the beginning. Default: -1 """ super().__init__() self.scheduler = OneCycleLR( optimizer, max_lr, total_steps, epochs, steps_per_epoch, pct_start, anneal_strategy, cycle_momentum, base_momentum, max_momentum, div_factor, final_div_factor, last_epoch) def at_iter_begin(self, trainer, train, **kwargs): """ Executes a single scheduling step Parameters ---------- trainer : :class:`PyTorchNetworkTrainer` the trainer class, which can be changed kwargs : additional keyword arguments Returns ------- :class:`PyTorchNetworkTrainer` modified trainer """ if train: self.scheduler.step() return {} def at_epoch_end(self, trainer, **kwargs): return {} class ReduceLROnPlateauCallback(DefaultPyTorchSchedulerCallback): """ Wraps PyTorch's `ReduceLROnPlateau` Scheduler as Callback """ def __init__(self, optimizer, mode='min', factor=0.1, patience=10, verbose=False, threshold=1e-4, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-8): """ Parameters ---------- optimizer : Optimizer Wrapped optimizer. mode : str One of `min`, `max`. In `min` mode, lr will be reduced when the quantity monitored has stopped decreasing; in `max` mode it will be reduced when the quantity monitored has stopped increasing. Default: 'min'. factor : float Factor by which the learning rate will be reduced. new_lr = lr * factor. Default: 0.1. patience : int Number of epochs with no improvement after which learning rate will be reduced. For example, if `patience = 2`, then we will ignore the first 2 epochs with no improvement, and will only decrease the LR after the 3rd epoch if the loss still hasn't improved then. Default: 10. verbose : bool If ``True``, prints a message to stdout for each update. Default: ``False``. threshold : float Threshold for measuring the new optimum, to only focus on significant changes. Default: 1e-4. threshold_mode : string One of `rel`, `abs`. In `rel` mode, dynamic_threshold = best * ( 1 + threshold ) in 'max' mode or best * ( 1 - threshold ) in `min` mode. In `abs` mode, dynamic_threshold = best + threshold in `max` mode or best - threshold in `min` mode. Default: 'rel'. cooldown : int Number of epochs to wait before resuming normal operation after lr has been reduced. Default: 0. min_lr : float or list A scalar or a list of scalars. 
A lower bound on the learning rate of all param groups or each group respectively. Default: 0. eps : float Minimal decay applied to lr. If the difference between new and old lr is smaller than eps, the update is ignored. Default: 1e-8 """ super().__init__() self.scheduler = ReduceLROnPlateau( optimizer, mode, factor, patience, verbose, threshold, threshold_mode, cooldown, min_lr, eps) def at_epoch_end(self, trainer, **kwargs): """ Executes a single scheduling step Parameters ---------- trainer : :class:`PyTorchNetworkTrainer` the trainer class, which can be changed kwargs : additional keyword arguments Returns ------- :class:`PyTorchNetworkTrainer` modified trainer """ val_metrics = kwargs.get("val_metrics", {}) val_score_key = kwargs.get("val_score_key", None) metrics = val_metrics.get(val_score_key) self.scheduler.step(metrics=metrics) return {} class CosineAnnealingLRCallback(DefaultPyTorchSchedulerCallback): """ Wraps PyTorch's `CosineAnnealingLR` Scheduler as callback """ def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1): """ Parameters ---------- optimizer : optimizer Wrapped optimizer. T_max : int Maximum number of iterations. eta_min : float Minimum learning rate. Default: 0. last_epoch : int The index of last epoch. Default: -1. """ super().__init__() self.scheduler = CosineAnnealingLR(optimizer, T_max, eta_min, last_epoch) class ExponentialLRCallback(DefaultPyTorchSchedulerCallback): """ Wraps PyTorch's `ExponentialLR` Scheduler as callback """ def __init__(self, optimizer, gamma, last_epoch=-1): """ Parameters ---------- optimizer : Optimizer Wrapped optimizer. gamma : float Multiplicative factor of learning rate decay. last_epoch : int The index of last epoch. Default: -1. """ super().__init__() self.scheduler = ExponentialLR(optimizer, gamma, last_epoch) class LambdaLRCallback(DefaultPyTorchSchedulerCallback): """ Wraps PyTorch's `LambdaLR` Scheduler as callback """ def __init__(self, optimizer, lr_lambda, last_epoch=-1): """ Parameters ---------- optimizer : Optimizer Wrapped optimizer. lr_lambda : function or list A function which computes a multiplicative factor given an integer parameter epoch, or a list of such functions, one for each group in optimizer.param_groups. last_epoch : int The index of last epoch. Default: -1. """ super().__init__() self.scheduler = LambdaLR(optimizer, lr_lambda, last_epoch) class MultiStepLRCallback(DefaultPyTorchSchedulerCallback): """ Wraps PyTorch's `MultiStepLR` Scheduler as callback """ def __init__(self, optimizer, milestones, gamma=0.1, last_epoch=-1): """ Parameters ---------- optimizer : Optimizer Wrapped optimizer. milestones : list List of epoch indices. Must be increasing. gamma : float Multiplicative factor of learning rate decay. Default: 0.1. last_epoch : int The index of last epoch. Default: -1. """ super().__init__() self.scheduler = MultiStepLR( optimizer, milestones, gamma, last_epoch) class StepLRCallback(DefaultPyTorchSchedulerCallback): """ Wraps PyTorch's `StepLR` Scheduler as callback """ def __init__(self, optimizer, step_size, gamma=0.1, last_epoch=-1): """ Parameters ---------- optimizer : Optimizer Wrapped optimizer. step_size : int Period of learning rate decay. gamma :float Multiplicative factor of learning rate decay. Default: 0.1. last_epoch : int The index of last epoch. 
Default: -1 """ super().__init__() self.scheduler = StepLR(optimizer, step_size, gamma, last_epoch) ================================================ FILE: delira/training/losses.py ================================================ from delira import get_backends if "TORCH" in get_backends(): import torch import torch.nn.functional as F class BCEFocalLossPyTorch(torch.nn.Module): """ Focal loss for binary case without(!) logit """ def __init__(self, alpha=None, gamma=2, reduction='elementwise_mean'): """ Implements Focal Loss for binary class case Parameters ---------- alpha : float alpha has to be in range [0,1], assigns class weight gamma : float focusing parameter reduction : str Specifies the reduction to apply to the output: ‘none’ | ‘elementwise_mean’ | ‘sum’. ‘none’: no reduction will be applied, ‘elementwise_mean’: the sum of the output will be divided by the number of elements in the output, ‘sum’: the output will be summed (further information about parameters above can be found in pytorch documentation) Returns ------- torch.Tensor loss value """ super().__init__() self.alpha = alpha self.gamma = gamma self.reduction = reduction def forward(self, p, t): bce_loss = F.binary_cross_entropy(p, t, reduction='none') if self.alpha is not None: # create weights for alpha alpha_weight = torch.ones(t.shape, device=p.device) * \ self.alpha alpha_weight = torch.where(torch.eq(t, 1.), alpha_weight, 1 - alpha_weight) else: alpha_weight = torch.Tensor([1]).to(p.device) # create weights for focal loss focal_weight = 1 - torch.where(torch.eq(t, 1.), p, 1 - p) focal_weight.pow_(self.gamma) focal_weight.to(p.device) # compute loss focal_loss = focal_weight * alpha_weight * bce_loss if self.reduction == 'elementwise_mean': return torch.mean(focal_loss) if self.reduction == 'none': return focal_loss if self.reduction == 'sum': return torch.sum(focal_loss) raise AttributeError('Reduction parameter unknown.') class BCEFocalLossLogitPyTorch(torch.nn.Module): """ Focal loss for binary case WITH logit """ def __init__(self, alpha=None, gamma=2, reduction='elementwise_mean'): """ Implements Focal Loss for binary class case Parameters ---------- alpha : float alpha has to be in range [0,1], assigns class weight gamma : float focusing parameter reduction : str Specifies the reduction to apply to the output: ‘none’ | ‘elementwise_mean’ | ‘sum’. 
‘none’: no reduction will be applied, ‘elementwise_mean’: the sum of the output will be divided by the number of elements in the output, ‘sum’: the output will be summed (further information about parameters above can be found in pytorch documentation) Returns ------- torch.Tensor loss value """ super().__init__() self.alpha = alpha self.gamma = gamma self.reduction = reduction def forward(self, p, t): bce_loss = F.binary_cross_entropy_with_logits( p, t, reduction='none') p = torch.sigmoid(p) if self.alpha is not None: # create weights for alpha alpha_weight = torch.ones(t.shape, device=p.device) * \ self.alpha alpha_weight = torch.where(torch.eq(t, 1.), alpha_weight, 1 - alpha_weight) else: alpha_weight = torch.Tensor([1]).to(p.device) # create weights for focal loss focal_weight = 1 - torch.where(torch.eq(t, 1.), p, 1 - p) focal_weight.pow_(self.gamma) focal_weight.to(p.device) # compute loss focal_loss = focal_weight * alpha_weight * bce_loss if self.reduction == 'elementwise_mean': return torch.mean(focal_loss) if self.reduction == 'none': return focal_loss if self.reduction == 'sum': return torch.sum(focal_loss) raise AttributeError('Reduction parameter unknown.') ================================================ FILE: delira/training/metrics.py ================================================ from sklearn.metrics import accuracy_score, balanced_accuracy_score, \ f1_score, fbeta_score, hamming_loss, jaccard_similarity_score, log_loss, \ matthews_corrcoef, precision_score, recall_score, zero_one_loss, \ roc_auc_score from sklearn.preprocessing import label_binarize import numpy as np class SklearnClassificationMetric(object): def __init__(self, score_fn, gt_logits=False, pred_logits=True, **kwargs): """ Wraps an score function as a metric Parameters ---------- score_fn : function function which should be wrapped gt_logits : bool whether given ``y_true`` are logits or not pred_logits : bool whether given ``y_pred`` are logits or not **kwargs: variable number of keyword arguments passed to score_fn function """ self._score_fn = score_fn self._gt_logits = gt_logits self._pred_logits = pred_logits self.kwargs = kwargs def __call__(self, y_true, y_pred, **kwargs): """ Compute metric with score_fn Parameters ---------- y_true: np.ndarray ground truth data y_pred: np.ndarray predictions of network kwargs: variable number of keyword arguments passed to score_fn Returns ------- float result from score function """ if self._gt_logits: y_true = np.argmax(y_true, axis=-1) if self._pred_logits: y_pred = np.argmax(y_pred, axis=-1) return self._score_fn(y_true=y_true, y_pred=y_pred, **kwargs, **self.kwargs) class SklearnAccuracyScore(SklearnClassificationMetric): """ Accuracy Metric """ def __init__(self, gt_logits=False, pred_logits=True, **kwargs): super().__init__(accuracy_score, gt_logits, pred_logits, **kwargs) class SklearnBalancedAccuracyScore(SklearnClassificationMetric): """ Balanced Accuracy Metric """ def __init__(self, gt_logits=False, pred_logits=True, **kwargs): super().__init__(balanced_accuracy_score, gt_logits, pred_logits, **kwargs) class SklearnF1Score(SklearnClassificationMetric): """ F1 Score """ def __init__(self, gt_logits=False, pred_logits=True, **kwargs): super().__init__(f1_score, gt_logits, pred_logits, **kwargs) class SklearnFBetaScore(SklearnClassificationMetric): """ F-Beta Score (Generalized F1) """ def __init__(self, gt_logits=False, pred_logits=True, **kwargs): super().__init__(fbeta_score, gt_logits, pred_logits, **kwargs) class 
SklearnHammingLoss(SklearnClassificationMetric): """ Hamming Loss """ def __init__(self, gt_logits=False, pred_logits=True, **kwargs): super().__init__(hamming_loss, gt_logits, pred_logits, **kwargs) class SklearnJaccardSimilarityScore(SklearnClassificationMetric): """ Jaccard Similarity Score """ def __init__(self, gt_logits=False, pred_logits=True, **kwargs): super().__init__(jaccard_similarity_score, gt_logits, pred_logits, **kwargs) class SklearnLogLoss(SklearnClassificationMetric): """ Log Loss (NLL) """ def __init__(self, gt_logits=False, pred_logits=True, **kwargs): super().__init__(log_loss, gt_logits, pred_logits, **kwargs) class SklearnMatthewsCorrCoeff(SklearnClassificationMetric): """ Matthews Correlation Coefficient """ def __init__(self, gt_logits=False, pred_logits=True, **kwargs): super().__init__(matthews_corrcoef, gt_logits, pred_logits, **kwargs) class SklearnPrecisionScore(SklearnClassificationMetric): """ Precision Score """ def __init__(self, gt_logits=False, pred_logits=True, **kwargs): super().__init__(precision_score, gt_logits, pred_logits, **kwargs) class SklearnRecallScore(SklearnClassificationMetric): """ Recall Score """ def __init__(self, gt_logits=False, pred_logits=True, **kwargs): super().__init__(recall_score, gt_logits, pred_logits, **kwargs) class SklearnZeroOneLoss(SklearnClassificationMetric): """ Zero One Loss """ def __init__(self, gt_logits=False, pred_logits=True, **kwargs): super().__init__(zero_one_loss, gt_logits, pred_logits, **kwargs) class AurocMetric(object): def __init__(self, classes=(0, 1), **kwargs): """ Implements the auroc metric for binary and multi class classification Parameters ---------- classes: array-like uniquely holds the label for each class. kwargs: variable number of keyword arguments passed to roc_auc_score Raises ------ ValueError if not at least two classes are provided """ self.classes = classes self.kwargs = kwargs if len(self.classes) < 2: raise ValueError("At least classes 2 must exist for " "classification. Only classes {} were passed to " "AurocMetric.".format(classes)) def __call__(self, y_true, y_pred, **kwargs): """ Compute auroc Parameters ---------- y_true: np.ndarray ground truth data with shape (N) y_pred: np.ndarray predictions of network in numpy format with shape (N, nclasses) kwargs: variable number of keyword arguments passed to roc_auc_score Returns ------- float computes auc score Raises ------ ValueError if two classes are given and the predictions contain more than two classes """ # binary classification if len(self.classes) == 2: # single output unit (e.g. sigmoid) if len(y_pred.shape) == 1 or y_pred.shape[2] == 1: return roc_auc_score(y_true, y_pred, **kwargs) # output of two units (e.g. 
softmax) elif y_pred.shape[2] == 2: return roc_auc_score(y_true, y_pred[:, 1], **kwargs) else: raise ValueError("Can not compute auroc metric for binary " "classes with {} predicted " "classes.".format(y_pred.shape[2])) # classification with multiple classes if len(self.classes) > 2: y_true_bin = label_binarize(y_true, self.classes) return roc_auc_score(y_true_bin, y_pred, **kwargs, **self.kwargs) ================================================ FILE: delira/training/predictor.py ================================================ import logging import gc import numpy as np from tqdm import tqdm from delira.data_loading import DataManager from delira.training.utils import convert_to_numpy_identity from ..utils.config import LookupConfig from delira.training.callbacks import AbstractCallback logger = logging.getLogger(__name__) class Predictor(object): """ Defines an API for Predictions from a Network See Also -------- :class:`PyTorchNetworkTrainer` """ # static variable to prevent certain attributes from overwriting __KEYS_TO_GUARD = [] def __init__( self, model, key_mapping: dict, convert_batch_to_npy_fn=convert_to_numpy_identity, prepare_batch_fn=lambda x: x, callbacks=None, **kwargs): """ Parameters ---------- model : :class:`AbstractNetwork` the model to predict from key_mapping : dict a dictionary containing the mapping from the ``data_dict`` to the actual model's inputs. E.g. if a model accepts one input named 'x' and the data_dict contains one entry named 'data' this argument would have to be ``{'x': 'data'}`` convert_batch_args_kwargs_to_npy_fn : type, optional a callable function to convert tensors in positional and keyword arguments to numpy; default: identity function prepare_batch_fn : type, optional function converting a batch-tensor to the framework specific tensor-type and pushing it to correct device, default: identity function callbacks : list initial callbacks to register **kwargs : additional keyword arguments """ if callbacks is None: callbacks = [] self._setup(model, key_mapping, convert_batch_to_npy_fn, prepare_batch_fn, callbacks, **kwargs) self._tqdm_desc = "Test" def _setup(self, network, key_mapping, convert_batch_args_kwargs_to_npy_fn, prepare_batch_fn, callbacks, **kwargs): """ Parameters ---------- network : :class:`AbstractNetwork` the network to predict from key_mapping : dict a dictionary containing the mapping from the ``data_dict`` to the actual model's inputs. E.g. if a model accepts one input named 'x' and the data_dict contains one entry named 'data' this argument would have to be ``{'x': 'data'}`` convert_batch_to_npy_fn : type a callable function to convert tensors in positional and keyword arguments to numpy prepare_batch_fn : (dict, str, str) -> dict function converting a batch-tensor to the framework specific tensor-type and pushing it to correct device, default: identity function callbacks : list initial callbacks to register """ self.module = network self.key_mapping = key_mapping self._convert_to_npy_fn = convert_batch_args_kwargs_to_npy_fn self._prepare_batch = prepare_batch_fn self._callbacks = [] for cb in callbacks: self.register_callback(cb) def __call__(self, data: dict, **kwargs): """ Method to call the class. 
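As a rough usage sketch showing how ``key_mapping`` routes the batch entry ``'data'`` to the model input ``'x'`` (``my_model`` stands for any :class:`AbstractNetwork`-like callable returning a dict; it and the array shape are only placeholders):
>>> import numpy as np
>>> predictor = Predictor(model=my_model, key_mapping={'x': 'data'})
>>> preds = predictor({'data': np.zeros((4, 3))})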
Returns the predictions corresponding to the given data obtained by the model Parameters ---------- data : dict batch dictionary Returns ------- dict predicted data """ return self.predict(data, **kwargs) def predict(self, data: dict, already_prepared=False, **kwargs): """ Predict single batch Returns the predictions corresponding to the given data obtained by the model Parameters ---------- data : dict batch dictionary already_prepared : bool if True, the `prepare_batch` function won't be called on the data anymore **kwargs : keyword arguments(directly passed to ``prepare_batch``) Returns ------- dict predicted data """ if not already_prepared: data = self._prepare_batch(data, **kwargs) mapped_data = { k: data[v] for k, v in self.key_mapping.items()} pred = self.module( **mapped_data ) # converts positional arguments and keyword arguments, # but returns only keyword arguments, since positional # arguments are not given. return self._convert_to_npy_fn( **pred )[1] def _at_iter_begin(self, iter_num, **kwargs): """ Function defining the behavior executed at beginning of each iteration Parameters ---------- iter_num : int the number of the current iteration **kwargs : additional keyword arguments (forwarded to callbacks call) Returns ------- dict combined dicts returned by the callbacks """ return_dict = {} for cb in self._callbacks: return_dict.update(cb.at_iter_begin(self, iter_num=iter_num, train=False, **kwargs)) return return_dict def _at_iter_end(self, iter_num, data_dict, metrics, **kwargs): """ Function defining the behavior executed at beginning of each iteration Parameters ---------- iter_num : int the number of the current iteration data_dict : dict dictionary holding input data and predictions metrics: dict calculated metrics **kwargs : additional keyword arguments (forwarded to callbacks call) Returns ------- dict combined dicts returned by the callbacks """ return_dict = {} for cb in self._callbacks: return_dict.update(cb.at_iter_end(self, iter_num=iter_num, data_dict=data_dict, metrics=metrics, train=False, **kwargs)) return return_dict def predict_data_mgr( self, datamgr: DataManager, batchsize=None, metrics=None, metric_keys=None, verbose=False, **kwargs): """ Defines a routine to predict data obtained from a batchgenerator without explicitly caching anything Parameters ---------- datamgr : :class:`DataManager` Manager producing a generator holding the batches batchsize : int Artificial batchsize (sampling will be done with batchsize 1 and sampled data will be stacked to match the artificial batchsize)(default: None) metrics : dict the metrics to calculate metric_keys : dict the ``batch_dict`` items to use for metric calculation verbose : bool whether to show a progress-bar or not, default: False kwargs : keyword arguments passed to :func:`prepare_batch_fn` Yields ------ dict a dictionary containing all predictions of the current batch dict a dictionary containing all metrics of the current batch """ if metrics is None: metrics = {} orig_num_aug_processes = datamgr.n_process_augmentation orig_batch_size = datamgr.batch_size if batchsize is None: batchsize = orig_batch_size datamgr.batch_size = 1 batchgen = datamgr.get_batchgen() n_batches = datamgr.n_batches if verbose: iterable = tqdm(enumerate(batchgen), unit=' sample', total=n_batches, desc=self._tqdm_desc) else: iterable = enumerate(batchgen) batch_list = [] for i, batch in iterable: Predictor._at_iter_begin(self, iter_num=i) if not batch_list and (n_batches - i) < batchsize: batchsize = n_batches - i logger.debug("Set 
Batchsize down to %d to avoid cutting " "of the last batches" % batchsize) batch_list.append(batch) # if queue is full process queue: if batchsize is None or len(batch_list) >= batchsize: batch_dict = {} for _batch in batch_list: for key, val in _batch.items(): if key in batch_dict.keys(): batch_dict[key].append(val) else: batch_dict[key] = [val] for key, val_list in batch_dict.items(): batch_dict[key] = np.concatenate(val_list) batch_dict = self._prepare_batch(batch_dict) preds = self.predict(batch_dict, already_prepared=True, **kwargs) # convert batchdict back to numpy (self.predict may convert it # to backend-specific tensor type) - no-op if already numpy batch_dict = self._convert_to_npy_fn(**batch_dict)[1] preds_batch = LookupConfig() # explicitly free memory of old lookup config gc.collect() preds_batch.update(batch_dict) preds_batch.update(preds) # calculate metrics for predicted batch _metric_vals = self.calc_metrics(preds_batch, metrics=metrics, metric_keys=metric_keys) self._at_iter_end(data_dict={**batch_dict, **preds_batch}, metrics={"val_" + k: v for k, v in _metric_vals.items()}, iter_num=i) yield preds, _metric_vals batch_list = [] datamgr.batch_size = orig_batch_size datamgr.n_process_augmentation = orig_num_aug_processes return def predict_data_mgr_cache_metrics_only(self, datamgr, batchsize=None, metrics=None, metric_keys=None, verbose=False, **kwargs): """ Defines a routine to predict data obtained from a batchgenerator and caches the metrics Parameters ---------- datamgr : :class:`DataManager` Manager producing a generator holding the batches batchsize : int Artificial batchsize (sampling will be done with batchsize 1 and sampled data will be stacked to match the artificial batchsize)(default: None) metrics : dict the metrics to calculate metric_keys : dict the ``batch_dict`` items to use for metric calculation verbose : bool whether to show a progress-bar or not, default: False kwargs : keyword arguments passed to :func:`prepare_batch_fn` Yields ------ dict a dictionary containing all validation metrics (maybe empty) Notes ----- This function stores each prediction temporarily for metric calculation; This results in a (typically) way lower memory consumption than :meth:`Predictor.predict_data_mgr_cache_all`, but still caches the metrics. 
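A hypothetical call pattern (``predictor``, ``manager`` and ``my_mse`` are placeholders for an existing :class:`Predictor`, a :class:`DataManager` and a metric function; they are not defined here):
>>> metrics = next(predictor.predict_data_mgr_cache_metrics_only(manager, metrics={'mse': my_mse}))
>>> print(metrics['mse'])  # one value per predicted batch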
If this is not desired, it is recommended to use :meth:`Predictor.predict_data_mgr` and iterate over the generator as this only produces per-batch metrics and predictions and does not cache anything by default """ if metrics is None: metrics = {} yield from self.predict_data_mgr_cache(datamgr=datamgr, batchsize=batchsize, metrics=metrics, metric_keys=metric_keys, verbose=verbose, cache_preds=False, **kwargs) return def predict_data_mgr_cache_all(self, datamgr, batchsize=None, metrics=None, metric_keys=None, verbose=False, **kwargs): """ Defines a routine to predict data obtained from a batchgenerator and caches all predictions and metrics (yields them in dicts) Parameters ---------- datamgr : :class:`DataManager` Manager producing a generator holding the batches batchsize : int Artificial batchsize (sampling will be done with batchsize 1 and sampled data will be stacked to match the artificial batchsize)(default: None) metrics : dict the metrics to calculate metric_keys : dict the ``batch_dict`` items to use for metric calculation verbose : bool whether to show a progress-bar or not, default: False kwargs : keyword arguments passed to :func:`prepare_batch_fn` Yields ------ dict a dictionary containing all predictions; dict a dictionary containing all validation metrics (maybe empty) Warnings -------- Since this function caches all predictions and metrics, this may result in huge memory consumption. If you are running out of memory, please have a look at :meth:`Predictor.predict_data_mgr_cache_metrics_only` or :meth:`Predictor.predict_data_mgr` """ if metrics is None: metrics = {} yield from self.predict_data_mgr_cache(datamgr=datamgr, batchsize=batchsize, metrics=metrics, metric_keys=metric_keys, verbose=verbose, cache_preds=True, **kwargs) return def predict_data_mgr_cache(self, datamgr, batchsize=None, metrics=None, metric_keys=None, verbose=False, cache_preds=False, **kwargs): """ Defines a routine to predict data obtained from a batchgenerator and caches all predictions and metrics (yields them in dicts) Parameters ---------- datamgr : :class:`DataManager` Manager producing a generator holding the batches batchsize : int Artificial batchsize (sampling will be done with batchsize 1 and sampled data will be stacked to match the artificial batchsize)(default: None) metrics : dict the metrics to calculate metric_keys : dict the ``batch_dict`` items to use for metric calculation verbose : bool whether to show a progress-bar or not, default: False cache_preds : bool whether to also cache predictions kwargs : keyword arguments passed to :func:`prepare_batch_fn` Yields ------ dict a dictionary containing all validation metrics (maybe empty) dict a dictionary containing all predictions; If ``cache_preds=True`` Warnings -------- Since this function caches all metrics and may additionally cache all predictions (based on the argument ``cache_preds``), this may result in huge memory consumption. 
If you are running out of memory, please have a look at :meth:`Predictor.predict_data_mgr_cache_metrics_only` or :meth:`Predictor.predict_data_mgr` or consider setting ``cache_preds`` to ``False`` (if not done already) """ if metrics is None: metrics = {} predictions_all, metric_vals = [], {k: [] for k in metrics.keys()} for preds, _metric_vals in self.predict_data_mgr( datamgr=datamgr, batchsize=batchsize, metrics=metrics, metric_keys=metric_keys, verbose=verbose, **kwargs): if cache_preds: predictions_all.append(preds) for k, v in _metric_vals.items(): metric_vals[k].append(v) if cache_preds: # convert predictions from list of dicts to dict of lists new_predictions_all = {} # recursively convert all nested dicts for preds in predictions_all: new_predictions_all = self.__convert_dict(preds, new_predictions_all) # concatenate lists to single arrays preds_all = self.__concatenate_dict_items(new_predictions_all) else: preds_all = {} for k, v in metric_vals.items(): metric_vals[k] = np.array(v) if cache_preds: yield preds_all, metric_vals else: yield metric_vals return @staticmethod def __convert_dict(old_dict, new_dict): """ Function to recursively convert dicts Parameters ---------- old_dict : dict the old nested dict new_dict : dict the new nested dict Returns ------- dict the updated new nested dict """ for k, v in old_dict.items(): # apply same function again on item if item is dict if isinstance(v, dict): if k not in new_dict: new_dict[k] = {} new_dict[k] = Predictor.__convert_dict(v, new_dict[k]) else: # check if v is scalar and convert to npy-array if # necessary. # Otherwise concatenation might fail if np.isscalar(v): v = np.array(v) # check for zero-sized arrays and reshape if necessary. # Otherwise concatenation might fail if v.shape == (): v = v.reshape(1) if k in new_dict: new_dict[k].append(v) else: new_dict[k] = [v] return new_dict @staticmethod def __concatenate_dict_items(dict_like: dict): """ Function to recursively concatenate dict-items Parameters ---------- dict_like : dict the (nested) dict, whoose items should be concatenated Returns ------- """ for k, v in dict_like.items(): if isinstance(v, dict): v = Predictor.__concatenate_dict_items(v) else: v = np.concatenate(v) dict_like[k] = v return dict_like def __setattr__(self, key, value): """ Set attributes and guard specific attributes after they have been set once Parameters ---------- key : str the attributes name value : Any the value to set Raises ------ PermissionError If attribute which should be set is guarded """ # check if key has been set once if key in self.__KEYS_TO_GUARD and hasattr(self, key): raise PermissionError("%s should not be overwritten after " "it has been set once" % key) else: super().__setattr__(key, value) @staticmethod def calc_metrics(batch: LookupConfig, metrics=None, metric_keys=None): """ Compute metrics Parameters ---------- batch: LookupConfig dictionary containing the whole batch (including predictions) metrics: dict dict with metrics metric_keys : dict dict of tuples which contains hashables for specifying the items to use for calculating the respective metric. 
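For illustration (``'accuracy'`` is a made-up metric name, not part of the defaults): passing ``metric_keys={'accuracy': ('label', 'pred')}`` would call the metric registered under ``'accuracy'`` with ``batch.nested_get('label')`` and ``batch.nested_get('pred')`` as its positional arguments.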
If not specified for a metric, the keys "pred" and "label" are used per default Returns ------- dict dict with metric results """ if metrics is None: metrics = {} if metric_keys is None: metric_keys = {k: ("label", "pred") for k in metrics.keys()} return {key: metric_fn(*[batch.nested_get(k) for k in metric_keys[key]]) for key, metric_fn in metrics.items()} def register_callback(self, callback: AbstractCallback): """ Register Callback to Trainer Parameters ---------- callback : :class:`AbstractCallback` the callback to register Raises ------ AssertionError `callback` is not an instance of :class:`AbstractCallback` and has not both methods ['at_iter_begin', 'at_iter_end'] """ assertion_str = "Given callback is not valid; Must be instance of " \ "AbstractCallback or provide functions " \ "'at_iter_begin' and 'at_iter_end'" instance_check = isinstance(callback, AbstractCallback) attr_check_begin = hasattr(callback, "at_iter_begin") attr_check_end = hasattr(callback, "at_iter_end") attr_check_both = attr_check_begin and attr_check_end assert instance_check or attr_check_both, assertion_str self._callbacks.append(callback) ================================================ FILE: delira/training/utils.py ================================================ import collections import numpy as np def recursively_convert_elements(element, check_type, conversion_fn): """ Function to recursively convert all elements Parameters ---------- element : Any the element to convert check_type : Any if ``element`` is of type ``check_type``, the conversion function will be applied to it conversion_fn : Any the function to apply to ``element`` if it is of type ``check_type`` Returns ------- Any the converted element """ # convert element with conversion_fn if isinstance(element, check_type): return conversion_fn(element) # return string and arrays as is elif isinstance(element, (str, np.ndarray)): return element # recursively convert all keys and values of mapping and convert result # back to original mapping type # must be checked before iterable since most mappings are also a iterable elif isinstance(element, collections.Mapping): element = type(element)({ recursively_convert_elements(k, check_type, conversion_fn): recursively_convert_elements(v, check_type, conversion_fn) for k, v in element.items() }) return element # recursively convert all items of iterable and convert result back to # original iterable type elif isinstance(element, collections.Iterable): element = type(element)([recursively_convert_elements(x, check_type, conversion_fn) for x in element]) return element # none of the previous cases is suitable for the element -> return as is return element def _correct_zero_shape(arg): """ Corrects the shape of numpy array to be at least 1d and returns the argument as is otherwise Parameters ---------- arg : Any the argument which must be corrected in its shape if it's zero-dimensional Returns ------- Any argument (shape corrected if necessary) """ if arg.shape == (): arg = arg.reshape(1) return arg def convert_to_numpy_identity(*args, **kwargs): """ Corrects the shape of all zero-sized numpy arrays to be at least 1d Parameters ---------- *args : positional arguments of potential arrays to be corrected **kwargs : keyword arguments of potential arrays to be corrected Returns ------- """ args = recursively_convert_elements(args, np.ndarray, _correct_zero_shape) kwargs = recursively_convert_elements(kwargs, np.ndarray, _correct_zero_shape) return args, kwargs ================================================ FILE: 
delira/utils/__init__.py ================================================ from delira.utils.config import DeliraConfig, Config from delira.utils.path import subdirs from delira.utils.time import now ================================================ FILE: delira/utils/codecs.py ================================================ import importlib import types import collections import inspect import numpy as np import logging import typing from functools import partial import typing class Encoder: """ Encode arbitrary objects. The encoded object consists of dicts, lists, ints, floats and strings. """ def __call__(self, obj) -> typing.Any: """ Encode arbitrary objects as dicts, str, int, float, list Parameters ---------- obj : Any object to be encoded Returns ------- Any encoded object """ return self.encode(obj) def encode(self, obj) -> typing.Any: """ Encode arbitrary objects as dicts, str, int, float, list Parameters ---------- obj : Any object to be encoded Returns ------- Any encoded object """ # use type() to check for dict and list because type() does not # consider subtypes which is the desired behaviour in this case if isinstance(obj, (str, int, float)): # end recursion return obj elif obj is None: return obj elif type(obj) == dict: # end recursion return self._encode_dict(obj) elif type(obj) == list: # end recursion return self._encode_list(obj) elif isinstance(obj, np.ndarray): return self._encode_array(obj) elif isinstance(obj, collections.Mapping): return self._encode_mapping(obj) elif isinstance(obj, collections.Iterable): return self._encode_iterable(obj) elif isinstance(obj, types.ModuleType): return self._encode_module(obj) elif inspect.isclass(obj) or isinstance(obj, type): # use both ways to determine functions here # (the second uglier one serves as fallback here in case inspect # does not cover all cases) return self._encode_type(obj) elif isinstance(obj, (types.BuiltinFunctionType, types.FunctionType)): return self._encode_function(obj) else: return self._encode_class(obj) def _encode_list(self, obj) -> list: """ Encode list Parameters ---------- obj : list list to be encoded Returns ------- list list with encoded internal items """ return [self.encode(i) for i in obj] def _encode_dict(self, obj) -> dict: """ Encode dict Parameters ---------- obj : dict dict to be encoded Returns ------- dict dict with encoded internal items """ return {self.encode(_key): self.encode(_item) for _key, _item in obj.items()} def _encode_array(self, obj) -> dict: """ Encode array Parameters ---------- obj : :class:`np.ndarray` object to be encoded Returns ------- dict array encoded as a list inside a dict """ # # if numpy array: add explicit array specifier # use tolist instead of tostring here (even though this requires # additional encoding steps and increases memory usage), since tolist # retains the shape and tostring doesn't return {"__array__": self.encode(obj.tolist())} def _encode_mapping(self, obj) -> dict: """ Encode mapping Parameters ---------- obj : collections.Mapping object to be encoded Returns ------- dict mapping encoded as a dict with original data and type """ # encode via encoding the type and the mapping converted to dict # separately and add a conversion specifier convert_repr = { "type": self.encode(type(obj)), "repr": self.encode(dict(obj)), } return {"__convert__": convert_repr} def _encode_iterable(self, obj) -> dict: """ Encode iterable Parameters ---------- obj : collections.Iterable object to be encoded Returns ------- dict iterable encoded as a dict with original 
data and type """ # encode via converting the type and the mapping converted to list # separately and add conversion specifier convert_repr = { "type": self.encode(type(obj)), "repr": self.encode(list(obj)), } return {"__convert__": convert_repr} def _encode_module(self, obj) -> dict: """ Encode module Parameters ---------- obj : types.ModuleType module to be encoded Returns ------- dict module encoded as a dict """ # encode via name and module specifier return {"__module__": obj.__module__} def _encode_type(self, obj) -> dict: """ Encode class or type Parameters ---------- obj : class/type to be encoded Returns ------- dict class/type encoded as a dict """ type_repr = { "module": self.encode(obj.__module__), "name": self.encode(obj.__name__), } return {"__type__": type_repr} def _encode_function(self, obj) -> dict: """ Encode function Parameters ---------- obj : function to be encoded Returns ------- dict function encoded as a dict """ function_repr = { "module": self.encode(obj.__module__), "name": self.encode(obj.__name__), } return {"__function__": function_repr} def _encode_class(self, obj) -> dict: """ Encode arbitrary object Parameters ---------- obj : arbitrary object to be encoded Returns ------- dict arbitrary object encoded as a dict """ try: class_repr = { "type": self.encode(type(obj)), "dict": self.encode(obj.__dict__) } return {"__class__": class_repr} except Exception as e: logging.error(e) class Decoder: """ Deocode arbitrary objects which were encoded by :class:`Encoder`. """ def __init__(self): super().__init__() self._decode_mapping = { "__array__": self._decode_array, "__convert__": self._decode_convert, "__module__": self._decode_module, "__type__": self._decode_type, "__function__": self._decode_function, "__class__": self._decode_class, "__classargs__": self._decode_classargs, "__functionargs__": self._decode_functionargs } def __call__(self, obj) -> typing.Any: """ Decode object Parameters ---------- obj : Any object to be decoded Returns ------- Any decoded object """ return self.decode(obj) def decode(self, obj) -> typing.Any: """ Decode object Parameters ---------- obj : Any object to be decoded Returns ------- Any decoded object """ if isinstance(obj, (str, int, float)): return obj elif isinstance(obj, dict): return self._decode_dict(obj) elif isinstance(obj, list): return self._decode_list(obj) else: return obj def _decode_dict(self, obj) -> dict: """ Decode dict with respect to unique identifier keys. 
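A minimal sketch of the dispatch (the encoded payload below is made up for illustration):
>>> dec = Decoder()
>>> dec({"__array__": [1, 2, 3]})
array([1, 2, 3])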
Parameters ---------- obj : dict dict to be decoded Returns ------- dict decoded dict """ for key in obj.keys(): if key in self._decode_mapping: return self._decode_mapping[key](obj[key]) else: obj[key] = self.decode(obj[key]) return obj def _decode_list(self, obj) -> list: """ Decode list Parameters ---------- obj : list list to be decoded Returns ------- Any decoded list """ return [self.decode(_i) for _i in obj] def _decode_array(self, obj) -> np.ndarray: """ Decode np.ndarray Parameters ---------- obj : :class:`np.ndarray` array to be decoded Returns ------- :class:`np.ndarray` decoded array """ return np.array(self.decode(obj)) def _decode_convert(self, obj: dict) -> typing.Union[ typing.Iterable, typing.Mapping]: """ Decode mappings and iterables Parameters ---------- obj : dict dict to be decoded Returns ------- typing.Union[typing.Iterable, typing.Mapping] decoded object """ # decode items in dict representation convert_repr = self.decode(obj) # create new object return convert_repr["type"](convert_repr["repr"]) def _decode_module(self, obj: dict) -> types.ModuleType: """ Decode module Parameters ---------- obj : dict dict to be decoded Returns ------- ModuleType decoded module """ return importlib.import_module(self.decode(obj)) def _decode_type(self, obj) -> typing.Any: """ Decode type Parameters ---------- obj : dict dict to be decoded Returns ------- Any decoded type """ # decode items in dict representation type_repr = self.decode(obj) return getattr(importlib.import_module(type_repr["module"]), type_repr["name"]) def _decode_function(self, obj: dict) -> typing.Union[ types.FunctionType, types.BuiltinFunctionType]: """ Decode function Parameters ---------- obj : dict dict to be decoded Returns ------- typing.Union[types.FunctionType, types.BuiltinFunctionType] decoded function """ # decode items in dict representation function_repr = self.decode(obj) return getattr(importlib.import_module(function_repr["module"]), function_repr["name"]) def _decode_class(self, obj: dict) -> typing.Any: """ Decode arbitrary object Parameters ---------- obj : dict dict to be decoded Returns ------- Any decoded object """ class_repr = self.decode(obj) cls_type = class_repr["type"] cls_dict = class_repr["dict"] # need to create a temporary type here (which is basically a raw # object, since using object directly raises # "TypeError: __class__ assignment only supported for heap types # or ModuleType subclasses" # After a bit of research this kind of class re-creation only # seems to be possible, if the intermediate class was created in # python (which is not True for the object type since this is part # of Python's C Core) tmp_cls = type("__tmp", (), {}) # create instance of temporary class tmp_instance = tmp_cls() # change class type tmp_instance.__class__ = self.decode(cls_type) # update attributes of class tmp_instance.__dict__.update(self.decode(cls_dict)) return tmp_instance def _decode_classargs(self, obj: dict) -> typing.Any: """ Create an object from specified class and arguments Parameters ---------- obj : dict dictionary which representes the object. Must include `module` and `name`. Can optionally include `args` and `kwargs`. 
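As a hypothetical example (``collections.OrderedDict`` is only used for illustration), the encoded form ``{"module": "collections", "name": "OrderedDict", "args": [], "kwargs": {}}`` would be decoded into ``collections.OrderedDict()``.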
Returns ------- Any decoded object Raises ------ TypeError arguments and name must be encoded as a dict """ classargs = self.decode(obj) if not isinstance(classargs, dict): raise TypeError("Arguments for classargs must be defined as dict.") obj_cls = getattr(importlib.import_module(classargs["module"]), classargs["name"]) args = classargs.get("args", []) kwargs = classargs.get("kwargs", {}) return obj_cls(*args, **kwargs) def _decode_functionargs(self, obj: dict) -> typing.Any: """ Create a function from the specified function and arguments Parameters ---------- obj : dict dictionary which represents the function. Must include `module` and `name`. Can optionally include `args` and `kwargs` which are passed via `functools.partial`. Returns ------- Any decoded function Raises ------ TypeError arguments and name must be encoded as a dict """ functionargs = self.decode(obj) if not isinstance(functionargs, dict): raise TypeError("Arguments for functionargs must be defined as dict.") fn = getattr(importlib.import_module(functionargs["module"]), functionargs["name"]) args = functionargs.get("args", []) kwargs = functionargs.get("kwargs", {}) return partial(fn, *args, **kwargs) ================================================ FILE: delira/utils/config.py ================================================ import copy from delira._version import get_versions from delira.utils.time import now from nested_lookup import nested_lookup import warnings from .codecs import Encoder, Decoder import yaml import argparse import sys import collections import inspect def non_string_warning(func): def warning_wrapper(config, key, *args, **kwargs): """ Emit warning if non string keys are used Parameters ---------- config: :class:`Config` decorated function receives :param:`self` as first argument key : immutable type key which is checked Returns ------- callable original function with arguments """ if not isinstance(key, str): warnings.warn("The key {} is not a string, but a {}. " "This may lead to unwanted behavior!".format( key, type(key)), RuntimeWarning) return func(config, key, *args, **kwargs) return warning_wrapper class Config(dict): """ Base class to create a config which holds arbitrary data """ def __init__(self, dict_like=None, **kwargs): """ Parameters ---------- dict_like : dict, optional dict like object to initialize config, by default None kwargs: additional arguments added to the config Warnings -------- It is recommended to only use strings as keys inside the config. Because of the shortened access to nested keys the types of the keys are lost. Examples -------- Create simple configuration with nested keys >>> from delira.utils import Config >>> cf = Config() >>> # automatically generates new nested dictionaries >>> cf['first_level.second_level.third_level'] = 1 >>> # short form access >>> print(cf['first_level.second_level.third_level']) >>> # traditional access >>> print(cf['first_level']['second_level']['third_level']) >>> # entries can also be accessed with dot operator >>> print(cf.first_level.second_level.third_level) """ super().__init__() self.__dict__ = self if dict_like is not None: self.update(dict_like) self.update(kwargs) @non_string_warning def __setattr__(self, key, value): """ Set attribute in config Parameters ---------- key : str attribute name value : any attribute value """ super().__setattr__(key, self._to_config(value)) @non_string_warning def __setitem__(self, key, value): """ Set items inside dict. Supports setting of nested entries by separating the individual keys with a '.'.
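A small sketch of the nested behaviour (the key names are made up):
>>> cf = Config()
>>> cf['model.encoder.depth'] = 5
>>> cf['model']['encoder']['depth']
5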
Parameters ---------- key : str key for new value value : any new value """ if not isinstance(key, str) or '.' not in key: super().__setitem__(key, value) else: current_level = self keys = key.split(".") final_key = keys.pop(-1) final_dict = self._traverse_keys(keys, create=True) final_dict._set_internal_item(final_key, value) def _traverse_keys(self, keys, create=False): """ Internal helper to traverse through nested dicts (iterative implementation to avoid problems with python stack) Parameters ---------- keys : iterable of list iterable with keys which should be traversed create : bool, optional creates new empty configs for non existant keys, by default False Returns ------- Any value defined by the traversed keys """ current_level = self for k in keys: if k not in current_level: if create: current_level[k] = self._create_internal_dict() else: raise KeyError( "{} was not found in internal dict.".format(k)) # traverse to needed dict current_level = current_level[k] return current_level def _set_internal_item(self, key, item, deepcopy=False): """ Set internal item Parameters ---------- key : str key where new item should be assigned item : Any item which should be assigned deepcopy : bool, optional if enabled the item is copied to the config, by default False """ config_item = self._to_config(item) if deepcopy: self[key] = copy.deepcopy(config_item) else: self[key] = config_item @classmethod def _to_config(cls, item): """ Convert items to config if they are a dict like object but not already a config Parameters ---------- item : Any item which is converted Returns ------- Any return a config is item is dict like, otherwise the item is returned """ if isinstance(item, dict) and not isinstance(item, cls): # convert dict to config for additional functionality return cls._create_internal_dict(item) else: return item @staticmethod def _create_internal_dict(*args, **kwargs): """ Defines how internal dicts should be created. Can be used to easily overwrite subclasses Returns ------- :class:`Config` new config """ return Config(*args, **kwargs) @non_string_warning def __getitem__(self, key): """ Get single item Parameters ---------- key : str key to desired item Returns ------- Any value inside dict """ if not isinstance(key, str) or '.' 
not in key: try: return super().__getitem__(int(key)) except (KeyError, ValueError): return super().__getitem__(key) else: return self._traverse_keys(key.split("."), create=False) @non_string_warning def __contains__(self, key): """ Check if key is in config (also works for nested dicts with short form) Parameters ---------- key : str key for desired value Returns ------- bool true if key is in config """ contain = True try: self[key] except KeyError: contain = False return contain def update(self, update_dict, deepcopy=False, overwrite=False): """ Update internal dicts with dict like object Parameters ---------- update_dict : dictlike values which should be added to config deepcopy : bool, optional copies values from :param:`update_dict`, by default False overwrite : bool, optional overwrite existing values inside config, by default False Raises ------ ValueError if overwrite is not enabled and `update_dict` contains same values as config """ for key, item in update_dict.items(): # update items individually self._update(key, item, deepcopy=deepcopy, overwrite=overwrite) def _update(self, key, item, deepcopy=False, overwrite=False): """ Helper function for update Parameters ---------- key : str key where new item should be assigned item : Any item which should be assigned deepcopy : bool, optional copies :param:`item`, by default False overwrite : bool, optional overwrite existing values inside config, by default False """ if isinstance(item, dict): # update nested dicts if key not in self: self[key] = self._create_internal_dict({}) self[key].update(item, deepcopy=deepcopy, overwrite=overwrite) else: # check for overwrite self._raise_overwrite(key, overwrite=overwrite) # set item self._set_internal_item(key, item, deepcopy=deepcopy) def _raise_overwrite(self, key, overwrite): """ Checks if a ValueError should be raised Parameters ---------- key : str key which needs to be checked overwrite : bool if overwrite is enabled no ValueError is raised even if the key already exists Raises ------ ValueError raised if overwrite is not enabled and key already exists """ if key in self and not overwrite: raise ValueError("{} already in config. 
Can " "not overwrite value.".format(key)) def dump(self, path, formatter=yaml.dump, encoder_cls=Encoder, **kwargs): """ Save config to a file and add time stamp to config Parameters ---------- path : str path where config is saved formatter : callable, optional defines the format how the config is saved, by default yaml.dump encoder_cls : :class:`Encoder`, optional transforms config to a format which can be formatted by the :param:`formatter`, by default Encoder kwargs: additional keyword arguments passed to :param:`formatter` """ self._timestamp = now() encoded_self = encoder_cls().encode(self) with open(path, "w") as f: formatter(encoded_self, f, **kwargs) def dumps(self, formatter=yaml.dump, encoder_cls=Encoder, **kwargs): """ Create a loadable string representation from the config and add time stamp to config Parameters ---------- formatter : callable, optional defines the format how the config is saved, by default yaml.dump encoder_cls : :class:`Encoder`, optional transforms config to a format which can be formatted by the :param:`formatter`, by default Encoder kwargs: additional keyword arguments passed to :param:`formatter` """ self._timestamp = now() encoded_self = encoder_cls().encode(self) return formatter(encoded_self, **kwargs) def load(self, path, formatter=yaml.load, decoder_cls=Decoder, **kwargs): """ Update config from a file Parameters ---------- path : str path to file formatter : callable, optional defines the format how the config is saved, by default yaml.dump decoder_cls : :class:`Encoder`, optional transforms config to a format which can be formatted by the :param:`formatter`, by default Encoder kwargs: additional keyword arguments passed to :param:`formatter` """ with open(path, "r") as f: decoded_format = formatter(f, **kwargs) decoded_format = decoder_cls().decode(decoded_format) self.update(decoded_format, overwrite=True) def loads(self, data, formatter=yaml.load, decoder_cls=Decoder, **kwargs): """ Update config from a string Parameters ---------- data: str string representation of config formatter : callable, optional defines the format how the config is saved, by default yaml.dump decoder_cls : :class:`Encoder`, optional transforms config to a format which can be formatted by the :param:`formatter`, by default Encoder kwargs: additional keyword arguments passed to :param:`formatter` """ decoded_format = formatter(data, **kwargs) decoded_format = decoder_cls().decode(decoded_format) self.update(decoded_format, overwrite=True) @classmethod def create_from_dict(cls, value, deepcopy=False): """ Create config from dict like object Parameters ---------- value : dict like dict like object used to create new config deepcopy : bool, optional if enabled, copies values from origin, by default False Returns ------- :class:`Config` new config Raises ------ TypeError raised if :param:`value` is not a dict (or a subclass of dict) """ if not isinstance(value, dict): raise TypeError("Value must be an instance of dict but type {} " "was found.".format(type(value))) config = cls() config.update(value, deepcopy=deepcopy) return config @classmethod def create_from_argparse(cls, value, deepcopy=False, **kwargs): """ Create config from argument parser Parameters ---------- value : argument parser or namespace if value is an argument parser, the arguments are first parsed and than a new config with the values is created if value is a Namespace the new config is created immediatly deepcopy : bool, optional if enabled, copies values from origin, by default False Returns ------- 
:class:`Config` new config Raises ------ TypeError if value is not an instance of :class:`ArgumentParser` or :class:`Namespace` """ if isinstance(value, argparse.ArgumentParser): args_parsed = value.parse_args(**kwargs) return cls.create_from_argparse(args_parsed, deepcopy=deepcopy) elif isinstance(value, argparse.Namespace): return cls.create_from_dict(vars(value), deepcopy=deepcopy) else: raise TypeError("Type of args not supported.") @classmethod def create_from_file(cls, path, formatter=yaml.load, decoder_cls=Decoder, **kwargs): """ Create config from a file Parameters ---------- path : str path to file formatter : callable, optional defines the format how the config is saved, by default yaml.dump decoder_cls : :class:`Encoder`, optional trasforms config to a format which can be formatted by the :param:`formatter`, by default Encoder kwargs: additional keyword arguments passed to :param:`formatter` Returns ------- :class:`Config` new config """ config = cls() config.load(path, formatter=formatter, decoder_cls=decoder_cls, **kwargs) return config @classmethod def create_from_str(cls, data, formatter=yaml.load, decoder_cls=Decoder, **kwargs): """ Create config from a string Parameters ---------- data: str string representation of config formatter : callable, optional defines the format how the config is saved, by default yaml.dump decoder_cls : :class:`Encoder`, optional trasforms config to a format which can be formatted by the :param:`formatter`, by default Encoder kwargs: additional keyword arguments passed to :param:`formatter` Returns ------- :class:`Config` new config """ config = cls() config.loads(data, formatter=formatter, decoder_cls=decoder_cls, **kwargs) return config def create_argparser(self): ''' Creates an argparser for all values in the config Following the pattern: `--training.learning_rate 1234` Returns ------- argparse.ArgumentParser parser for all variables in the config ''' parser = argparse.ArgumentParser(allow_abbrev=False) def add_val(dict_like, prefix=''): for key, val in dict_like.items(): name = "--{}".format(prefix + key) if val is None: parser.add_argument(name) else: if isinstance(val, int): parser.add_argument(name, type=type(val)) elif isinstance(val, collections.Mapping): add_val(val, prefix=key + '.') elif isinstance(val, collections.Iterable): if len(val) > 0 and type(val[0]) != type: parser.add_argument(name, type=type(val[0])) else: parser.add_argument(name) elif issubclass(val, type) or inspect.isclass(val): parser.add_argument(name, type=val) else: parser.add_argument(name, type=type(val)) add_val(self) return parser @staticmethod def _add_unknown_args(unknown_args): ''' Can add unknown args as parsed by argparsers method `parse_unknown_args`. 
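For illustration (the argument names are made up): an input of ``['--batchsize', '32', '--use_cuda']`` would yield a config with ``batchsize`` set to the string ``'32'`` and ``use_cuda`` set to ``True`` (interpreted as a flag, since no value follows it).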
Parameters ------ unknown_args : list list of unknown args Returns ------ Config a config of the parsed args ''' # first element in the list must be a key if not isinstance(unknown_args[0], str): unknown_args = [str(arg) for arg in unknown_args] if not unknown_args[0].startswith('--'): raise ValueError args = Config() # take first key key = unknown_args[0][2:] idx, done, val = 1, False, [] while not done: try: item = unknown_args[idx] except IndexError: done = True if item.startswith('--') or done: # save key with its value if len(val) == 0: # key is used as flag args[key] = True elif len(val) == 1: args[key] = val[0] else: args[key] = val # new key and flush data key = item[2:] val = [] else: val.append(item) idx += 1 return args def update_from_argparse(self, parser=None, add_unknown_items=False): ''' Updates the config with all values from the command line. Following the pattern: `--training.learning_rate 1234` Raises ------ TypeError raised if another datatype than currently in the config is parsed Returns ------- dict dictionary containing only updated arguments ''' if len(sys.argv) > 1: if not parser: parser = self.create_argparser() params, unknown = parser.parse_known_args() params = vars(params) if unknown and not add_unknown_items: warnings.warn( "Called with unknown arguments: {} " "They will not be stored if you do not set " "`add_unknown_items` to true.".format(unknown), RuntimeWarning) new_params = Config() for key, val in params.items(): if val is None: continue new_params[key] = val # update dict self.update(new_params, overwrite=True) if add_unknown_items: additional_params = self._add_unknown_args(unknown) self.update(additional_params) new_params.update(additional_params) return new_params class LookupConfig(Config): """ Helper class to have nested lookups in all subdicts of Config """ @staticmethod def _create_internal_dict(*args, **kwargs): """ Defines how internal dicts should be created. Can be used to easily overwrite subclasses Returns ------- :class:`LookupConfig` new config """ return LookupConfig(*args, **kwargs) @non_string_warning def __contains__(self, key): """ Check if key is in config (also works for nested dicts with short form) Parameters ---------- key : str key for desired value Returns ------- bool true if key is in config """ contain = True try: self.nested_get(key, allow_multiple=True) except KeyError: contain = False return contain def nested_get(self, key, *args, allow_multiple=False, **kwargs): """ Returns all occurances of :param:`key` in :param:`self` and subdicts Parameters ---------- key : str the key to search for *args : positional arguments to provide default value allow_multiple: bool allow multiple results **kwargs : keyword arguments to provide default value Raises ------ KeyError Multiple Values are found for key and :param:`allow_multiple` is False (unclear which value should be returned) OR No Value was found for key and no default value was given Returns ------- Any value corresponding to key (or default if value was not found) """ if "." in key: return self[key] results = nested_lookup(key, self) if len(results) > 1: if allow_multiple: return results else: raise KeyError("Multiple Values found for key %s" % key) elif len(results) == 0: if "default" in kwargs: return kwargs["default"] elif args: return args[0] else: raise KeyError("No Value found for key %s" % key) else: return results[0] class DeliraConfig(LookupConfig): """ Configure experiment for delira. 
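The nested lookup described above can be sketched as follows; the import path is assumed, and keyword items are assumed to be added to the config as in Config's constructor:

from delira.utils.config import LookupConfig  # assumed import path

cfg = LookupConfig(model={"num_classes": 2}, training={"learning_rate": 1e-3})

cfg.nested_get("num_classes")      # -> 2, found inside the nested sub-dict
cfg.nested_get("momentum", 0.9)    # -> 0.9, positional default for a missing key
"learning_rate" in cfg             # -> True, __contains__ delegates to nested_get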
Contains variables for model and training which can be either fixed or variables (for hyperparameter search) """ def __init__(self, dict_like=None, fixed_model=None, fixed_training=None, variable_model=None, variable_training=None, **kwargs): """ Parameters ---------- dict_like : dict, optional dict like object containing values for config, by default None. fixed_model : dict, optional fixed parameters for model, by default None. fixed_training : dict, optional fixed parameters for training, by default None. variable_model : dict, optional variable parameters for model, by default None. variable_training : dict, optional variable parameters for training, by default None. kwargs: additional arguments added to the config """ super().__init__(dict_like=dict_like, **kwargs) self._update("fixed_model", self.generate_dict(fixed_model), overwrite=True) self._update("fixed_training", self.generate_dict(fixed_training), overwrite=True) self._update("variable_model", self.generate_dict(variable_model), overwrite=True) self._update( "variable_training", self.generate_dict(variable_training), overwrite=True) self._version = get_versions()["version"] @staticmethod def generate_dict(value): """ If value is none an emty dict will be created Parameters ---------- value : Any checked value Returns ------- Any dict if value is none otherwise value is returned """ if value is None: return {} else: return dict(value) @property def params(self): """ Returns a :class:`LookupConfig` with all model and training parameters Returns ------- :class:`LookupConfig` config with model and training parameters """ return LookupConfig(fixed_model=self.fixed_model, fixed_training=self.fixed_training, variable_model=self.variable_model, variable_training=self.variable_training) @property def variable_params(self): """ Returns a :class:`LookupConfig` with all variable parameters Returns ------- :class:`LookupConfig` config with variable parameters """ return LookupConfig(model=self.variable_model, training=self.variable_training) @variable_params.setter def variable_params(self, new_params: dict): """ Update variable parameters from dict like object Raises ------ TypeError raised if :param:`new_params` is not a dict (or a subclass of dict) """ if not isinstance(new_params, dict): raise TypeError("new_params must be an instance of dict but " "type {} was found.".format(type(new_params))) # create empty dict if "model" not in new_params: new_params["model"] = {} # create empty dict if "training" not in new_params: new_params["training"] = {} self.variable_model = new_params["model"] self.variable_training = new_params["training"] @property def fixed_params(self): """ Returns a :class:`LookupConfig` with all fixed parameters Returns ------- :class:`LookupConfig` config with fixed parameters """ return LookupConfig(model=self.fixed_model, training=self.fixed_training) @fixed_params.setter def fixed_params(self, new_params: dict): """ Update fixed parameters from dict like object Raises ------ TypeError raised if :param:`new_params` is not a dict (or a subclass of dict) """ if not isinstance(new_params, dict): raise TypeError("new_params must be an instance of dict but " "type {} was found.".format(type(new_params))) # create empty dict if "model" not in new_params: new_params["model"] = {} # create empty dict if "training" not in new_params: new_params["training"] = {} self.fixed_model = new_params["model"] self.fixed_training = new_params["training"] @property def model_params(self): """ Returns a :class:`LookupConfig` with all 
model parameters Returns ------- :class:`LookupConfig` config with model parameters """ return LookupConfig(variable=self.variable_model, fixed=self.fixed_model) @model_params.setter def model_params(self, new_params: dict): """ Update model parameters from dict like object Raises ------ TypeError raised if :param:`new_params` is not a dict (or a subclass of dict) """ if not isinstance(new_params, dict): raise TypeError("new_params must be an instance of dict but " "type {} was found.".format(type(new_params))) # create empty dict if "fixed" not in new_params: new_params["fixed"] = {} # create empty dict if "variable" not in new_params: new_params["variable"] = {} self.fixed_model = new_params["fixed"] self.variable_model = new_params["variable"] @property def training_params(self): """ Returns a :class:`LookupConfig` with all training parameters Returns ------- :class:`LookupConfig` config with training parameters """ return LookupConfig(variable=self.variable_training, fixed=self.fixed_training) @training_params.setter def training_params(self, new_params: dict): """ Update training parameters from dict like object Raises ------ TypeError raised if :param:`new_params` is not a dict (or a subclass of dict) """ if not isinstance(new_params, dict): raise TypeError("new_params must be an instance of dict but " "type {} was found.".format(type(new_params))) # create empty dict if "fixed" not in new_params: new_params["fixed"] = {} # create empty dict if "variable" not in new_params: new_params["variable"] = {} self.fixed_training = new_params["fixed"] self.variable_training = new_params["variable"] def log_as_string(self, full_config=False, **kwargs): """ Log current config as a string Parameters ---------- full_config : bool, optional if enabled the complete Config is logged, by default False. Otherwise only model and training parameters will be logged. 
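A small sketch of how the parameter groups above fit together (import path assumed, values chosen arbitrarily):

from delira.utils.config import DeliraConfig  # assumed import path

cfg = DeliraConfig(fixed_model={"in_channels": 1, "num_classes": 2},
                   fixed_training={"batch_size": 32},
                   variable_training={"learning_rate": 1e-3})

cfg.params          # LookupConfig grouping fixed_*/variable_* parameters
cfg.model_params    # LookupConfig with "fixed" and "variable" model parameters

# the setter replaces both groups; a missing group becomes an empty dict
cfg.fixed_params = {"model": {"num_classes": 4}}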
kwargs: keyword arguments passed to `self.dumps` method to create string representation Returns ------- str string representation used for logging """ from delira.logging import log if full_config: str_repr = self.dumps(**kwargs) else: str_repr = self.params.dumps(**kwargs) log({'text': {"text_string": str_repr, "tag": "DeliraConfig"}}) return str_repr ================================================ FILE: delira/utils/context_managers.py ================================================ from delira import get_current_debug_mode, set_debug_mode class DebugMode(object): """ Context Manager to set a specific debug mode for the code inside the defined context (and reverting to previous mode afterwards) """ def __init__(self, mode): """ Parameters ---------- mode : bool the debug mode; if ``True`` disables all multiprocessing """ self._mode = mode def _switch_to_new_mode(self): """ helper function to switch to the new debug mode (and saving the previous one in ``self._mode``) """ prev_mode = get_current_debug_mode() set_debug_mode(self._mode) self._mode = prev_mode def __enter__(self): """ Sets the specified debug mode on entering the context """ self._switch_to_new_mode() def __exit__(self, *args, **kwargs): """ Resets the previous debug mode on exiting the context Parameters ---------- *args : arbitrary positional arguments (ignored here, just needed for compatibility with other context managers) **kwargs : arbitrary keyword arguments (ignored here, just needed for compatibility with other context managers) """ self._switch_to_new_mode() class DebugEnabled(DebugMode): """ Context Manager to enable the debug mode for the wrapped context """ def __init__(self): super().__init__(True) class DebugDisabled(DebugMode): """ Context Manager to disable the debug mode for the wrapped context """ def __init__(self): super().__init__(False) ================================================ FILE: delira/utils/decorators.py ================================================ import warnings from functools import wraps import numpy as np from delira import get_backends def dtype_func(class_object): """ Decorator to Check whether the first argument of the decorated function is of a certain type Parameters ---------- class_object : Any type the first function argument should have Returns ------- Wrapped Function Raises ------ AssertionError First argument of decorated function is not of given type """ def instance_checker(func): @wraps(func) def func_wrapper(checked_object, *args, **kwargs): assertion_str = "Argument 1 is not of type %s but of type %s" % \ (class_object.__name__, checked_object.__class__.__name__) assert isinstance(checked_object, class_object), assertion_str return func(checked_object, *args, **kwargs) return func_wrapper return instance_checker def classtype_func(class_object): """ Decorator to Check whether the first argument of the decorated function is a subclass of a certain type Parameters ---------- class_object : Any type the first function argument should be subclassed from Returns ------- Wrapped Function Raises ------ AssertionError First argument of decorated function is not a subclass of given type """ def subclass_checker(func): @wraps(func) def func_wrapper(checked_object, *args, **kwargs): assertion_str = "Argument 1 is not subclass of %s but of type %s" \ % (class_object.__name__, checked_object.__name__) assert issubclass(checked_object, class_object), assertion_str return func(checked_object, *args, **kwargs) return func_wrapper return subclass_checker def 
make_deprecated(new_func): """ Decorator which raises a DeprecationWarning for the decorated object Parameters ---------- new_func : Any new function which should be used instead of the decorated one Returns ------- Wrapped Function Raises ------ Deprecation Warning """ def deprecation(func): @wraps(func) def func_wrapper(*args, **kwargs): if not isinstance(new_func, str): new_func_name = new_func.__name__ else: new_func_name = new_func if func.__name__ == '__init__': old_func_name = func.__class__.__name__ else: old_func_name = func.__name__ warnings.warn(DeprecationWarning("%s is deprecated in favor of %s" " and will be removed at next " "release" % (old_func_name, new_func_name))) return func(*args, **kwargs) return func_wrapper return deprecation numpy_array_func = dtype_func(np.ndarray) if "TORCH" in get_backends(): import torch torch_tensor_func = dtype_func(torch.Tensor) torch_module_func = dtype_func(torch.nn.Module) ================================================ FILE: delira/utils/dict_reductions.py ================================================ from collections import MutableMapping from typing import Union, Dict, Callable import numpy as np # Reduction Functions def reduce_last(items: list) -> Union[float, int, np.ndarray]: """ Reduction Function returning the last element Parameters ---------- items : list the items to reduce Returns ------- float, int or :class:`numpy.ndarray` reduced items """ return items[-1] def reduce_first(items: list) -> Union[float, int, np.ndarray]: """ Reduction Function returning the first element Parameters ---------- items : list the items to reduce Returns ------- float, int or :class:`numpy.ndarray` reduced items """ return items[0] def reduce_mean(items: list) -> Union[float, int, np.ndarray]: """ Reduction Function returning the mean element Parameters ---------- items : list the items to reduce Returns ------- float, int or :class:`numpy.ndarray` reduced items """ return np.mean(items) def reduce_median(items: list) -> Union[float, int, np.ndarray]: """ Reduction Function returning the median element Parameters ---------- items : list the items to reduce Returns ------- float, int or :class:`numpy.ndarray` reduced items """ return np.median(items) def reduce_max(items: list) -> Union[float, int, np.ndarray]: """ Reduction Function returning the max element Parameters ---------- items : list the items to reduce Returns ------- float, int or :class:`numpy.ndarray` reduced items """ return np.max(items) def reduce_min(items: list) -> Union[float, int, np.ndarray]: """ Reduction Function returning the min element Parameters ---------- items : list the items to reduce Returns ------- float, int or :class:`numpy.ndarray` reduced items """ return np.min(items) def flatten_dict(d: dict, parent_key: str = '', sep: str = '.') -> dict: """ Flattens a dictionary by concatenating all keys for subdicts with the current key separated by :param`sep` Parameters ---------- d : dict the dictionary to flatten parent_key : str the key of the parent dict (ususally empty when called by user) sep : str the separator to separate the key from the subdict's key Returns ------- dict the flattened dictionary """ items = [] for k, v in d.items(): new_key = parent_key + sep + k if parent_key else k if isinstance(v, MutableMapping): items.extend(flatten_dict(v, new_key, sep=sep).items()) else: items.append((new_key, v)) return type(d)(items) def unflatten_dict(dictionary: dict, sep: str = ".") -> dict: """ Unflattens a dict, where keys and the keys from their subdirs are 
separated by :param:`sep` Parameters ---------- dictionary : dict the dictionary to unflatten sep : str the separation string Returns ------- """ return_dict = {} for key, value in dictionary.items(): parts = key.split(sep) d = return_dict for part in parts[:-1]: if part not in d: d[part] = dict() d = d[part] d[parts[-1]] = value return return_dict def reduce_dict(items: list, reduce_fn) -> dict: """ A function to reduce all entries inside a dict Parameters ---------- items : list a list of dicts to reduce reduce_fn : FunctionType a function to apply to all non-equal iterables Returns ------- dict the reduced dict """ result_dict = {} # assuming the type of all items is same for all queued logging dicts and # all dicts have the same keys flattened_dicts = [flatten_dict(_tmp, sep=".") for _tmp in items] # from list of dicts to dict of lists: for d in flattened_dicts: for k, v in d.items(): try: result_dict[k].append(v) except KeyError: result_dict[k] = [v] for k, v in result_dict.items(): # check if all items are equal equals = [_v == v[0] for _v in v[1:]] for idx, equality in enumerate(equals): if isinstance(equality, np.ndarray): equals[idx] = equality.all() if all(equals): # use first item since they are equal result_dict[k] = v[0] else: # apply reduce function result_dict[k] = reduce_fn(v) # unflatten reduced dict return unflatten_dict(result_dict, sep=".") # string mapping for reduction functions _REDUCTION_FUNCTIONS = { "last": reduce_last, "first": reduce_first, "mean": reduce_mean, "median": reduce_median, "max": reduce_max, "min": reduce_min } def possible_reductions() -> tuple: """ Function returning a tuple containing all valid reduction strings Returns ------- tuple a tuple containing all valid reduction strings """ return tuple(_REDUCTION_FUNCTIONS.keys()) def get_reduction(reduce_type: str) -> Callable: """ A getter function to get a specified reduction function by it's specifier string Parameters ---------- reduce_type : str the reduction type Returns ------- Callable the actual reduction function """ return _REDUCTION_FUNCTIONS[reduce_type] ================================================ FILE: delira/utils/messenger.py ================================================ import logging import warnings from abc import ABC, abstractmethod from delira.training import BaseExperiment from delira.training.callbacks import AbstractCallback class BaseMessenger(ABC): """ Wrap arbitrary experiments and connect its functions to a notification service. """ def __init__(self, experiment: BaseExperiment, notify_epochs: int = None): """ Parameters ---------- experiment : :class:`BaseExperiment` instance of current experiment notify_epochs : int Activates notifications about finished epochs with frequency `notify_epochs`. """ super().__init__() self._experiment = experiment self._notify_epochs = notify_epochs @abstractmethod def emit_message(self, msg: str) -> dict: """ Emit message. Implement this method in base class to create new notification services. 
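To illustrate the reduction helpers from dict_reductions.py above, importing directly from that module (example values chosen arbitrarily):

from delira.utils.dict_reductions import flatten_dict, reduce_dict, get_reduction

nested = {"val": {"CE": 0.7, "metrics": {"dice": 0.8}}}
flatten_dict(nested)   # -> {"val.CE": 0.7, "val.metrics.dice": 0.8}

# reduce a list of logging dicts entry-wise with the "mean" reduction
history = [{"val": {"CE": 0.7}}, {"val": {"CE": 0.5}}]
reduce_dict(history, get_reduction("mean"))   # -> {"val": {"CE": ~0.6}}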
Parameters ---------- msg : str message which should be emitted Returns ------- dict dict with additional information from message """ raise NotImplementedError def __getattr__(self, attr): """ If wrapper does not implement attribute, return attribute of wrapped object Parameters ---------- attr : str name of attribute Returns ------- Any attribute """ # NOTE do note use hasattr, it goes into infinite recursion if attr in self.__dict__: # this object has it return getattr(self, attr) return getattr(self._experiment, attr) def run(self, *args, **kwargs): """ Wrapper for run function. Notifies experiment start, fail, complete. Parameters ---------- args : positional arguments passed to experiment. kwargs : additional keyword arguments passed to experiment. Returns ------- Any result of experiment """ if self._notify_epochs is not None: callbacks = list(kwargs.pop("callbacks", [])) callbacks.append(MessengerEpochCallback(self._notify_epochs, self)) kwargs["callbacks"] = callbacks msg = str(self._experiment.name) + " : Training started." self.emit_message(msg) try: out = self._experiment.run(*args, **kwargs) except Exception as e: msg = \ str(self._experiment.name) + " : Training failed. \n" + str(e) self.emit_message(msg) raise msg = str(self._experiment.name) + " : Training completed." self.emit_message(msg) return out def resume(self, *args, **kwargs): """ Wrapper for resume function. Notifies experiment start, fail, complete. Parameters ---------- args : positional arguments passed to experiment. kwargs : additional keyword arguments passed to experiment. Returns ------- Any result of experiment """ if self._notify_epochs is not None: callbacks = kwargs.pop("callbacks", []) callbacks.append(MessengerEpochCallback(self._notify_epochs, self)) kwargs["callbacks"] = callbacks msg = str(self._experiment.name) + " : Resume started." self.emit_message(msg) try: out = self._experiment.resume(*args, **kwargs) except Exception as e: msg = str(self._experiment.name) + " : Resume failed. \n" + str(e) self.emit_message(msg) raise e msg = str(self._experiment.name) + " : Resume ended." self.emit_message(msg) return out def test(self, *args, **kwargs): """ Wrapper for test function. Notifies experiment start, fail, complete. Parameters ---------- args : positional arguments passed to experiment. kwargs : additional keyword arguments passed to experiment. Returns ------- Any result of experiment """ msg = str(self._experiment.name) + " : Test started." self.emit_message(msg) try: out = self._experiment.test(*args, **kwargs) except Exception as e: msg = str(self._experiment.name) + " : Test failed. \n" + str(e) self.emit_message(msg) raise e msg = str(self._experiment.name) + " : Test completed." self.emit_message(msg) return out def kfold(self, *args, **kwargs): """ Wrapper for kfold function. Notifies experiment start, fail, complete, end of fold. Parameters ---------- args : positional arguments passed to experiment. kwargs : additional keyword arguments passed to experiment. Returns ------- Any result of experiment """ # append own callback for fold messages callbacks = kwargs.pop("callbacks", []) callbacks.append(MessengerFoldCallback(self)) # append own callback for epoch messages if self._notify_epochs is not None: callbacks.append(MessengerEpochCallback(self._notify_epochs, self)) kwargs["callbacks"] = callbacks msg = str(self._experiment.name) + " : Kfold started." 
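A minimal sketch of a custom notification service built on the wrapper above; the print-based backend and the experiment variable are placeholders:

from delira.utils.messenger import BaseMessenger  # assumed import path

class PrintMessenger(BaseMessenger):
    """Toy messenger that simply prints every notification."""

    def emit_message(self, msg: str) -> dict:
        print(msg)
        return {}

# `experiment` stands for any BaseExperiment instance created elsewhere
# messenger = PrintMessenger(experiment, notify_epochs=5)
# messenger.run(...)   # emits start/complete/failure messages around training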
self.emit_message(msg) # execute k-fold try: out = self._experiment.kfold(*args, **kwargs) except Exception as e: msg = str(self._experiment.name) + " : Kfold failed. \n" + str(e) self.emit_message(msg) raise e msg = str(self._experiment.name) + " : Kfold completed." self.emit_message(msg) return out class MessengerEpochCallback(AbstractCallback): """ Callback for "Epoch X trained" message See Also -------- :class:`BaseMessenger` """ def __init__(self, n_epochs: int, messenger: BaseMessenger): """ Parameters ---------- n_epochs : int notification frequency messenger : :class:`BaseMessenger` instance of a experiment with messanger to emit message """ super().__init__() self._n_epochs = n_epochs self._messenger = messenger def at_epoch_end(self, trainer, **kwargs) -> dict: """ Call at end of epoch Parameters ---------- trainer : :class:`BaseTrainer` instance of trainer kwargs : additional keyword arguments. Must contain ``curr_epoch``. Returns ------- dict empty dict """ curr_epoch = kwargs.pop("curr_epoch") trained_epochs = curr_epoch - trainer.start_epoch if trained_epochs % self._n_epochs == 0: msg = "Epoch " + str(curr_epoch) + " trained." self._messenger.emit_message(msg) return {} class MessengerFoldCallback(AbstractCallback): """ Callback for "Fold X completed" in slack See Also -------- :class:`BaseMessenger` """ def __init__(self, messenger: BaseMessenger): """ Parameters ---------- messenger : :class:`BaseMessenger` instance of a experiment with messanger to emit message """ super().__init__() self._messenger = messenger def at_training_begin(self, trainer, **kwargs) -> dict: """ End of training callback Parameters ---------- trainer : :class:`BaseTrainer` instance of trainer kwargs : additional keyword arguments (not used) Returns ------- dict empty dict """ msg = "Fold " + str(trainer.fold) + " started." self._messenger.emit_message(msg) return {} def at_training_end(self, trainer, **kwargs) -> dict: """ End of training callback Parameters ---------- trainer : :class:`BaseTrainer` instance of trainer kwargs : additional keyword arguments (not used) Returns ------- dict empty dict """ msg = "Fold " + str(trainer.fold) + " completed." self._messenger.emit_message(msg) return {} class SlackMessenger(BaseMessenger): """ Wrap arbitrary experiments and connect its functions to slack notification .. note:: `token`can be either your personal user token or a token from an artificial bot. To create your own bot you can visit https://api.slack.com/ and click 'Your Apps' at the top-right corner (you may need to create an own workspace where you can install your bot). .. warning:: Slack messenger has `slackclient` as a dependency which is not included in the requirements! """ def __init__(self, experiment: BaseExperiment, token: str, channel: str, notify_epochs: int = None, **kwargs): """ Parameters ---------- experiment : :class:`BaseExperiment` instance of current experiment token : str User or Bot token from slack channel : str channel id (destination of messages) notify_epochs : int Activates notifications about finished epochs with frequency `notify_epochs`. 
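Putting the Slack variant together might look like the following; the token, channel id and experiment object are placeholders, and `slackclient` or `slack` must be installed separately:

from delira.utils.messenger import SlackMessenger  # assumed import path

# `experiment` stands for any BaseExperiment instance created elsewhere
# messenger = SlackMessenger(experiment,
#                            token="xoxb-your-bot-token",   # placeholder
#                            channel="C0123456789",         # placeholder
#                            notify_epochs=10)
# messenger.kfold(...)   # fold start/end and epoch messages are posted to the channel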
kwargs : additional keyword arguments passed to :class:`SlackClient` Raises ------ ImportError if `slackclient` is not installed See Also -------- :class:`BaseMessenger` """ super().__init__(experiment, notify_epochs=notify_epochs) # switch between different versions (with changed imports) try: from slackclient import SlackClient self._client = SlackClient(token, **kwargs) self._version = 1 except ImportError as e: try: from slack import WebClient self._client = WebClient(token=token, **kwargs) self._version = 2 except ImportError as e: warnings.warn( "Could not import `slackclient`. This package is not" "included in the default dependencies of delira!") raise e assert self._version in [1, 2], "Only version 1 and 2 supported" self._channel = channel self._ts = None # Set to None for initial message # initial slack message msg = "Created new experiment: " + str(self._experiment.name) resp = self.emit_message(msg) if self._version == 1: # old api self._ts = resp['ts'] if 'ts' in resp else None elif self._version == 2: # new api self._ts = resp.data['ts'] if hasattr(resp, 'data') else None def emit_message(self, msg, **kwargs): """ Emit message (is possible the current thread is used) Parameters ---------- msg : str message which should be emitted kwargs: additional keyword arguments passed to slack api calls Returns ------- dict dict with additional information from message Raises ------ ValueError unknown `self._version` """ # use thread of current post if possible if self._ts is not None and 'thread_ts' not in kwargs: kwargs['thread_ts'] = self._ts if self._version == 1: resp = self._emit_message_v1(msg, **kwargs) elif self._version == 2: resp = self._emit_message_v2(msg, **kwargs) else: raise ValueError("Unknown version detected!") return resp def _emit_message_v1(self, msg, **kwargs) -> dict: """ Emit message with old slack api Parameters ---------- msg : str message which should be emitted kwargs: additional keyword arguments passed to slack api calls Returns ------- dict representation dict of message """ resp = self._client.api_call( "chat.postMessage", channel=self._channel, text=msg, **kwargs, ) if not resp["ok"]: logging.error("Slack message was not emitted correctly!" " \n {}".format(msg)) return resp def _emit_message_v2(self, msg, **kwargs): """ Emit message with new slack api Parameters ---------- msg : str message which should be emitted kwargs: additional keyword arguments passed to slack api calls Returns ------- :class:`slack.web.slack_response.SlackResponse` slack api response """ resp = self._client.chat_postMessage(channel=self._channel, text=msg, **kwargs, ) if not resp.data["ok"]: logging.error("Slack message was not emitted correctly!" 
" \n {}".format(msg)) return resp ================================================ FILE: delira/utils/path.py ================================================ import os def subdirs(d): """For a given directory, return a list of all subdirectories (full paths) Parameters ---------- d : string given root directory Returns ------- list list of strings of all subdirectories """ return sorted([os.path.join(d, name) for name in os.listdir(d) if os.path.isdir(os.path.join(d, name))]) ================================================ FILE: delira/utils/time.py ================================================ import datetime def now(): """Return current time as YYYY-MM-DD_HH-MM-SS Returns ------- string current time """ return datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S') ================================================ FILE: docker/Dockerfile ================================================ FROM nvidia/cuda:9.2-base-ubuntu18.04 RUN apt-get update && apt-get install -y \ curl \ ca-certificates \ sudo \ git \ bzip2 \ libx11-6 \ build-essential \ fonts-roboto \ && rm -rf /var/lib/apt/lists/* RUN useradd --create-home --shell /bin/bash containeruser USER containeruser WORKDIR /home/containeruser RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ chmod +x ~/miniconda.sh && \ ~/miniconda.sh -b -p /home/containeruser/conda && \ rm ~/miniconda.sh && \ /home/containeruser/conda/bin/conda clean -ya ENV PATH /home/containeruser/conda/bin:$PATH RUN conda install python=3.7 RUN pip install --upgrade pip RUN git clone https://github.com/justusschock/delira.git && \ pip install pip wheel && \ pip install -r delira/requirements.txt && \ pip install -r delira/requirements_extra_torch.txt && \ pip install delira/ ENV PYTHONPATH /home/containeruser/delira:$PYTHONPATH CMD ["/bin/bash"] ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = delira SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/_api/_build/delira/backend_resolution.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira._backends Backend Resolution ================== These functions are used to determine the installed backends and update the created config file. They also need to be used, to guard backend specific code, when writing code with several backends in one file like this: ``if "YOUR_BACKEND" in delira.get_backends():`` :hidden:`get_backends` ~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: get_backends ================================================ FILE: docs/_api/_build/delira/class_hierarchy.rst ================================================ Class Hierarchy Diagrams ======================== .. 
contents:: * `Coarse <../../../_static/class_hierarchy/delira_coarse.png>`_ * `Fine <../../../_static/class_hierarchy/delira_fine.png>`_ ================================================ FILE: docs/_api/_build/delira/data_loading/arbitrary_data.rst ================================================ Arbitrary Data -------------- The following classes are implemented to work with every kind of data. You can use every framework you want to load your data, but the returned samples should be a :obj:`dict` of ``numpy ndarrays`` .. toctree:: :maxdepth: 5 Dataset Dataloader Datamanager Utils ================================================ FILE: docs/_api/_build/delira/data_loading/data_loading.rst ================================================ Data Loading ============ This module provides Utilities to load the Data .. toctree:: Arbitrary Data Nii Sampler ================================================ FILE: docs/_api/_build/delira/data_loading/dataloader.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.data_loading Dataloader ********** The Dataloader wraps the dataset and combines them with a sampler (see below) to combine single samples to whole batches. ToDo: add flow chart diagramm :hidden:`DataLoader` ~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: DataLoader :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/data_loading/datamanager.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.data_loading.data_manager Datamanager *********** The datamanager wraps a dataloader and combines it with augmentations and multiprocessing. :hidden:`DataManager` ~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: DataManager :members: :undoc-members: :show-inheritance: :hidden:`Augmenter` ~~~~~~~~~~~~~~~~~~~ .. autoclass:: Augmenter :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/data_loading/dataset.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.data_loading Datasets ******** The Dataset the most basic class and implements the loading of your dataset elements. You can either load your data in a lazy way e.g. loading them just at the moment they are needed or you could preload them and cache them. Datasets can be indexed by integers and return single samples. To implement custom datasets you should derive the :class:`AbstractDataset` :hidden:`AbstractDataset` ~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: AbstractDataset :members: :undoc-members: :show-inheritance: :hidden:`BaseLazyDataset` ~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: BaseLazyDataset :members: :undoc-members: :show-inheritance: :hidden:`BaseCacheDataset` ~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: BaseCacheDataset :members: :undoc-members: :show-inheritance: :hidden:`BaseExtendCacheDataset` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: BaseExtendCacheDataset :members: :undoc-members: :show-inheritance: :hidden:`ConcatDataset` ~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: ConcatDataset :members: :undoc-members: :show-inheritance: :hidden:`BlankDataset` ~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: BlankDataset :members: :undoc-members: :show-inheritance: :hidden:`Nii3DLazyDataset` ~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autoclass:: Nii3DLazyDataset :members: :undoc-members: :show-inheritance: :hidden:`Nii3DCacheDataset` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: Nii3DCacheDataset :members: :undoc-members: :show-inheritance: :hidden:`TorchvisionClassificationDataset`: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: TorchvisionClassificationDataset :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/data_loading/nii.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.data_loading.nii Nii-Data -------- Since ``delira`` aims to provide dataloading tools for medical data (which is often stored in Nii-Files), the following classes and functions provide a basic way to load data from nii-files: .. currentmodule:: delira.data_loading.nii :hidden:`load_nii` ~~~~~~~~~~~~~~~~~~ .. autofunction:: load_nii :hidden:`BaseLabelGenerator` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: BaseLabelGenerator :members: :undoc-members: :show-inheritance: :hidden:`load_sample_nii` ~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: load_sample_nii ================================================ FILE: docs/_api/_build/delira/data_loading/sampler.rst ================================================ .. role:: hidden :class: hidden-section Sampler ------- Sampler define the way of iterating over the dataset and returning samples. .. currentmodule:: delira.data_loading.sampler :hidden:`AbstractSampler` ~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: AbstractSampler :members: :undoc-members: :show-inheritance: :hidden:`LambdaSampler` ~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: LambdaSampler :members: :undoc-members: :show-inheritance: :hidden:`RandomSampler` ~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: RandomSampler :members: :undoc-members: :show-inheritance: :hidden:`PrevalenceRandomSampler` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: PrevalenceRandomSampler :members: :undoc-members: :show-inheritance: :hidden:`StoppingPrevalenceRandomSampler` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: StoppingPrevalenceRandomSampler :members: :undoc-members: :show-inheritance: :hidden:`SequentialSampler` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SequentialSampler :members: :undoc-members: :show-inheritance: :hidden:`PrevalenceSequentialSampler` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: PrevalenceSequentialSampler :members: :undoc-members: :show-inheritance: :hidden:`StoppingPrevalenceSequentialSampler` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: StoppingPrevalenceSequentialSampler :members: :undoc-members: :show-inheritance: :hidden:`WeightedRandomSampler` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: WeightedRandomSampler :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/data_loading/utils.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.data_loading.load_utils Utils ***** :hidden:`norm_range` ~~~~~~~~~~~~~~~~~~~~ .. autofunction:: norm_range :hidden:`norm_zero_mean_unit_std` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: norm_zero_mean_unit_std :hidden:`is_valid_image_file` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: is_valid_image_file :hidden:`default_load_fn_2d` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: default_load_fn_2d :hidden:`LoadSample` ~~~~~~~~~~~~~~~~~~~~ .. 
autoclass:: LoadSample :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/debug_mode.rst ================================================ def get_current_debug_mode(): """ Getter function for the current debug mode Returns ------- bool current debug mode """ return __DEBUG_MODE def switch_debug_mode(): """ Alternates the current debug mode """ set_debug_mode(not get_current_debug_mode()) def set_debug_mode(mode: bool): """ Sets a new debug mode Parameters ---------- mode : bool the new debug mode """ global __DEBUG_MODE __DEBUG_MODE = mode .. role:: hidden :class: hidden-section .. currentmodule:: delira._debug_mode Debug Mode ========== Delira now contains a fully-fledged `Debug` mode, which disables all kinds of multiprocessing. :hidden:`get_current_debug_mode` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: get_current_debug_mode :hidden:`switch_debug_mode` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: switch_debug_mode :hidden:`set_debug_mode` ~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: set_debug_mode ================================================ FILE: docs/_api/_build/delira/delira.io.rst ================================================ .. role:: hidden :class: hidden-section IO == .. currentmodule:: delira.io if "CHAINER" in get_backends(): from delira.io.chainer import save_checkpoint as chainer_save_checkpoint from delira.io.chainer import load_checkpoint as chainer_load_checkpoint if "SKLEARN" in get_backends(): from delira.io.sklearn import load_checkpoint as sklearn_load_checkpoint from delira.io.sklearn import save_checkpoint as sklearn_save_checkpoint :hidden:`torch_load_checkpoint` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: torch_load_checkpoint :hidden:`torch_save_checkpoint` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: torch_save_checkpoint :hidden:`torchscript_load_checkpoint` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: torchscript_load_checkpoint :hidden:`torchscript_save_checkpoint` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: torchscript_save_checkpoint :hidden:`tf_load_checkpoint` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: tf_load_checkpoint :hidden:`tf_save_checkpoint` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: tf_save_checkpoint :hidden:`tf_eager_load_checkpoint` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: tf_eager_load_checkpoint :hidden:`tf_eager_save_checkpoint` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: tf_eager_save_checkpoint :hidden:`chainer_load_checkpoint` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: chainer_load_checkpoint :hidden:`chainer_save_checkpoint` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: chainer_save_checkpoint :hidden:`sklearn_load_checkpoint` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: sklearn_load_checkpoint :hidden:`sklearn_save_checkpoint` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: sklearn_save_checkpoint ================================================ FILE: docs/_api/_build/delira/delira.rst ================================================ Delira ====== .. toctree:: :maxdepth: 10 :glob: Data Loading IO Logging Models Training Utilities Backend Resolution Debug Mode Class Hierarchy Diagrams ================================================ FILE: docs/_api/_build/delira/delira.utils.rst ================================================ Utils ===== This package provides utility functions as image operations, various decorators, path operations and time operations. .. 
automodule:: delira.utils.context_managers :members: :undoc-members: :show-inheritance: .. automodule:: delira.utils.decorators :members: :undoc-members: :show-inheritance: .. automodule:: delira.utils.imageops :members: :undoc-members: :show-inheritance: .. automodule:: delira.utils.path :members: :undoc-members: :show-inheritance: .. automodule:: delira.utils.time :members: :undoc-members: :show-inheritance: .. automodule:: delira.utils.config :members: :undoc-members: :show-inheritance: .. automodule:: delira.utils.messenger :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/logging/backends.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.logging.base_backend :hidden:`BaseBackend` ~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: BaseBackend :members: :undoc-members: :show-inheritance: .. currentmodule:: delira.logging.writer_backend :hidden:`WriterBackend` ~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: WriterBackend :members: :undoc-members: :show-inheritance: .. currentmodule:: delira.logging.tensorboard_backend :hidden:`TensorboardBackend` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: TensorboardBackend :members: :undoc-members: :show-inheritance: .. currentmodule:: delira.logging.visdom_backend :hidden:`VisdomBackend` ~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: VisdomBackend :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/logging/base_logger.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.logging.base_logger :hidden:`Logger` ~~~~~~~~~~~~~~~~ .. autoclass:: Logger :members: :undoc-members: :show-inheritance: :hidden:`SingleThreadedLogger` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SingleThreadedLogger :members: :undoc-members: :show-inheritance: :hidden:`make_logger` ~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: make_logger ================================================ FILE: docs/_api/_build/delira/logging/handlers.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.logging :hidden:`MultiStreamHandler` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: MultiStreamHandler :members: :undoc-members: :show-inheritance: :hidden:`TrixiHandler` ~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: TrixiHandler :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/logging/logging.rst ================================================ Logging ======= The logging module provides the utilities for logging arbitrary values to different backends and a logger registry. .. 
toctree:: Logger Logging Backends Logging Context Registry ================================================ FILE: docs/_api/_build/delira/logging/logging_context.py ================================================ from delira.logging.registry import logger_exists, register_logger, \ unregister_logger, log as _log from delira.logging.base_logger import make_logger log = _log class LoggingContext(object): """ Contextmanager to set a new logging context """ def __init__( self, name, initialize_if_missing=False, destroy_on_exit=None, **kwargs): """ Parameters ---------- name : str the name of the logger to use initialize_if_missing : bool whether to create a logger if it does not yet exist destroy_on_exit : bool whether to destroy the logger on exit; If None, the logger will only be destroyed, if it was created here **kwargs: additional keyword arguments to create a logger if necessary Raises ------ ValueError if the logger does not exist already and shall not be created """ # Logger does exist already if logger_exists(name): self._name = name if destroy_on_exit is None: destroy_on_exit = False # logger will be created elif initialize_if_missing: register_logger(make_logger(**kwargs), name) if destroy_on_exit is None: destroy_on_exit = True self._name = name # logger does not exist and shall not be created else: raise ValueError("No valid logger for name %s and " "'initialize_if_missing' is False" % name) self._destroy_on_exit = destroy_on_exit def __enter__(self): """ Function to be executed during entrance; Resets the logging context Returns ------- :class:`LoggingContext` self """ global log log = self.log return self def __exit__(self, *args): """ Function to be called during exiting the context manager; Destroys the logger if necessary and resets the old logging context Parameters ---------- *args Postional arguments to be compatible with other context managers Returns ------- """ if self._destroy_on_exit: _logger = unregister_logger(self._name) del _logger global log log = _log def log(self, msg: dict): """ Main Logging Function, Decides whether to log with the assigned backend or python's internal module Parameters ---------- msg : dict the message to log; Should be a dict, where the keys indicate the logging function to execute, and the corresponding value holds the arguments necessary to execute this function """ _log(msg, self._name) def __call__(self, log_message: dict): """ Makes the class callable and forwards the call to :meth:`delira.logging.base_logger.Logger.log` Parameters ---------- log_message : dict the logging message to log Returns ------- Any the return value obtained by :meth:`LoggingContext.log` """ return self.log(log_message) ================================================ FILE: docs/_api/_build/delira/logging/logging_context.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.logging.logging_context :hidden:`LoggingContext` ~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autoclass:: LoggingContext :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/logging/registry.py ================================================ from delira.logging.base_logger import Logger from collections import OrderedDict # Registry dict containing all registered available Loggers # Use Ordered Dict here to use first logger for logging if no name was given _AVAILABLE_LOGGERS = OrderedDict() def log(msg: dict, name=None): """ Global logging function Parameters ---------- msg : dict the message to log; Should be a dict, where the keys indicate the logging function to execute, and the corresponding value holds the arguments necessary to execute this function name : str the name of the logger to use; if None: the last logger will be used Raises ------ AssertionError if the logger with the specified name does not exist AssertionError if the returned object is not a logger Returns ------- Any the value obtained by the loggers ``log`` function """ # use last name if no name is present if name is None: name = get_available_loggers()[-1] assert logger_exists(name) _logger = get_logger(name) assert isinstance(_logger, Logger) return _logger.log(msg) def logger_exists(name: str): """ Check if logger exists Parameters ---------- name : str the name to check the existence for Returns ------- bool whether a logger with the given name exists """ return name in _AVAILABLE_LOGGERS def register_logger(logger: Logger, name: str, overwrite=False): """ Register a new logger to the Registry Parameters ---------- logger : :class:`delira.logging.base_logger.Logger` the logger to register name : str the corresponding name, to register the logger at overwrite : bool whether or not to overwrite existing loggers if necessary Returns ------- :class:`delira.logging.base_logger.Logger` the registered logger object """ if not logger_exists(name) or overwrite: _AVAILABLE_LOGGERS[name] = logger return get_logger(name) def unregister_logger(name: str): """ Unregisters a logger from the registry Parameters ---------- name : str the name of the logger to unregister Returns ------- :class:`delira.logging.base_logger.Logger` the registered logger object """ return _AVAILABLE_LOGGERS.pop(name) def get_logger(name): """ Returns a logger from the registry Parameters ---------- name : str the name indicating the logger to return Returns ------- :class:`delira.logging.base_logger.Logger` the specified logger object """ return _AVAILABLE_LOGGERS[name] def get_available_loggers(): """ Gets names for all registered loggers Returns ------- tuple a tuple of strings specifying the names of all registered loggers """ return tuple(_AVAILABLE_LOGGERS.keys()) ================================================ FILE: docs/_api/_build/delira/logging/registry.rst ================================================ .. role:: hidden :class: hidden-section .. :currentmodule:: delira.logging.registry :hidden:`log` ~~~~~~~~~~~~~ .. autofunction:: log :hidden:`logger_exists` ~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: logger_exists :hidden:`register_logger` ~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: register_logger :hidden:`unregister_logger` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: unregister_logger :hidden:`get_logger` ~~~~~~~~~~~~~~~~~~~~ .. autofunction:: get_logger :hidden:`get_available_loggers` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autofunction:: get_available_loggers ================================================ FILE: docs/_api/_build/delira/logging/tensorboard_backend.py ================================================ import tensorboardX from threading import Event from queue import Queue from delira.logging.writer_backend import WriterLoggingBackend class TensorboardBackend(WriterLoggingBackend): """ A Tensorboard logging backend """ def __init__(self, writer_kwargs=None, abort_event: Event = None, queue: Queue = None): """ Parameters ---------- writer_kwargs : dict arguments to initialize a writer abort_event : :class:`threading.Event` the abortion event queue : :class:`queue.Queue` the queue holding all logging tasks """ if writer_kwargs is None: writer_kwargs = {} super().__init__(tensorboardX.SummaryWriter, writer_kwargs, abort_event, queue) def _call_exec_fn(self, exec_fn, args): """ Helper Function calling the actual mapped function and flushing results to the writer afterwards Parameters ---------- exec_fn : function the function which will execute the actual logging args : iterable (listlike) or mapping (dictlike) the arguments passed to the ``exec_fn`` Returns ------- Any the return value obtained by the ``exec_fn`` """ ret_val = super()._call_exec_fn(exec_fn, args) self._writer.file_writer.flush() return ret_val def __del__(self): """ Function to be executed at deletion; Flushes all unsaved changes """ self._writer.file_writer.flush() def _graph_pytorch(self, model, input_to_model=None, verbose=False, **kwargs): """ Function to log a PyTorch graph Parameters ---------- model : :class:`AbstractPyTorchNetwork` the model, whose graph shall be logged input_to_model : :class:`torch.Tensor` the input to the model; necessary for graph traversal verbose : bool verbosity option **kwargs : additional keyword arguments """ converted_args, converted_kwargs = self.convert_to_npy( model=model, input_to_model=input_to_model, verbose=verbose, **kwargs) self._writer.add_graph(*converted_args, **converted_kwargs) def _graph_tf(self, graph, run_metadata=None): """ Function to log a TensorFlow Graph Parameters ---------- graph : :class:`tensorflow.Graph` or :class:`tensorflow.GraphDef` run_metadata : the run metadata Raises ------ TypeError if given graph cannot be converted to graphdef """ import tensorflow as tf from tensorboardX.proto.event_pb2 import Event, TaggedRunMetadata # convert to graphdef if isinstance(graph, tf.Graph): graphdef = graph.as_graph_def() elif isinstance(graph, tf.GraphDef): graphdef = graph elif hasattr(graph, "SerializeToString"): graphdef = graph else: raise TypeError("Invalid type given for graph: %s" % graph.__class__.__name__) if run_metadata: run_metadata = TaggedRunMetadata( tag='step1', run_metadata=run_metadata.SerializeToString()) self._writer._get_file_writer().add_event( Event( graph_def=graphdef.SerializeToString(), tagged_run_metadata=run_metadata)) def _graph_onnx(self, prototxt): """ Function to log a ONNX graph to file Parameters ---------- prototxt : str filepath to a given prototxt file containing an ONNX graph """ converted_args, converted_kwargs = self.convert_to_npy( prototxt=prototxt) self._writer.add_onnx_graph(*converted_args, **converted_kwargs) def _embedding(self, mat, metadata=None, label_img=None, global_step=None, tag='default', metadata_header=None): """ Function to create an embedding of given data Parameters ---------- mat : array-like an arraylike object, which can be converted to a numpy array; holds the actual embedding value metadata : the embeddings 
metadata label_img : array-like an arraylike object, which can be converted to a numpy array; holds the label image global_step : int the global step tag : str the tag to store the embedding at metadata_header : the metadata header """ converted_args, converted_kwargs = self.convert_to_npy( mat=mat, metadata=metadata, label_img=label_img, global_step=global_step ) self._writer.add_embedding(*converted_args, **converted_kwargs) def _scalars(self, main_tag: str, tag_scalar_dict: dict, global_step=None, walltime=None, sep="/"): """ Function to log multiple scalars at once. Opposing to the base function, this is done sequentially rather then parallel to avoid creating new event files Parameters ---------- main_tag : str the main tag, will be combined with the subtags inside the ``tag_scalar_dict`` tag_scalar_dict : dict dictionary of (key, scalar) pairs global_step : int the global step walltime : the overall time sep : str the character separating maintag and subtag in the final tag """ # log scalars sequentially for key, val in tag_scalar_dict.items(): # combine tags new_tag = main_tag + sep + key self._scalar(new_tag, val, global_step=global_step, walltime=walltime) @property def name(self): return "TensorFlow Backend" ================================================ FILE: docs/_api/_build/delira/logging/visdom_backend.py ================================================ import tensorboardX from threading import Event from queue import Queue from delira.logging.writer_backend import WriterLoggingBackend class VisdomBackend(WriterLoggingBackend): """ A Visdom Logging backend """ def __init__(self, writer_kwargs: dict = None, abort_event: Event = None, queue: Queue = None): """ Parameters ---------- writer_kwargs : dict arguments to initialize a writer abort_event : :class:`threading.Event` the abortion event queue : :class:`queue.Queue` the queue holding all logging tasks """ if writer_kwargs is None: writer_kwargs = {} super().__init__( tensorboardX.visdom_writer.VisdomWriter, writer_kwargs, abort_event, queue) @property def name(self): return "VisdomBackend" ================================================ FILE: docs/_api/_build/delira/logging/writer_backend.py ================================================ from delira.logging.base_backend import BaseBackend from queue import Queue from threading import Event class WriterLoggingBackend(BaseBackend): """ A Basic Writer Backend for a unspecified writer class """ def __init__(self, writer_cls, writer_kwargs: dict, abort_event: Event = None, queue: Queue = None): super().__init__(abort_event, queue) self._writer = writer_cls(**writer_kwargs) @staticmethod def convert_to_npy(*args, **kwargs): """ Function to convert all positional args and keyword args to numpy (returns identity per default, but can be overwritten in subclass to log more complex types) Parameters ---------- *args : positional arguments of arbitrary number and type **kwargs : keyword arguments of arbitrary number and type Returns ------- tuple converted positional arguments dict converted keyword arguments """ return args, kwargs def _image(self, tag, img_tensor, global_step=None, walltime=None, dataformats='CHW'): """ Function to log a single image Parameters ---------- tag : str the tag to store the image at img_tensor : array-like an array-like object containing the actual image; Must be convertible to numpy global_step : int the global step walltime : the overall time dataformats : str string specifying the image format """ converted_args, converted_kwargs = 
self.convert_to_npy( tag=tag, img_tensor=img_tensor, global_step=global_step, walltime=walltime, dataformats=dataformats) self._writer.add_image(*converted_args, **converted_kwargs) def _images(self, tag, img_tensor, global_step=None, walltime=None, dataformats='NCHW'): """ Function to log multiple values Parameters ---------- tag : str the tag to store the image at img_tensor : array-like an array-like object containing the actual image; Must be convertible to numpy global_step : int the global step walltime : the overall time dataformats : str string specifying the image format """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, img_tensor=img_tensor, global_step=global_step, walltime=walltime, dataformats=dataformats) self._writer.add_images(*converted_args, **converted_kwargs) def _image_with_boxes(self, tag, img_tensor, box_tensor, global_step=None, walltime=None, dataformats='CHW', **kwargs): """ Function to log a single image with bounding boxes Parameters ---------- tag : str the tag to store the image at img_tensor : array-like an array-like object containing the actual image; Must be convertible to numpy box_tensor : array-like an array-like object containing the actual bounding boxes in xyxy format; must be convertible to numpy global_step : int the global step walltime : the overall time dataformats : str string specifying the image format **kwargs : additional keyword arguments """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, img_tensor=img_tensor, box_tensor=box_tensor, global_step=global_step, walltime=walltime, dataformats=dataformats, **kwargs) self._writer.add_image_with_boxes(*converted_args, **converted_kwargs) def _scalar(self, tag, scalar_value, global_step=None, walltime=None): """ Function to log a single scalar value Parameters ---------- tag : str the tag to store the image at scalar_value : int or float the scalar value to log global_step : int the global step walltime : the overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, scalar_value=scalar_value, global_step=global_step, walltime=walltime) self._writer.add_scalar(*converted_args, **converted_kwargs) def _scalars(self, main_tag, tag_scalar_dict, global_step=None, walltime=None): """ Function to log multiple scalars Parameters ---------- main_tag : str the main tag to store the scalars at tag_scalar_dict : dict a dictionary containing tags as keys and the corresponding scalar values global_step : int the global step walltime : the overall time """ converted_args, converted_kwargs = self.convert_to_npy( main_tag=main_tag, tag_scalar_dict=tag_scalar_dict, global_step=global_step, walltime=walltime) self._writer.add_scalars(*converted_args, **converted_kwargs) def _histogram(self, tag, values, global_step=None, bins='tensorflow', walltime=None): """ Function to create and log a histogram out of given values Parameters ---------- tag : str the tag to store the histogram at values : arraylike an arraylike object containing the raw data to create a histogram from; Must be convertible to numpy global_step : int global step bins : str string indicating the bins format walltime : the overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, values=values, global_step=global_step, bins=bins) self._writer.add_histogram(*converted_args, **converted_kwargs) def _figure(self, tag, figure, global_step=None, close=True, walltime=None): """ Function to log a ``matplotlib.pyplot`` figure Parameters ---------- tag : str the tag to store 
the figure at figure : :class:`matplotlib.pyplot.Figure`` the figure to log global_step : int the global step close : bool whether to close the figure after pushing it walltime : the overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, figure=figure, global_step=global_step, close=close, walltime=walltime) self._writer.add_figure(*converted_args, **converted_kwargs) def _audio(self, tag, snd_tensor, global_step=None, sample_rate=44100, walltime=None): """ Function to log a single audio signal Parameters ---------- tag : str the tag to store the sound signal at snd_tensor : arraylike arraylike object containing the sound signal; must be convertible to numpy global_step : int the global step sample_rate : int the sampling rate for the sound signal walltime : the overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, snd_tensor=snd_tensor, global_step=global_step, sample_rate=sample_rate, walltime=walltime ) self._writer.add_audio(*converted_args, **converted_kwargs) def _text(self, tag, text_string, global_step=None, walltime=None): """ Function to log a single string as text Parameters ---------- tag : str the tag to store the text at text_string : str the text string to log global_step : int the global step walltime : the overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, text_string=text_string, global_step=global_step, walltime=walltime) self._writer.add_text(*converted_args, **converted_kwargs) def _pr_curve(self, tag, labels, predictions, global_step=None, num_thresholds=127, weights=None, walltime=None): """ Function to create and log a PR curve out of given predictions and + labels Parameters ---------- tag : str function to store the curve at labels : arraylike arraylike object containing the groundtruth data; must be convertible to numpy predictions : arraylike arraylike object containing the predictions; must be convertible to numpy global_step : int the global step num_thresholds : int number of thresholds to apply for PR calculation weights : arraylike arraylike object containing sample weights, must be covertible to numpy walltime : overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, labels=labels, predictions=predictions, global_step=global_step, num_thresholds=num_thresholds, weights=weights, walltime=walltime) self._writer.add_pr_curve(*converted_args, **converted_kwargs) def _video(self, tag, vid_tensor, global_step=None, fps=4, walltime=None): """ Function to log a single video Parameters ---------- tag : str the tag to store the image at vid_tensor : arraylike arraylike object containing the video frames; must be convertible to numpy global_step : int the global step fps : int frames per second to display walltime : int the overall time """ converted_args, converted_kwargs = self.convert_to_npy( tag=tag, vid_tensor=vid_tensor, global_step=global_step, fps=fps, walltime=walltime) self._writer.add_video(*converted_args, **converted_kwargs) @property def name(self): return "WriterBackend" ================================================ FILE: docs/_api/_build/delira/models/chainer.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.models.backends.chainer Chainer ....... :hidden:`AbstractChainerNetwork` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: AbstractChainerNetwork :members: :undoc-members: :show-inheritance: :hidden:`DataParallelChainerNetwork` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autoclass:: DataParallelChainerNetwork :members: :undoc-members: :show-inheritance: :hidden:`DataParallelChainerOptimizer` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: DataParallelChainerOptimizer :members: :undoc-members: :show-inheritance: :hidden:`ParallelOptimizerUpdateModelParameters` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: DataParallelOptimizerUpdateModelParameters :members: :undoc-members: :show-inheritance: :hidden:`ParallelOptimizerCumulateGradientsHook` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: ParallelOptimizerCumulateGradientsHook :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/models/models.rst ================================================ .. role:: hidden :class: hidden-section Models ====== ``delira`` comes with it's own model-structure tree - with :class:`AbstractNetwork` at it's root - and integrates several backends deeply into it's structure. .. currentmodule:: delira.models :hidden:`AbstractNetwork` ~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: AbstractNetwork(type) :members: :undoc-members: :show-inheritance: Backends -------- .. toctree:: Chainer SciKit-Learn TensorFLow Eager Execution TensorFlow Graph Execution PyTorch TorchScript ================================================ FILE: docs/_api/_build/delira/models/sklearn.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.models.backends.sklearn SciKit-Learn ............ :hidden:`SklearnEstimator` ~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnEstimator :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/models/tfeager.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.models.backends.tf_eager TensorFlow Eager Execution .......................... :hidden:`AbstractTfEagerNetwork` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: AbstractTfEagerNetwork :members: :undoc-members: :show-inheritance: :hidden:`DataParallelTfEagerNetwork` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: DataParallelTfEagerNetwork :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/models/tfgraph.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.models.backends.tf_graph TensorFlow Graph Execution .......................... :hidden:`AbstractTfGraphNetwork` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: AbstractTfGraphNetwork :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/models/torch.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.models.backends.torch PyTorch ....... :hidden:`AbstractPyTorchNetwork` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: AbstractPyTorchNetwork :members: :undoc-members: :show-inheritance: :hidden:`DataParallelPyTorchNetwork` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: DataParallelPyTorchNetwork :members: :undoc-members: :show-inheritance: :hidden:`scale_loss` ~~~~~~~~~~~~~~~~~~~~ .. autofunction:: scale_loss ================================================ FILE: docs/_api/_build/delira/models/torchscript.rst ================================================ .. 
role:: hidden :class: hidden-section .. currentmodule:: delira.models.backends.torchscript TorchScript ........... :hidden:`AbstractTorchScriptNetwork` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: AbstractTorchScriptNetwork :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/training/backends/backends.rst ================================================ Backends ======== The following section contains all backends which are implemented, developed and maintained for usage with ``delira``. A single backend usually contains at least a trainer, an experiment and some models (which are capsuled in the `models<../../models/models>`_ section. .. toctree:: Chainer SciKit-Learn TensorFlow Eager Execution Tensorflow Graph Execution PyTorch TorchScript ================================================ FILE: docs/_api/_build/delira/training/backends/chainer.rst ================================================ Chainer ....... .. role:: hidden :class: hidden-section .. currentmodule:: delira.training.backends.chainer :hidden:`ChainerNetworkTrainer` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: ChainerNetworkTrainer :members: :undoc-members: :show-inheritance: :hidden:`ChainerExperiment` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: ChainerExperiment :members: :undoc-members: :show-inheritance: :hidden:`convert_chainer_to_numpy` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: convert_chainer_to_numpy :hidden:`create_chainer_optims_default` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: create_chainer_optims_default ================================================ FILE: docs/_api/_build/delira/training/backends/sklearn.rst ================================================ SciKit-Learn ............ .. role:: hidden :class: hidden-section .. currentmodule:: delira.training.backends.sklearn :hidden:`SklearnEstimatorTrainer` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnEstimatorTrainer :members: :undoc-members: :show-inheritance: :hidden:`SklearnExperiment` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnExperiment :members: :undoc-members: :show-inheritance: :hidden:`create_sklearn_optims_default` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: create_sklearn_optims_default ================================================ FILE: docs/_api/_build/delira/training/backends/tfeager.rst ================================================ TensorFlow Eager Execution .......................... .. role:: hidden :class: hidden-section .. currentmodule:: delira.training.backends.tf_eager :hidden:`TfEagerNetworkTrainer` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: TfEagerNetworkTrainer :members: :undoc-members: :show-inheritance: :hidden:`TfEagerExperiment` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: TfEagerExperiment :members: :undoc-members: :show-inheritance: :hidden:`create_tfeager_optims_default` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: create_tfeager_optims_default :hidden:`convert_tfeager_to_numpy` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: convert_tfeager_to_numpy ================================================ FILE: docs/_api/_build/delira/training/backends/tfgraph.rst ================================================ TensorFlow Graph Execution .......................... .. role:: hidden :class: hidden-section .. currentmodule:: delira.training.backends.tf_graph :hidden:`TfGraphNetworkTrainer` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autoclass:: TfGraphNetworkTrainer :members: :undoc-members: :show-inheritance: :hidden:`TfGraphExperiment` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: TfGraphExperiment :members: :undoc-members: :show-inheritance: :hidden:`initialize_uninitialized` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: initialize_uninitialized ================================================ FILE: docs/_api/_build/delira/training/backends/torch.rst ================================================ PyTorch ....... .. role:: hidden :class: hidden-section .. currentmodule:: delira.training.backends.torch :hidden:`PyTorchNetworkTrainer` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: PyTorchNetworkTrainer :members: :undoc-members: :show-inheritance: :hidden:`PyTorchExperiment` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: PyTorchExperiment :members: :undoc-members: :show-inheritance: :hidden:`create_pytorch_optims_default` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: create_pytorch_optims_default :hidden:`convert_torch_to_numpy` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: convert_torch_to_numpy ================================================ FILE: docs/_api/_build/delira/training/backends/torchscript.rst ================================================ TorchScript ........... .. role:: hidden :class: hidden-section .. currentmodule:: delira.training.backends.torchscript :hidden:`TorchScriptNetworkTrainer` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: TorchScriptNetworkTrainer :members: :undoc-members: :show-inheritance: :hidden:`TorchScriptExperiment` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: TorchScriptExperiment :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/training/callbacks.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.training.callbacks Callbacks ========= Callbacks are essential to provide a uniform API for tasks like early stopping etc. The PyTorch learning rate schedulers are also implemented as callbacks. Every callback should ber derived from :class:`AbstractCallback` and must provide the methods ``at_epoch_begin`` and ``at_epoch_end``. :hidden:`AbstractCallback` ~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: AbstractCallback :members: :undoc-members: :show-inheritance: :hidden:`EarlyStopping` ~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: EarlyStopping :members: :undoc-members: :show-inheritance: :hidden:`DefaultPyTorchSchedulerCallback` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: DefaultPyTorchSchedulerCallback :members: :undoc-members: :show-inheritance: .. currentmodule:: delira.training.callbacks.pytorch_schedulers :hidden:`CosineAnnealingLRCallback` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: CosineAnnealingLRCallback :members: :undoc-members: :show-inheritance: :hidden:`ExponentialLRCallback` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: ExponentialLRCallback :members: :undoc-members: :show-inheritance: :hidden:`LambdaLRCallback` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: LambdaLRCallback :members: :undoc-members: :show-inheritance: :hidden:`MultiStepLRCallback` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: MultiStepLRCallback :members: :undoc-members: :show-inheritance: :hidden:`ReduceLROnPlateauCallback` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autoclass:: ReduceLROnPlateauCallback :members: :undoc-members: :show-inheritance: :hidden:`StepLRCallback` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: StepLRCallback :members: :undoc-members: :show-inheritance: .. currentmodule:: delira.training.callbacks :hidden:`CosineAnnealingLRCallbackPyTorch` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: CosineAnnealingLRCallbackPyTorch :members: :undoc-members: :show-inheritance: :hidden:`ExponentialLRCallbackPyTorch` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: ExponentialLRCallbackPyTorch :members: :undoc-members: :show-inheritance: :hidden:`LambdaLRCallbackPyTorch` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: LambdaLRCallbackPyTorch :members: :undoc-members: :show-inheritance: :hidden:`MultiStepLRCallbackPyTorch` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: MultiStepLRCallbackPyTorch :members: :undoc-members: :show-inheritance: :hidden:`ReduceLROnPlateauCallbackPyTorch` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: ReduceLROnPlateauCallbackPyTorch :members: :undoc-members: :show-inheritance: :hidden:`StepLRCallbackPyTorch` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: StepLRCallbackPyTorch :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/training/experiment.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.training Experiments =========== Experiments are the outermost class to control your training, it wraps your NetworkTrainer and provides utilities for cross-validation. More Experiments can be found in the sections for the specific backends :hidden:`BaseExperiment` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: BaseExperiment :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/training/losses.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.training.losses Custom Loss Functions ===================== :hidden:`BCEFocalLossPyTorch` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: BCEFocalLossPyTorch :members: :undoc-members: :show-inheritance: :hidden:`BCEFocalLossLogitPyTorch` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: BCEFocalLossLogitPyTorch :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/training/metrics.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.training.metrics Metrics ======= :hidden:`SklearnClassificationMetric` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnClassificationMetric :members: :undoc-members: :show-inheritance: :hidden:`SklearnAccuracyScore` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnAccuracyScore :members: :undoc-members: :show-inheritance: :hidden:`SklearnBalancedAccuracyScore` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnBalancedAccuracyScore :members: :undoc-members: :show-inheritance: :hidden:`SklearnF1Score` ~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnF1Score :members: :undoc-members: :show-inheritance: :hidden:`SklearnFBetaScore` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnFBetaScore :members: :undoc-members: :show-inheritance: :hidden:`SklearnHammingLoss` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autoclass:: SklearnHammingLoss :members: :undoc-members: :show-inheritance: :hidden:`SklearnJaccardSimilarityScore` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnJaccardSimilarityScore :members: :undoc-members: :show-inheritance: :hidden:`SklearnLogLoss` ~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnLogLoss :members: :undoc-members: :show-inheritance: :hidden:`SklearnMatthewsCorrCoeff` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnMatthewsCorrCoeff :members: :undoc-members: :show-inheritance: :hidden:`SklearnPrecisionScore` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnPrecisionScore :members: :undoc-members: :show-inheritance: :hidden:`SklearnRecallScore` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnRecallScore :members: :undoc-members: :show-inheritance: :hidden:`SklearnZeroOneLoss` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: SklearnZeroOneLoss :members: :undoc-members: :show-inheritance: :hidden:`AurocMetric` ~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: AurocMetric :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/training/parameters.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.training Parameters =============== :hidden:`Parameters` ~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: Parameters :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/training/predictor.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.training Predictor ========= The predictor implements the basic prediction and metric calculation routines and can be subclassed for special routines. It is also the baseclass of all the trainers which extend it's functionality by training routines :hidden:`Predictor` ~~~~~~~~~~~~~~~~~~~ .. autoclass:: Predictor :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/training/trainer.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.training NetworkTrainer ============== The network trainer implements the actual training routine and can be subclassed for special routines. More specific trainers can be found in the backend-specific sections :hidden:`BaseNetworkTrainer` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: BaseNetworkTrainer :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/_api/_build/delira/training/training.rst ================================================ Training ======== The training subpackage implements Callbacks, a class for Hyperparameters, training routines and wrapping experiments. .. toctree:: Parameters Network Trainer Predictor Experiment Backends Callbacks Losses Metrics Utilities ================================================ FILE: docs/_api/_build/delira/training/utils.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: delira.training.utils def recursively_convert_elements(element, check_type, conversion_fn): def convert_to_numpy_identity(*args, **kwargs): Utilities ......... :hidden:`recursively_convert_elements` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: recursively_convert_elements :hidden:`convert_to_numpy_identity` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autofunction:: convert_to_numpy_identity ================================================ FILE: docs/_api/_build/modules.rst ================================================ API Documentation ================= .. toctree:: :maxdepth: 10 delira/delira ================================================ FILE: docs/classification_pytorch.rst ================================================ Classification with Delira - A very short introduction ====================================================== *Author: Justus Schock* *Date: 04.12.2018* This Example shows how to set up a basic classification PyTorch experiment and Visdom Logging Environment. Let's first setup the essential hyperparameters. We will use ``delira``'s ``Parameters``-class for this: .. code:: ipython3 logger = None import torch from delira.training import Parameters params = Parameters(fixed_params={ "model": { "in_channels": 1, "n_outputs": 10 }, "training": { "batch_size": 64, # batchsize to use "num_epochs": 10, # number of epochs to train "optimizer_cls": torch.optim.Adam, # optimization algorithm to use "optimizer_params": {'lr': 1e-3}, # initialization parameters for this algorithm "losses": {"CE": torch.nn.CrossEntropyLoss()}, # the loss function "lr_sched_cls": None, # the learning rate scheduling algorithm to use "lr_sched_params": {}, # the corresponding initialization parameters "metrics": {} # and some evaluation metrics } }) Since we did not specify any metric, only the ``CrossEntropyLoss`` will be calculated for each batch. Since we have a classification task, this should be sufficient. We will train our network with a batchsize of 64 by using ``Adam`` as optimizer of choice. Logging and Visualization ------------------------- To get a visualization of our results, we should monitor them somehow. For logging we will use ``Visdom``. To start a visdom server you need to execute the following command inside an environment which has visdom installed: .. code:: shell visdom -port=9999 This will start a visdom server on port 9999 of your machine and now we can start to configure our logging environment. To view your results you can open http://localhost:9999 in your browser. .. code:: ipython3 from trixi.logger import PytorchVisdomLogger from delira.logging import TrixiHandler import logging logger_kwargs = { 'name': 'ClassificationExampleLogger', # name of our logging environment 'port': 9999 # port on which our visdom server is alive } logger_cls = PytorchVisdomLogger # configure logging module (and root logger) logging.basicConfig(level=logging.INFO, handlers=[TrixiHandler(logger_cls, **logger_kwargs)]) # derive logger from root logger # (don't do `logger = logging.Logger("...")` since this will create a new # logger which is unrelated to the root logger logger = logging.getLogger("Test Logger") Since a single visdom server can run multiple environments, we need to specify a (unique) name for our environment and need to tell the logger, on which port it can find the visdom server. Data Preparation ---------------- Loading ~~~~~~~ Next we will create a small train and validation set (based on ``torchvision`` MNIST): .. 
code:: ipython3 from delira.data_loading import TorchvisionClassificationDataset dataset_train = TorchvisionClassificationDataset("mnist", # which dataset to use train=True, # use trainset img_shape=(224, 224) # resample to 224 x 224 pixels ) dataset_val = TorchvisionClassificationDataset("mnist", train=False, img_shape=(224, 224) ) Augmentation ~~~~~~~~~~~~ For Data-Augmentation we will apply a few transformations: .. code:: ipython3 from batchgenerators.transforms import RandomCropTransform, \ ContrastAugmentationTransform, Compose from batchgenerators.transforms.spatial_transforms import ResizeTransform from batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform transforms = Compose([ RandomCropTransform(200), # Perform Random Crops of Size 200 x 200 pixels ResizeTransform(224), # Resample these crops back to 224 x 224 pixels ContrastAugmentationTransform(), # randomly adjust contrast MeanStdNormalizationTransform(mean=[0.5], std=[0.5])]) With these transformations we can now wrap our datasets into datamanagers: .. code:: ipython3 from delira.data_loading import DataManager, SequentialSampler, RandomSampler manager_train = DataManager(dataset_train, params.nested_get("batch_size"), transforms=transforms, sampler_cls=RandomSampler, n_process_augmentation=4) manager_val = DataManager(dataset_val, params.nested_get("batch_size"), transforms=transforms, sampler_cls=SequentialSampler, n_process_augmentation=4) Training -------- After we have done that, we can finally specify our experiment and run it. We will therfore use the already implemented ``ClassificationNetworkBasePyTorch`` which is basically a ResNet18: .. code:: ipython3 import warnings warnings.simplefilter("ignore", UserWarning) # ignore UserWarnings raised by dependency code warnings.simplefilter("ignore", FutureWarning) # ignore FutureWarnings raised by dependency code from delira.training import PyTorchExperiment from delira.training.train_utils import create_optims_default_pytorch from delira.models.classification import ClassificationNetworkBasePyTorch if logger is not None: logger.info("Init Experiment") experiment = PyTorchExperiment(params, ClassificationNetworkBasePyTorch, name="ClassificationExample", save_path="./tmp/delira_Experiments", optim_builder=create_optims_default_pytorch, gpu_ids=[0]) experiment.save() model = experiment.run(manager_train, manager_val) Congratulations, you have now trained your first Classification Model using ``delira``, we will now predict a few samples from the testset to show, that the networks predictions are valid: .. 
code:: ipython3 import numpy as np from tqdm.auto import tqdm # utility for progress bars device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # set device (use GPU if available) model = model.to(device) # push model to device preds, labels = [], [] with torch.no_grad(): for i in tqdm(range(len(dataset_val))): img = dataset_val[i]["data"] # get image from current batch img_tensor = torch.from_numpy(img).unsqueeze(0).to(device).to(torch.float) # create a tensor from image, push it to device and add batch dimension pred_tensor = model(img_tensor) # feed it through the network pred = pred_tensor.argmax(1).item() # get index with maximum class confidence label = np.asscalar(dataset_val[i]["label"]) # get label from batch if i % 1000 == 0: print("Prediction: %d \t label: %d" % (pred, label)) # print result preds.append(pred) labels.append(label) # calculate accuracy accuracy = (np.asarray(preds) == np.asarray(labels)).sum() / len(preds) print("Accuracy: %.3f" % accuracy) See Also -------- For a more detailed explanation have a look at \* `the introduction tutorial `__ \* `the 2d segmentation example `__ \* `the 3d segmentation example `__ \* `the generative adversarial example `__ ================================================ FILE: docs/conda.yml ================================================ name: delira-docs dependencies: - python=3.7 - pip: - sphinx==1.8.4 - sphinx-rtd-theme ================================================ FILE: docs/conf.py ================================================ # -*- coding: utf-8 -*- # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/master/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # from delira._version import get_versions import os import sys import re # source code directory, relative to this file, for sphinx-build sys.path.insert(0, os.path.join(os.path.dirname(__file__), os.path.pardir)) # -- Project information ----------------------------------------------------- project = 'delira' copyright = '2019, Justus Schock, Michael Baumgartner, Oliver Rippel, Christoph Haarburger' author = 'Justus Schock, Michael Baumgartner, Oliver Rippel, Christoph Haarburger' def read_file(file): with open(file) as f: content = f.read() return content whole_version = get_versions()["version"] # The short X.Y version version = whole_version.split("+", 1)[0] # The full version, including alpha/beta/rc tags release = whole_version # delira.__version__ # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', 'sphinx.ext.githubpages', 'sphinx.ext.napoleon', 'sphinx.ext.inheritance_diagram', 'sphinx.ext.autosectionlabel', ] # Add any paths that contain templates here, relative to this directory. 
templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = { "collapse_navigation": False, "logo_only": True } html_logo = "_static/logo/delira.svg" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. # # html_sidebars = {} html_sidebars = { '**': [ 'relations.html', # needs 'show_related': True theme option to display 'searchbox.html', 'localtoc.html', 'sourcelink.html', ] } # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = 'deliradoc' # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'delira.tex', 'delira Documentation', author, 'manual'), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'delira', 'delira Documentation', [author], 1) ] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. 
List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'delira', 'delira Documentation', author, 'delira', 'One line description of project.', 'Miscellaneous'), ] # -- Extension configuration ------------------------------------------------- # -- Options for intersphinx extension --------------------------------------- # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { 'https://docs.python.org/': None, 'trixi': ( 'https://trixi.readthedocs.io/en/latest/', None), 'torch': ( 'https://pytorch.org/docs/stable/', None), 'tensorflow': ( 'https://www.tensorflow.org/api_docs/python/', None), 'chainer': ( 'https://docs.chainer.org/en/stable/', None), 'sklearn': ( 'https://scikit-learn.org/stable/documentation/', None), 'numpy': ( 'https://docs.scipy.org/doc/numpy/reference/', None), 'scipy': ( 'https://docs.scipy.org/doc/scipy/reference/' ) } # -- Options for todo extension ---------------------------------------------- # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True autoclass_content = 'both' add_module_names = False autodoc_default_flags = ['members', 'undoc-members', 'private-members', 'inherited-members', 'show-inheritance'] autodoc_inherit_docstrings = True autodoc_mock_imports = [ "numpy", "torchvision", "torch", "skimage", "sklearn", "jupyter", "flake8" "pytest-cov", "autopep8", "ipython", "joblib", "pillow", "SimpleITK", "pylint", "tqdm", "visdom", "pyyaml", "trixi", "batchgenerators", "psutil", "nested_lookup", "colorlover", "flask", "graphviz", "matplotlib", "seaborn", "scipy", "scipy.ndimage", "telegram", "portalocker", "plotly", "PIL", "umap", "tensorflow", "yaml", "chainer" ] # autodoc_mock_imports = [ # "torch.optim", # "torch.optim.lr_scheduler", # "yaml", # "numpy", # "torchvision", # "torchvision.datasets", # "torch", # "torch.nn", # "torch.nn.functional", # "skimage", # "skimage.io", # "skimage.transform", # "sklearn", # "sklearn.model_selection", # "jupyter", # "flake8" # "pytest-cov", # "autopep8", # "ipython", # "joblib", # "pillow", # "SimpleITK", # "pylint", # "tqdm", # "visdom", # "pyyaml", # "trixi", # "trixi.experiment", # "trixi.logger", # "trixi.util", # "batchgenerators", # "batchgenerators.dataloading", # "batchgenerators.dataloading.data_loader", # "batchgenerators.transforms", # "psutil", # "nested_lookup", # "colorlover", # "flask", # "graphviz", # "matplotlib", # "seaborn", # "scipy", # "scipy.ndimage", # "telegram", # "portalocker", # "plotly", # "PIL", # "umap", # "PIL.Image", # "tensorflow", # "tqdm.auto", # "trixi.logger.tensorboard", # "trixi.logger.tensorboard.tensorboardxlogger", # "sklearn.metrics", # ] ================================================ FILE: docs/custom_backend.rst ================================================ How To: Integrate your own Computation Backend ============================================== *Author: Justus Schock* *Date: 15.05.2019* This howto will take you on a trip through the ``delira`` internals, while we will see, how to add a custom computation backend on the examplaric case of the ``torch.jit`` or ``TorchScript`` backend Model Definitions ----------------- In order to implement a network, we will first have to define the network itself. In ``delira`` there is a single backend-specific implementation of an abstract network class for each of the backends. 
These interface classes are all based on the ``AbstractNetwork`` class, which defines the major API. So let's start by having a look at this class to see what we will have to implement for our own backend.

Of course, we will have to implement an ``__init__`` defining our class. The ``__init__`` of ``AbstractNetwork`` (which should be called during the ``__init__`` of our base class) accepts a number of kwargs and simply registers them as ``init_kwargs``, so there is nothing we have to take care of. The next function to inspect is the ``__call__`` function, which makes the class callable; the docstrings indicate that it should take care of our model's forward pass. After the ``__call__`` we have the ``closure`` function, which defines a single training step (including, but not limited to, forward pass, calculation of losses and train metrics, backward pass and optimization). The last method to implement is the ``prepare_batch`` function, which converts the input to a suitable format and the correct data type and device.

TorchScript Limitations
~~~~~~~~~~~~~~~~~~~~~~~

Since we want to implement an abstract network class for this specific backend, we should have a look at how models are generally implemented in this backend. According to the `PyTorch docs `__ this works as follows:

    You can write TorchScript code directly using Python syntax. You do this using the ``torch.jit.script`` decorator (for functions) or ``torch.jit.script_method`` decorator (for methods) on subclasses of ``ScriptModule``. With this decorator the body of the annotated function is directly translated into TorchScript. TorchScript itself is a subset of the Python language, so not all features in Python work, but we provide enough functionality to compute on tensors and do control-dependent operations.

Since our use case is to implement the interface class for networks, we want to go the way of subclassing ``torch.jit.ScriptModule``, implementing its ``forward`` and decorating it with ``torch.jit.script_method``. The example given in the very same docs for this case is:

.. code:: ipython3

    import torch

    class MyScriptModule(torch.jit.ScriptModule):
        def __init__(self, N, M):
            super().__init__()
            self.weight = torch.nn.Parameter(torch.rand(N, M))

        @torch.jit.script_method
        def forward(self, input):
            return self.weight.mv(input)

    my_script_module = MyScriptModule(5, 3)
    input_tensor = torch.rand(3)
    my_script_module(input_tensor)

.. parsed-literal::

    tensor([0.4997, 0.2955, 0.1588, 0.1873, 0.4753], grad_fn=)

Merging TorchScript into our Abstract Class
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This little example gives us a few things we have to do for a successful definition of our base class:

**1.)** Our class has to subclass both the ``AbstractNetwork`` and the ``torch.jit.ScriptModule`` class.

**2.)** We need to implement a ``forward`` method, which takes care of the forward pass (as its name indicates).

**3.)** We don't have to take care of the backward pass, thanks to ``PyTorch``'s and ``TorchScript``'s autograd, which is a framework for automatic differentiation.

**4.)** Since ``torch.jit.ScriptModule`` is callable (seen in the example), it already implements a ``__call__`` method and we may simply use this one.

**5.)** The ``closure`` is completely network-dependent and thus has to remain an abstract method here.
**6.)** The ``prepare_batch`` function also depends on the combination of network, inputs and loss functions to use, but we can at least give a prototype of such a function, which handles the devices correctly and converts everything to ``float``.

Actual Implementation
~~~~~~~~~~~~~~~~~~~~~

Now let's start with the actual implementation, going through the functions one by one and keeping in mind the things we just discovered.

Class Signature and ``__init__``-Method
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

To subclass both base classes, we cannot use the simple ``super().__init__`` approach, because we have to initialize both parent classes, so we do

.. code:: python

    class AbstractTorchScriptNetwork(AbstractNetwork, torch.jit.ScriptModule):
        @abc.abstractmethod
        def __init__(self, optimize=True, **kwargs):
            """
            Parameters
            ----------
            optimize : bool
                whether to optimize the network graph or not; default: True
            **kwargs :
                additional keyword arguments (passed to :class:`AbstractNetwork`)
            """
            torch.jit.ScriptModule.__init__(self, optimize=optimize)
            AbstractNetwork.__init__(self, **kwargs)

instead. This ensures that all parent classes are initialized correctly.

``__call__``-Method
^^^^^^^^^^^^^^^^^^^

As mentioned above, the ``__call__`` method is very easy to implement, because we can simply use the implementation of our ``TorchScript`` base class like this:

.. code:: python

    def __call__(self, *args, **kwargs):
        """
        Calls Forward method

        Parameters
        ----------
        *args :
            positional arguments (passed to `forward`)
        **kwargs :
            keyword arguments (passed to `forward`)

        Returns
        -------
        Any
            result: module results of arbitrary type and number
        """
        return torch.jit.ScriptModule.__call__(self, *args, **kwargs)

This also ensures that we can pass an arbitrary number of positional and keyword arguments of arbitrary types to it (which are all passed to the ``forward`` function). The advantage over directly calling the ``forward`` method here is that ``ScriptModule.__call__`` already does the handling of `forward-pre-hooks `__, `forward-hooks `__ and `backward-hooks `__.

``closure``-Method
^^^^^^^^^^^^^^^^^^

Since this method is highly model-dependent, we just don't implement it, which forces the user to implement it (since it is marked as an ``abstractmethod`` in ``AbstractNetwork``).

``prepare_batch``-Method
^^^^^^^^^^^^^^^^^^^^^^^^

The above-mentioned prototype, which pushes everything to the correct device and converts it to float, looks like this:

.. code:: python

    @staticmethod
    def prepare_batch(batch: dict, input_device, output_device):
        """
        Helper Function to prepare Network Inputs and Labels (convert them
        to correct type and shape and push them to correct devices)

        Parameters
        ----------
        batch : dict
            dictionary containing all the data
        input_device : torch.device
            device for network inputs
        output_device : torch.device
            device for network outputs

        Returns
        -------
        dict
            dictionary containing data in correct type and shape and on
            correct device
        """
        return_dict = {"data": torch.from_numpy(batch.pop("data")).to(
            input_device).to(torch.float)}

        for key, vals in batch.items():
            return_dict[key] = torch.from_numpy(vals).to(output_device).to(
                torch.float)

        return return_dict

Since we don't want to use any of the model's attributes here (and for conformity with the ``AbstractNetwork`` class), this method is defined as a ``staticmethod``, meaning it is bound to the class, not to an instance. The ``closure`` method has to be a ``staticmethod`` too.
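To make the behavior of this prototype a bit more concrete, here is a minimal usage sketch. It is only illustrative: it assumes a CPU-only setup, uses random dummy data, and imports the finished base class from ``delira.models.backends.torchscript`` (the module documented in the API section above); adjust the import to your installation if necessary.

.. code:: python

    import numpy as np
    import torch

    from delira.models.backends.torchscript import AbstractTorchScriptNetwork

    # a dummy batch as it could come out of the data loading pipeline
    batch = {"data": np.random.rand(4, 3).astype(np.float32),
             "label": np.random.rand(4, 1).astype(np.float32)}

    prepared = AbstractTorchScriptNetwork.prepare_batch(
        batch,
        input_device=torch.device("cpu"),
        output_device=torch.device("cpu"))

    # "data" was popped from the batch dict, converted to a float tensor and
    # pushed to the input device; every remaining entry (here only "label")
    # ends up as a float tensor on the output device
    assert isinstance(prepared["data"], torch.Tensor)
    assert isinstance(prepared["label"], torch.Tensor)

Note that the prototype pops the ``"data"`` entry, so the passed-in batch dict is modified in place; if you still need the original numpy batch afterwards, pass a copy.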
``forward``-Method ^^^^^^^^^^^^^^^^^^ The only thing left now, is the ``forward`` method, which is internally called by ``ScriptModule.__call__``. The bad news is: We currently can't implement it. Subclassing a ``ScriptModule`` to overwrite a function decorated with ``torch.jit.script_method`` is not (yet) supported, but will be soon, once `this PR `__ is merged and released. For now: you simply have to implement this method in your own network despite the missing of an abstract interface-method. Putting it all together ^^^^^^^^^^^^^^^^^^^^^^^ If we combine all the function implementations to one class, it looks like this: .. code:: python class AbstractTorchScriptNetwork(AbstractNetwork, torch.jit.ScriptModule): """ Abstract Interface Class for TorchScript Networks. For more information have a look at https://pytorch.org/docs/stable/jit.html#torchscript Warnings -------- In addition to the here defined API, a forward function must be implemented and decorated with ``@torch.jit.script_method`` """ @abc.abstractmethod def __init__(self, optimize=True, **kwargs): """ Parameters ---------- optimize : bool whether to optimize the network graph or not; default: True **kwargs : additional keyword arguments (passed to :class:`AbstractNetwork`) """ torch.jit.ScriptModule.__init__(self, optimize=optimize) AbstractNetwork.__init__(self, **kwargs) def __call__(self, *args, **kwargs): """ Calls Forward method Parameters ---------- *args : positional arguments (passed to `forward`) **kwargs : keyword arguments (passed to `forward`) Returns ------- Any result: module results of arbitrary type and number """ return torch.jit.ScriptModule.__call__(self, *args, **kwargs) @staticmethod def prepare_batch(batch: dict, input_device, output_device): """ Helper Function to prepare Network Inputs and Labels (convert them to correct type and shape and push them to correct devices) Parameters ---------- batch : dict dictionary containing all the data input_device : torch.device device for network inputs output_device : torch.device device for network outputs Returns ------- dict dictionary containing data in correct type and shape and on correct device """ return_dict = {"data": torch.from_numpy(batch.pop("data")).to( input_device).to(torch.float)} for key, vals in batch.items(): return_dict[key] = torch.from_numpy(vals).to(output_device).to( torch.float) return return_dict Saving and loading ------------------ Now that we have the ability to implement ``delira``-suitable TorchScript models, we want to store them on disk and load them again, so that we don't have to retrain them every time we want to use them. These I/O functions are usually located in ``delira.io``. Saving ~~~~~~ Our saving function utilizes multiple functions: ``torch.jit.save`` to simply save the model (including it's graph) and the ``save_checkpoint_torch`` function implemented for the ``PyTorch`` backend to store the trainer state, since ``TorchScript`` allows us to use plain ``PyTorch`` optimizers. The implementation of the function looks like this: .. code:: python def save_checkpoint_torchscript(file: str, model=None, optimizers={}, epoch=None, **kwargs): """ Save current checkpoint to two different files: 1.) ``file + "_model.ptj"``: Will include the state of the model (including the graph; this is the opposite to :func:`save_checkpoint`) 2.) 
``file + "_trainer_state.pt"``: Will include the states of all optimizers and the current epoch (if given) Parameters ---------- file : str filepath the model should be saved to model : AbstractPyTorchJITNetwork or None the model which should be saved if None: empty dict will be saved as state dict optimizers : dict dictionary containing all optimizers epoch : int current epoch (will also be pickled) """ # remove file extension if given if any([file.endswith(ext) for ext in [".pth", ".pt", ".ptj"]]): file = file.rsplit(".", 1)[0] if isinstance(model, AbstractPyTorchJITNetwork): torch.jit.save(model, file + "_model.ptj") if optimizers or epoch is not None: save_checkpoint_torch(file + "_trainer_state.pt", None, optimizers=optimizers, epoch=epoch, **kwargs)

Loading
~~~~~~~

To load a model which has been saved to disk by this function, we have to revert each part of it. We do this by using ``torch.jit.load`` for the model (and the graph) and ``load_checkpoint_torch`` from the ``PyTorch`` backend. The actual implementation is given here:

.. code:: python

    def load_checkpoint_torchscript(file: str, **kwargs):
        """
        Loads a saved checkpoint consisting of 2 files
        (see :func:`save_checkpoint_jit` for details)

        Parameters
        ----------
        file : str
            filepath to a file containing a saved model
        **kwargs:
            Additional keyword arguments (passed to torch.load)
            Especially "map_location" is important to change the device the
            state_dict should be loaded to

        Returns
        -------
        OrderedDict
            checkpoint state_dict
        """
        # remove file extensions
        if any([file.endswith(ext) for ext in [".pth", ".pt", ".ptj"]]):
            file = file.rsplit(".", 1)[0]

        # load model
        if os.path.isfile(file + ".ptj"):
            model_file = file
        elif os.path.isfile(file + "_model.ptj"):
            model_file = file + "_model.ptj"
        else:
            raise ValueError("No Model File found for %s" % file)

        # load trainer state (if possible)
        trainer_file = model_file.replace("_model.ptj", "_trainer_state.pt")
        if os.path.isfile(trainer_file):
            trainer_state = load_checkpoint_torch(trainer_file, **kwargs)
        else:
            trainer_state = {"optimizer": {}, "epoch": None}

        trainer_state.update({"model": torch.jit.load(model_file)})

        return trainer_state

A Trainer to train
------------------

Now that we can define and save/load our models, we want to train them. Luckily ``delira`` has already implemented a very modular backend-agnostic trainer (the ``BaseNetworkTrainer``) and, built on top of it, a ``PyTorchNetworkTrainer``. Since the training process in PyTorch and TorchScript is nearly the same, we can just extend the ``PyTorchNetworkTrainer``. Usually one would have to extend the ``BaseNetworkTrainer`` to provide some backend-specific functions (like necessary initializations, optimizer setup, seeding etc.). To see how this is done, you could have a look at either the ``PyTorchNetworkTrainer`` or the ``TfNetworkTrainer`` for TensorFlow, which both follow this principle. Usually the only things that have to be changed completely are the loading/saving behavior and the ``_setup`` function, which defines the backend-specific initialization. Some other functions may have to be extended (by implementing the extension and calling the parent class's function), as sketched below.
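As a rough illustration of that "extend and delegate" pattern, the following sketch overrides a single method, performs the backend-specific part first and then falls back to the parent's implementation. This is only a schematic example: ``MyBackendTrainer`` is a made-up name, ``save_state`` merely stands in for any method one might extend, and the import path assumes the PyTorch backend is available (compare the API documentation above).

.. code:: python

    import logging

    from delira.training.backends.torch import PyTorchNetworkTrainer


    class MyBackendTrainer(PyTorchNetworkTrainer):
        def save_state(self, file_name, *args, **kwargs):
            # backend-specific part of the extension (here just a log message)
            logging.info("Saving checkpoint to %s", file_name)
            # ... then delegate to the parent class, which already implements
            # the actual saving logic
            return super().save_state(file_name, *args, **kwargs)

The very same pattern (do the backend-specific work, then call ``super()``) is used further below by the TorchScript trainer's ``_update_state`` method.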
Things to change: ~~~~~~~~~~~~~~~~~ By Subclassing the ``PyTorchNetworkTrainer`` we have to change the following things: - The trainer's default arguments - The behavior for trying to resume a previous training - The saving, loading and updating behavior We will access this one by one: The Default Arguments ^^^^^^^^^^^^^^^^^^^^^ We want to use ``AbstractTorchScriptNetwork``\ s instead of ``AbstractPyTorchNetwork``\ s here and we have to change the behavior if passing multiple GPUs, because currently Multi-GPU training is not supported by ``TorchScript``. To do this: we implement the functions ``__init__``, apply our changes and forward these changes to the call of the base-classes ``__init__`` like this (omitted docstrings for the sake of shortness): .. code:: python class TorchScriptNetworkTrainer(PyTorchNetworkTrainer): def __init__(self, network: AbstractTorchScriptNetwork, save_path: str, key_mapping, losses=None, optimizer_cls=None, optimizer_params={}, train_metrics={}, val_metrics={}, lr_scheduler_cls=None, lr_scheduler_params={}, gpu_ids=[], save_freq=1, optim_fn=create_optims_default, logging_type="tensorboardx", logging_kwargs={}, fold=0, callbacks=[], start_epoch=1, metric_keys=None, convert_batch_to_npy_fn=convert_torch_tensor_to_npy, criterions=None, val_freq=1, **kwargs): if len(gpu_ids) > 1: # only use first GPU due to # https://github.com/pytorch/pytorch/issues/15421 gpu_ids = [gpu_ids[0]] logging.warning("Multiple GPUs specified. Torch JIT currently " "supports only single-GPU training. " "Switching to use only the first GPU for now...") super().__init__(network=network, save_path=save_path, key_mapping=key_mapping, losses=losses, optimizer_cls=optimizer_cls, optimizer_params=optimizer_params, train_metrics=train_metrics, val_metrics=val_metrics, lr_scheduler_cls=lr_scheduler_cls, lr_scheduler_params=lr_scheduler_params, gpu_ids=gpu_ids, save_freq=save_freq, optim_fn=optim_fn, logging_type=logging_type, logging_kwargs=logging_kwargs, fold=fold, callbacks=callbacks, start_epoch=start_epoch, metric_keys=metric_keys, convert_batch_to_npy_fn=convert_batch_to_npy_fn, mixed_precision=False, mixed_precision_kwargs={}, criterions=criterions, val_freq=val_freq, **kwargs ) Resuming Training ^^^^^^^^^^^^^^^^^ For resuming the training, we have to completely change the ``try_resume_training`` function and cannot reuse the parent's implementation of it. Thus, we don't call ``super().try_resume_training`` here, but completely reimplement it from scratch: .. 
code:: python def try_resume_training(self): """ Load the latest state of a previous training if possible """ # Load latest epoch file if available if os.path.isdir(self.save_path): # check all files in directory starting with "checkpoint" and # not ending with "_best.pth" files = [x for x in os.listdir(self.save_path) if os.path.isfile(os.path.join(self.save_path, x)) and x.startswith("checkpoint") and not x.endswith("_best.ptj") ] # if list is not empty: load previous state if files: latest_epoch = max([ int(x.rsplit("_", 1)[-1].rsplit(".", 1)[0]) for x in files]) latest_state_path = os.path.join(self.save_path, "checkpoint_epoch_%d.ptj" % latest_epoch) # if pth file does not exist, load pt file instead if not os.path.isfile(latest_state_path): latest_state_path = latest_state_path[:-1] logger.info("Attempting to load state from previous \ training from %s" % latest_state_path) try: self.update_state(latest_state_path) except KeyError: logger.warning("Previous State could not be loaded, \ although it exists.Training will be \ restarted") Saving and Loading ^^^^^^^^^^^^^^^^^^ Now we need to change the saving and loading behavior. As always we try to reuse as much code as possible to avoid code duplication. Saving '''''' To save the current training state, we simply call the ``save_checkpoint_torchscript`` function: .. code:: python def save_state(self, file_name, epoch, **kwargs): """ saves the current state via :func:`delira.io.torch.save_checkpoint_jit` Parameters ---------- file_name : str filename to save the state to epoch : int current epoch (will be saved for mapping back) **kwargs : keyword arguments """ if file_name.endswith(".pt") or file_name.endswith(".pth"): file_name = file_name.rsplit(".", 1)[0] save_checkpoint_torchscript(file_name, self.module, self.optimizers, **kwargs) Loading ''''''' To load the training state, we simply return the state loaded by ``load_checkpoint_torchscript``. Since we don't use any arguments of the trainer itself here, the function is a ``staticmethod``: .. code:: python @staticmethod def load_state(file_name, **kwargs): """ Loads the new state from file via :func:`delira.io.torch.load_checkpoint:jit` Parameters ---------- file_name : str the file to load the state from **kwargs : keyword arguments Returns ------- dict new state """ return load_checkpoint_torchscript(file_name, **kwargs) Updating '''''''' After we loaded the new state, we need to update the trainer's internal state by this new state. We do this by directly assigning the model here (since the graph was stored/loaded too) instead of only updating the state\_dict and calling the parent-classes method afterwards: .. code:: python def _update_state(self, new_state): """ Update the state from a given new state Parameters ---------- new_state : dict new state to update internal state from Returns ------- :class:`PyTorchNetworkJITTrainer` the trainer with a modified state """ if "model" in new_state: self.module = new_state.pop("model").to(self.input_device) return super()._update_state(new_state) A Whole Trainer ~~~~~~~~~~~~~~~ After combining all the changes above, we finally get our new trainer as: .. 
code:: python class TorchScriptNetworkTrainer(PyTorchNetworkTrainer): def __init__(self, network: AbstractTorchScriptNetwork, save_path: str, key_mapping, losses=None, optimizer_cls=None, optimizer_params={}, train_metrics={}, val_metrics={}, lr_scheduler_cls=None, lr_scheduler_params={}, gpu_ids=[], save_freq=1, optim_fn=create_optims_default, logging_type="tensorboardx", logging_kwargs={}, fold=0, callbacks=[], start_epoch=1, metric_keys=None, convert_batch_to_npy_fn=convert_torch_tensor_to_npy, criterions=None, val_freq=1, **kwargs): """ Parameters ---------- network : :class:`AbstractPyTorchJITNetwork` the network to train save_path : str path to save networks to key_mapping : dict a dictionary containing the mapping from the ``data_dict`` to the actual model's inputs. E.g. if a model accepts one input named 'x' and the data_dict contains one entry named 'data' this argument would have to be ``{'x': 'data'}`` losses : dict dictionary containing the training losses optimizer_cls : subclass of tf.train.Optimizer optimizer class implementing the optimization algorithm of choice optimizer_params : dict keyword arguments passed to optimizer during construction train_metrics : dict, optional metrics, which will be evaluated during train phase (should work on framework's tensor types) val_metrics : dict, optional metrics, which will be evaluated during test phase (should work on numpy arrays) lr_scheduler_cls : Any learning rate schedule class: must implement step() method lr_scheduler_params : dict keyword arguments passed to lr scheduler during construction gpu_ids : list list containing ids of GPUs to use; if empty: use cpu instead Currently ``torch.jit`` only supports single GPU-Training, thus only the first GPU will be used if multiple GPUs are passed save_freq : int integer specifying how often to save the current model's state. State is saved every state_freq epochs optim_fn : function creates a dictionary containing all necessary optimizers logging_type : str or callable the type of logging. If string: it must be one of ["visdom", "tensorboardx"] If callable: it must be a logging handler class logging_kwargs : dict dictionary containing all logging keyword arguments fold : int current cross validation fold (0 per default) callbacks : list initial callbacks to register start_epoch : int epoch to start training at metric_keys : dict dict specifying which batch_dict entry to use for which metric as target; default: None, which will result in key "label" for all metrics convert_batch_to_npy_fn : type, optional function converting a batch-tensor to numpy, per default this is a function, which detaches the tensor, moves it to cpu and the calls ``.numpy()`` on it mixed_precision : bool whether to use mixed precision or not (False per default) mixed_precision_kwargs : dict additional keyword arguments for mixed precision val_freq : int validation frequency specifying how often to validate the trained model (a value of 1 denotes validating every epoch, a value of 2 denotes validating every second epoch etc.); defaults to 1 **kwargs : additional keyword arguments """ if len(gpu_ids) > 1: # only use first GPU due to # https://github.com/pytorch/pytorch/issues/15421 gpu_ids = [gpu_ids[0]] logging.warning("Multiple GPUs specified. Torch JIT currently " "supports only single-GPU training. 
" "Switching to use only the first GPU for now...") super().__init__(network=network, save_path=save_path, key_mapping=key_mapping, losses=losses, optimizer_cls=optimizer_cls, optimizer_params=optimizer_params, train_metrics=train_metrics, val_metrics=val_metrics, lr_scheduler_cls=lr_scheduler_cls, lr_scheduler_params=lr_scheduler_params, gpu_ids=gpu_ids, save_freq=save_freq, optim_fn=optim_fn, logging_type=logging_type, logging_kwargs=logging_kwargs, fold=fold, callbacks=callbacks, start_epoch=start_epoch, metric_keys=metric_keys, convert_batch_to_npy_fn=convert_batch_to_npy_fn, mixed_precision=False, mixed_precision_kwargs={}, criterions=criterions, val_freq=val_freq, **kwargs ) def try_resume_training(self): """ Load the latest state of a previous training if possible """ # Load latest epoch file if available if os.path.isdir(self.save_path): # check all files in directory starting with "checkpoint" and # not ending with "_best.pth" files = [x for x in os.listdir(self.save_path) if os.path.isfile(os.path.join(self.save_path, x)) and x.startswith("checkpoint") and not x.endswith("_best.ptj") ] # if list is not empty: load previous state if files: latest_epoch = max([ int(x.rsplit("_", 1)[-1].rsplit(".", 1)[0]) for x in files]) latest_state_path = os.path.join(self.save_path, "checkpoint_epoch_%d.ptj" % latest_epoch) # if pth file does not exist, load pt file instead if not os.path.isfile(latest_state_path): latest_state_path = latest_state_path[:-1] logger.info("Attempting to load state from previous \ training from %s" % latest_state_path) try: self.update_state(latest_state_path) except KeyError: logger.warning("Previous State could not be loaded, \ although it exists.Training will be \ restarted") def save_state(self, file_name, epoch, **kwargs): """ saves the current state via :func:`delira.io.torch.save_checkpoint_jit` Parameters ---------- file_name : str filename to save the state to epoch : int current epoch (will be saved for mapping back) **kwargs : keyword arguments """ if file_name.endswith(".pt") or file_name.endswith(".pth"): file_name = file_name.rsplit(".", 1)[0] save_checkpoint_torchscript(file_name, self.module, self.optimizers, **kwargs) @staticmethod def load_state(file_name, **kwargs): """ Loads the new state from file via :func:`delira.io.torch.load_checkpoint:jit` Parameters ---------- file_name : str the file to load the state from **kwargs : keyword arguments Returns ------- dict new state """ return load_checkpoint_torchscript(file_name, **kwargs) def _update_state(self, new_state): """ Update the state from a given new state Parameters ---------- new_state : dict new state to update internal state from Returns ------- :class:`PyTorchNetworkJITTrainer` the trainer with a modified state """ if "model" in new_state: self.module = new_state.pop("model").to(self.input_device) return super()._update_state(new_state) Wrapping it all in an Experiment -------------------------------- To have access to methods like a K-Fold (and the not yet finished) hyperparameter tuning, we need to wrap the trainer in an Experiment. We will use the same approach as we did for implementing the trainer: Extending an already provided class. This time we extend the ``PyTorchExperiment`` which itself extends the ``BaseExperiment`` by some backend-specific defaults, types and seeds. Our whole class definition just changes the default arguments of the ``PyTorchExperiment`` and thus, we only have to implenent it's ``__init__``: .. 
code:: python class TorchScriptExperiment(PyTorchExperiment): def __init__(self, params: typing.Union[str, Parameters], model_cls: AbstractTorchScriptNetwork, # not AbstractPyTorchNetwork anymore n_epochs=None, name=None, save_path=None, key_mapping=None, val_score_key=None, optim_builder=create_optims_default_pytorch, checkpoint_freq=1, trainer_cls=TorchScriptNetworkTrainer, # not PyTorchNetworkTrainer anymore **kwargs): """ Parameters ---------- params : :class:`Parameters` or str the training parameters, if string is passed, it is treated as a path to a pickle file, where the parameters are loaded from model_cls : Subclass of :class:`AbstractTorchScriptNetwork` the class implementing the model to train n_epochs : int or None the number of epochs to train, if None: can be specified later during actual training name : str or None the Experiment's name save_path : str or None the path to save the results and checkpoints to. if None: Current working directory will be used key_mapping : dict mapping between data_dict and model inputs (necessary for prediction with :class:`Predictor`-API), if no keymapping is given, a default key_mapping of {"x": "data"} will be used here val_score_key : str or None key defining which metric to use for validation (determining best model and scheduling lr); if None: No validation-based operations will be done (model might still get validated, but validation metrics can only be logged and not used further) optim_builder : function Function returning a dict of backend-specific optimizers. defaults to :func:`create_optims_default_pytorch` checkpoint_freq : int frequency of saving checkpoints (1 denotes saving every epoch, 2 denotes saving every second epoch etc.); default: 1 trainer_cls : subclass of :class:`TorchScriptNetworkTrainer` the trainer class to use for training the model, defaults to :class:`TorchScriptNetworkTrainer` **kwargs : additional keyword arguments """ super().__init__(params=params, model_cls=model_cls, n_epochs=n_epochs, name=name, save_path=save_path, key_mapping=key_mapping, val_score_key=val_score_key, optim_builder=optim_builder, checkpoint_freq=checkpoint_freq, trainer_cls=trainer_cls, **kwargs) Testing it ---------- Now that we finished the implementation of the backend (which is the outermost wrapper; Congratulations!), we can just test it. We'll use a very simple network and test it with dummy data. We also only test the ``run`` and ``test`` functionality of our experiment, since everything else is just used for setting up the internal state or a composition of these two methods and already tested: Now, let's just define our dataset, instantiate it three times (for training, validation and testing) and wrap each of them into a ``DataManager``: .. code:: ipython3 from delira.data_loading import AbstractDataset from delira.data_loading import DataManager class DummyDataset(AbstractDataset): def __init__(self, length): super().__init__(None, None) self.length = length def __getitem__(self, index): return {"data": np.random.rand(32), "label": np.random.randint(0, 1, 1)} def __len__(self): return self.length def get_sample_from_index(self, index): return self.__getitem__(index) dset_train = DummyDataset(500) dset_val = DummyDataset(50) dset_test = DummyDataset(10) # training, validation and testing with #a batchsize of 16, 1 loading thread and no transformations. 
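# DataManager positional arguments, in order: dataset, batch size, number of augmentation processes, transforms (None here, since the dummy data is not augmented)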
dmgr_train = DataManager(dset_train, 16, 1, None) dmgr_val = DataManager(dset_val, 16, 1, None) dmgr_test = DataManager(dset_test, 16, 1, None) Now, that we have created three datasets, we need to define our small dummy network. We do this by subclassing ``delira.models.AbstractTorchScriptNetwork`` (which is the exactly implementation given above, be we need to use the internal one, because there are some typechecks against this one). .. code:: ipython3 from delira.models import AbstractTorchScriptNetwork import torch class DummyNetworkTorchScript(AbstractTorchScriptNetwork): __constants__ = ["module"] def __init__(self): super().__init__() self.module = self._build_model(32, 1) @torch.jit.script_method def forward(self, x): return {"pred": self.module(x)} @staticmethod def prepare_batch(batch_dict, input_device, output_device): return {"data": torch.from_numpy(batch_dict["data"] ).to(input_device, torch.float), "label": torch.from_numpy(batch_dict["label"] ).to(output_device, torch.float)} @staticmethod def closure(model: AbstractTorchScriptNetwork, data_dict: dict, optimizers: dict, losses={}, metrics={}, fold=0, **kwargs): """ closure method to do a single backpropagation step Parameters ---------- model : trainable model data_dict : dict dictionary containing the data optimizers : dict dictionary of optimizers to optimize model's parameters losses : dict dict holding the losses to calculate errors (gradients from different losses will be accumulated) metrics : dict dict holding the metrics to calculate fold : int Current Fold in Crossvalidation (default: 0) **kwargs: additional keyword arguments Returns ------- dict Metric values (with same keys as input dict metrics) dict Loss values (with same keys as input dict losses) list Arbitrary number of predictions as torch.Tensor Raises ------ AssertionError if optimizers or losses are empty or the optimizers are not specified """ assert (optimizers and losses) or not optimizers, \ "Criterion dict cannot be emtpy, if optimizers are passed" loss_vals = {} metric_vals = {} total_loss = 0 # choose suitable context manager: if optimizers: context_man = torch.enable_grad else: context_man = torch.no_grad with context_man(): inputs = data_dict.pop("data") preds = model(inputs) if data_dict: for key, crit_fn in losses.items(): _loss_val = crit_fn(preds["pred"], *data_dict.values()) loss_vals[key] = _loss_val.item() total_loss += _loss_val with torch.no_grad(): for key, metric_fn in metrics.items(): metric_vals[key] = metric_fn( preds["pred"], *data_dict.values()).item() if optimizers: optimizers['default'].zero_grad() # perform loss scaling via apex if half precision is enabled with optimizers["default"].scale_loss(total_loss) as scaled_loss: scaled_loss.backward() optimizers['default'].step() else: # add prefix "val" in validation mode eval_loss_vals, eval_metrics_vals = {}, {} for key in loss_vals.keys(): eval_loss_vals["val_" + str(key)] = loss_vals[key] for key in metric_vals: eval_metrics_vals["val_" + str(key)] = metric_vals[key] loss_vals = eval_loss_vals metric_vals = eval_metrics_vals return metric_vals, loss_vals, {k: v.detach() for k, v in preds.items()} @staticmethod def _build_model(in_channels, n_outputs): return torch.nn.Sequential( torch.nn.Linear(in_channels, 64), torch.nn.ReLU(), torch.nn.Linear(64, n_outputs) ) Now, that we defined our model, let's just test, if we really can forward some tensors through it. We will just use some random ``torch.Tensors`` (created by ``torch.rand``). 
Since our model accepts 1d inputs of length 32, we need to pass 2d tensors to it (the additional dimension is the batch dimension). .. code:: ipython3 input_tensor_single = torch.rand(1, 32) # use a single-sample batch (batchsize=1) here input_tensor_batched = torch.rand(4, 32) # use a batch with batchsize 4 here # create model instance model = DummyNetworkTorchScript() outputs = {"single": model(input_tensor_single)["pred"], "batched": model(input_tensor_batched)["pred"]} outputs .. parsed-literal:: {'single': tensor([[-0.1934]], grad_fn=), 'batched': tensor([[-0.0525], [-0.0884], [-0.1492], [-0.0431]], grad_fn=)} .. code:: ipython3 from sklearn.metrics import mean_absolute_error from delira.training.callbacks import ReduceLROnPlateauCallbackPyTorch from delira.training import Parameters params = Parameters(fixed_params={ "model": {}, "training": { "losses": {"CE": torch.nn.BCEWithLogitsLoss()}, "optimizer_cls": torch.optim.Adam, "optimizer_params": {"lr": 1e-3}, "num_epochs": 2, "val_metrics": {"mae": mean_absolute_error}, "lr_sched_cls": ReduceLROnPlateauCallbackPyTorch, "lr_sched_params": {"mode": "min"} } } ) from delira.training import TorchScriptExperiment exp = TorchScriptExperiment(params, DummyNetworkTorchScript, key_mapping={"x": "data"}, val_score_key="mae", val_score_mode="min") trained_model = exp.run(dmgr_train, dmgr_val) exp.test(trained_model, dmgr_test, params.nested_get("val_metrics")) Congratulations. You have implemented your first fully working ``delira`` backend. Wasn't that hard, was it? Before you start implementing backends for all the other frameworks out there, let me just give you some advice: - You should test everything you implement or extend - Make sure to keep your backend specification in mind - Always follow the API of already existing backends. If this is not possible, test this extensively - If you extend another backend (like we did here; we extended the ``PyTorch`` backend for ``TorchScript``), make sure that the "base backend" is always installed (best if they can only be installed together) - If you have questions regarding the implementation, don't hesitate to contact us. ================================================ FILE: docs/gan_pytorch.rst ================================================ Generative Adversarial Nets with Delira - A very short introduction =================================================================== *Author: Justus Schock* *Date: 04.12.2018* This example shows how to set up a basic GAN PyTorch experiment and a Visdom logging environment. HyperParameters --------------- Let's first set up the essential hyperparameters. We will use ``delira``'s ``Parameters``-class for this: .. code:: ipython3 logger = None import torch from delira.training import Parameters params = Parameters(fixed_params={ "model": { "n_channels": 1, "noise_length": 10 }, "training": { "batch_size": 64, # batchsize to use "num_epochs": 10, # number of epochs to train "optimizer_cls": torch.optim.Adam, # optimization algorithm to use "optimizer_params": {'lr': 1e-3}, # initialization parameters for this algorithm "losses": {"L1": torch.nn.L1Loss()}, # the loss function "lr_sched_cls": None, # the learning rate scheduling algorithm to use "lr_sched_params": {}, # the corresponding initialization parameters "metrics": {} # and some evaluation metrics } }) Since we specified ``torch.nn.L1Loss`` as our loss and did not specify any additional metrics, only the L1 loss will be calculated for each batch and used for backpropagation.
Since we have a simple generative task, this should be sufficient. We will train our network with a batchsize of 64 by using ``Adam`` as optimizer of choice. Logging and Visualization ------------------------- To get a visualization of our results, we should monitor them somehow. For logging we will use ``Visdom``. To start a visdom server you need to execute the following command inside an environment which has visdom installed: .. code:: shell visdom -port=9999 This will start a visdom server on port 9999 of your machine and now we can start to configure our logging environment. To view your results you can open http://localhost:9999 in your browser. .. code:: ipython3 from trixi.logger import PytorchVisdomLogger from delira.logging import TrixiHandler import logging logger_kwargs = { 'name': 'GANExampleLogger', # name of our logging environment 'port': 9999 # port on which our visdom server is alive } logger_cls = PytorchVisdomLogger # configure logging module (and root logger) logging.basicConfig(level=logging.INFO, handlers=[TrixiHandler(logger_cls, **logger_kwargs)]) # derive logger from root logger # (don't do `logger = logging.Logger("...")` since this will create a new # logger which is unrelated to the root logger logger = logging.getLogger("Test Logger") Since a single visdom server can run multiple environments, we need to specify a (unique) name for our environment and need to tell the logger, on which port it can find the visdom server. Data Preparation ---------------- Loading ~~~~~~~ Next we will create a small train and validation set (based on ``torchvision`` MNIST): .. code:: ipython3 from delira.data_loading import TorchvisionClassificationDataset dataset_train = TorchvisionClassificationDataset("mnist", # which dataset to use train=True, # use trainset img_shape=(224, 224) # resample to 224 x 224 pixels ) dataset_val = TorchvisionClassificationDataset("mnist", train=False, img_shape=(224, 224) ) Augmentation ~~~~~~~~~~~~ For Data-Augmentation we will apply a few transformations: .. code:: ipython3 from batchgenerators.transforms import RandomCropTransform, \ ContrastAugmentationTransform, Compose from batchgenerators.transforms.spatial_transforms import ResizeTransform from batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform transforms = Compose([ RandomCropTransform(200), # Perform Random Crops of Size 200 x 200 pixels ResizeTransform(224), # Resample these crops back to 224 x 224 pixels ContrastAugmentationTransform(), # randomly adjust contrast MeanStdNormalizationTransform(mean=[0.5], std=[0.5])]) With these transformations we can now wrap our datasets into datamanagers: .. code:: ipython3 from delira.data_loading import DataManager, SequentialSampler, RandomSampler manager_train = DataManager(dataset_train, params.nested_get("batch_size"), transforms=transforms, sampler_cls=RandomSampler, n_process_augmentation=4) manager_val = DataManager(dataset_val, params.nested_get("batch_size"), transforms=transforms, sampler_cls=SequentialSampler, n_process_augmentation=4) Training -------- After we have done that, we can finally specify our experiment and run it. We will therfore use the already implemented ``GenerativeAdversarialNetworkBasePyTorch`` which is basically a vanilla DCGAN: .. 
code:: ipython3 import warnings warnings.simplefilter("ignore", UserWarning) # ignore UserWarnings raised by dependency code warnings.simplefilter("ignore", FutureWarning) # ignore FutureWarnings raised by dependency code from delira.training import PyTorchExperiment from delira.training.train_utils import create_optims_gan_default_pytorch from delira.models.gan import GenerativeAdversarialNetworkBasePyTorch if logger is not None: logger.info("Init Experiment") experiment = PyTorchExperiment(params, GenerativeAdversarialNetworkBasePyTorch, name="GANExample", save_path="./tmp/delira_Experiments", optim_builder=create_optims_gan_default_pytorch, gpu_ids=[0]) experiment.save() model = experiment.run(manager_train, manager_val) Congratulations, you have now trained your first Generative Adversarial Network using ``delira``. See Also -------- For a more detailed explanation have a look at \* `the introduction tutorial `__ \* `the 2d segmentation example `__ \* `the 3d segmentation example `__ \* `the classification example `__ ================================================ FILE: docs/getting_started.rst ================================================ Getting started =============== Backends -------- Before installing ``delira``, you have to choose a suitable backend. ``delira`` handles backends as optional dependencies and tries to guard all uses of a backend that is not installed. The currently supported backends are: * `torch `_ (recommended, since it is the most tested backend): Suffix ``torch`` .. note:: ``delira`` supports mixed-precision training via `apex `_, but ``apex`` must be installed separately * `torchscript `_ : Suffix ``torchscript`` .. note:: ``delira`` with the ``torchscript`` backend does currently not support Multi-GPU training. * `tensorflow eager execution `_: Suffix ``tensorflow`` .. note:: ``delira`` with the ``tensorflow eager`` backend does currently not support Multi-GPU training. * `tensorflow graph mode `_: Suffix ``tensorflow`` .. note:: ``delira`` with the ``tensorflow graph`` backend does currently not support Multi-GPU training. * `chainer `_: Suffix ``chainer`` * `scikit-learn `_: No Suffix * None: No Suffix * All (installs all registered backends and their dependencies; not recommended, since this will install many large packages): Suffix ``full`` .. note:: Depending on the backend, some functionalities may not be available for you. If you want to ensure that you can use every functionality, please use the ``full`` option, since it installs all backends .. note:: If you want to add a backend like `CNTK `_, `MXNET `_ or something similar, please open an issue for that and we will guide you during that process (don't worry, it is not much effort at all).
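Because every backend is an optional dependency, code that should run regardless of which backends are installed can simply guard its backend-specific imports. The following is a minimal sketch in plain Python (not a special ``delira`` API) of such a guard:

.. code:: python

    # minimal sketch: fall back gracefully if the torch backend is missing
    try:
        import torch
        HAS_TORCH_BACKEND = True
    except ImportError:
        HAS_TORCH_BACKEND = False

    if HAS_TORCH_BACKEND:
        # only valid if delira was installed with the torch suffix
        from delira.training import PyTorchExperiment
    else:
        print("torch backend not installed - skipping PyTorch-specific code")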
Installation ------------ =================== =================================== ================================================================================================= ====================================================================================================================== Backend Binary Installation Source Installation Notes =================== =================================== ================================================================================================= ====================================================================================================================== None ``pip install delira`` ``pip install git+https://github.com/delira-dev/delira.git`` Training not possible if backend is not installed separately `torch`_ ``pip install delira[torch]`` ``git clone https://github.com/delira-dev/delira.git && cd delira && pip install .[torch]`` ``delira`` with ``torch`` backend supports mixed-precision training via `NVIDIA/apex`_ (must be installed separately). `torchscript`_ ``pip install delira[torchscript]`` ``git clone https://github.com/delira-dev/delira.git && cd delira && pip install .[torchscript]`` The ``torchscript`` backend currently supports only single-GPU-training `tensorflow eager`_ ``pip install delira[tensorflow]`` ``git clone https://github.com/delira-dev/delira.git && cd delira && pip install .[tensorflow]`` the ``tensorflow`` backend is still very experimental and lacks some `features`_ `tensorflow graph`_ ``pip install delira[tensorflow]`` ``git clone https://github.com/delira-dev/delira.git && cd delira && pip install .[tensorflow]`` the ``tensorflow`` backend is still very experimental and lacks some `features`_ `scikit-learn`_ ``pip install delira`` ``pip install git+https://github.com/delira-dev/delira.git`` / `chainer`_ ``pip install delira[chainer]`` ``git clone https://github.com/delira-dev/delira.git && cd delira && pip install .[chainer]`` / Full ``pip install delira[full]`` ``git clone https://github.com/delira-dev/delira.git && cd delira && pip install .[full]`` All backends will be installed =================== =================================== ================================================================================================= ====================================================================================================================== .. _torch: https://pytorch.org .. _NVIDIA/apex: https://github.com/NVIDIA/apex.git .. _torchscript: https://pytorch.org/docs/stable/jit.html .. _tensorflow eager: https://www.tensorflow.org/ .. _features: https://github.com/delira-dev/delira/issues/47 .. _tensorflow graph: https://www.tensorflow.org/ .. _scikit-learn: https://scikit-learn.org/stable/ .. _chainer: https://chainer.org/ ================================================ FILE: docs/index.rst ================================================ .. delira documentation master file, created by sphinx-quickstart on Sat Dec 1 20:56:35 2018. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. ===================================================================== delira - A Backend Agnostic High Level Deep Learning Library ===================================================================== .. toctree:: :maxdepth: 2 :caption: Getting Started getting_started .. toctree:: :maxdepth: 2 :caption: Tutorials: tutorial_delira classification_pytorch gan_pytorch segmentation_2d_pytorch segmentation_3d_pytorch custom_backend .. 
toctree:: :maxdepth: 10 :titlesonly: :caption: API Documentation: _api/_build/modules GitHub Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: docs/requirements.txt ================================================ sphinx==1.8.4 sphinx-rtd-theme ================================================ FILE: docs/segmentation_2d_pytorch.rst ================================================ Segmentation in 2D using U-Nets with Delira - A very short introduction ======================================================================= *Author: Justus Schock, Alexander Moriz* *Date: 17.12.2018* This Example shows how use the U-Net implementation in Delira with PyTorch. Let's first setup the essential hyperparameters. We will use ``delira``'s ``Parameters``-class for this: .. code:: ipython3 logger = None import torch from delira.training import Parameters params = Parameters(fixed_params={ "model": { "in_channels": 1, "num_classes": 4 }, "training": { "batch_size": 64, # batchsize to use "num_epochs": 10, # number of epochs to train "optimizer_cls": torch.optim.Adam, # optimization algorithm to use "optimizer_params": {'lr': 1e-3}, # initialization parameters for this algorithm "losses": {"CE": torch.nn.CrossEntropyLoss()}, # the loss function "lr_sched_cls": None, # the learning rate scheduling algorithm to use "lr_sched_params": {}, # the corresponding initialization parameters "metrics": {} # and some evaluation metrics } }) Since we did not specify any metric, only the ``CrossEntropyLoss`` will be calculated for each batch. Since we have a classification task, this should be sufficient. We will train our network with a batchsize of 64 by using ``Adam`` as optimizer of choice. Logging and Visualization ------------------------- To get a visualization of our results, we should monitor them somehow. For logging we will use ``Visdom``. To start a visdom server you need to execute the following command inside an environment which has visdom installed: .. code:: shell visdom -port=9999 This will start a visdom server on port 9999 of your machine and now we can start to configure our logging environment. To view your results you can open http://localhost:9999 in your browser. .. code:: ipython3 from trixi.logger import PytorchVisdomLogger from delira.logging import TrixiHandler import logging logger_kwargs = { 'name': 'ClassificationExampleLogger', # name of our logging environment 'port': 9999 # port on which our visdom server is alive } logger_cls = PytorchVisdomLogger # configure logging module (and root logger) logging.basicConfig(level=logging.INFO, handlers=[TrixiHandler(logger_cls, **logger_kwargs)]) # derive logger from root logger # (don't do `logger = logging.Logger("...")` since this will create a new # logger which is unrelated to the root logger logger = logging.getLogger("Test Logger") Since a single visdom server can run multiple environments, we need to specify a (unique) name for our environment and need to tell the logger, on which port it can find the visdom server. Data Praparation ---------------- Loading ~~~~~~~ Next we will create a small train and validation set (in this case they will be the same to show the overfitting capability of the UNet). Our data is a brain MR-image thankfully provided by the `FSL `__ in their `introduction `__. We first download the data and extract the T1 image and the corresponding segmentation: .. 
code:: ipython3 from io import BytesIO from zipfile import ZipFile from urllib.request import urlopen resp = urlopen("http://www.fmrib.ox.ac.uk/primers/intro_primer/ExBox3/ExBox3.zip") zipfile = ZipFile(BytesIO(resp.read())) #zipfile_list = zipfile.namelist() #print(zipfile_list) img_file = zipfile.extract("ExBox3/T1_brain.nii.gz") mask_file = zipfile.extract("ExBox3/T1_brain_seg.nii.gz") Now, we load the image and the mask (they are both 3D), convert them to a 32-bit floating point numpy array and ensure, they have the same shape (i.e. that for each voxel in the image, there is a voxel in the mask): .. code:: ipython3 import SimpleITK as sitk import numpy as np # load image and mask img = sitk.GetArrayFromImage(sitk.ReadImage(img_file)) img = img.astype(np.float32) mask = mask = sitk.GetArrayFromImage(sitk.ReadImage(mask_file)) mask = mask.astype(np.float32) assert mask.shape == img.shape print(img.shape) By querying the unique values in the mask, we get the following: .. code:: ipython3 np.unique(mask) This means, there are 4 classes (background and 3 types of tissue) in our sample. Since we want to do a 2D segmentation, we extract a single slice out of the image and the mask (we choose slice 100 here) and plot it: .. code:: ipython3 import matplotlib.pyplot as plt # load single slice img_slice = img[:, :, 100] mask_slice = mask[:, :, 100] # plot slices plt.figure(1, figsize=(15,10)) plt.subplot(121) plt.imshow(img_slice, cmap="gray") plt.colorbar(fraction=0.046, pad=0.04) plt.subplot(122) plt.imshow(mask_slice, cmap="gray") plt.colorbar(fraction=0.046, pad=0.04) plt.show() To load the data, we have to use a ``Dataset``. The following defines a very simple dataset, accepting an image slice, a mask slice and the number of samples. It always returns the same sample until ``num_samples`` samples have been returned. .. code:: ipython3 from delira.data_loading import AbstractDataset class CustomDataset(AbstractDataset): def __init__(self, img, mask, num_samples=1000): super().__init__(None, None, None, None) self.data = {"data": img.reshape(1, *img.shape), "label": mask.reshape(1, *mask.shape)} self.num_samples = num_samples def __getitem__(self, index): return self.data def __len__(self): return self.num_samples Now, we can finally instantiate our datasets: .. code:: ipython3 dataset_train = CustomDataset(img_slice, mask_slice, num_samples=10000) dataset_val = CustomDataset(img_slice, mask_slice, num_samples=1) Augmentation ~~~~~~~~~~~~ For Data-Augmentation we will apply a few transformations: .. code:: ipython3 from batchgenerators.transforms import RandomCropTransform, \ ContrastAugmentationTransform, Compose from batchgenerators.transforms.spatial_transforms import ResizeTransform from batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform transforms = Compose([ RandomCropTransform(150, label_key="label"), # Perform Random Crops of Size 150 x 150 pixels ResizeTransform(224, label_key="label"), # Resample these crops back to 224 x 224 pixels ContrastAugmentationTransform(), # randomly adjust contrast MeanStdNormalizationTransform(mean=[img_slice.mean()], std=[img_slice.std()])]) # use concrete values since we only have one sample (have to estimate it over whole dataset otherwise) With these transformations we can now wrap our datasets into datamanagers: .. 
code:: ipython3 from delira.data_loading import DataManager, SequentialSampler, RandomSampler manager_train = DataManager(dataset_train, params.nested_get("batch_size"), transforms=transforms, sampler_cls=RandomSampler, n_process_augmentation=4) manager_val = DataManager(dataset_val, params.nested_get("batch_size"), transforms=transforms, sampler_cls=SequentialSampler, n_process_augmentation=4) Training -------- After we have done that, we can finally specify our experiment and run it. We will therfore use the already implemented ``UNet2dPytorch``: .. code:: ipython3 import warnings warnings.simplefilter("ignore", UserWarning) # ignore UserWarnings raised by dependency code warnings.simplefilter("ignore", FutureWarning) # ignore FutureWarnings raised by dependency code from delira.training import PyTorchExperiment from delira.training.train_utils import create_optims_default_pytorch from delira.models.segmentation import UNet2dPyTorch if logger is not None: logger.info("Init Experiment") experiment = PyTorchExperiment(params, UNet2dPyTorch, name="Segmentation2dExample", save_path="./tmp/delira_Experiments", optim_builder=create_optims_default_pytorch, gpu_ids=[0], mixed_precision=True) experiment.save() model = experiment.run(manager_train, manager_val) See Also -------- For a more detailed explanation have a look at \* `the introduction tutorial `__ \* `the classification example `__ \* `the 3d segmentation example `__ \* `the generative adversarial example `__ ================================================ FILE: docs/segmentation_3d_pytorch.rst ================================================ Segmentation in 3D using U-Nets with Delira - A very short introduction ======================================================================= *Author: Justus Schock, Alexander Moriz* *Date: 17.12.2018* This Example shows how use the U-Net implementation in Delira with PyTorch. Let's first setup the essential hyperparameters. We will use ``delira``'s ``Parameters``-class for this: .. code:: ipython3 logger = None import torch from delira.training import Parameters params = Parameters(fixed_params={ "model": { "in_channels": 1, "num_classes": 4 }, "training": { "batch_size": 64, # batchsize to use "num_epochs": 10, # number of epochs to train "optimizer_cls": torch.optim.Adam, # optimization algorithm to use "optimizer_params": {'lr': 1e-3}, # initialization parameters for this algorithm "losses": {"CE": torch.nn.CrossEntropyLoss()}, # the loss function "lr_sched_cls": None, # the learning rate scheduling algorithm to use "lr_sched_params": {}, # the corresponding initialization parameters "metrics": {} # and some evaluation metrics } }) Since we did not specify any metric, only the ``CrossEntropyLoss`` will be calculated for each batch. Since we have a classification task, this should be sufficient. We will train our network with a batchsize of 64 by using ``Adam`` as optimizer of choice. Logging and Visualization ------------------------- To get a visualization of our results, we should monitor them somehow. For logging we will use ``Visdom``. To start a visdom server you need to execute the following command inside an environment which has visdom installed: .. code:: shell visdom -port=9999 This will start a visdom server on port 9999 of your machine and now we can start to configure our logging environment. To view your results you can open http://localhost:9999 in your browser. .. 
code:: ipython3 from trixi.logger import PytorchVisdomLogger from delira.logging import TrixiHandler import logging logger_kwargs = { 'name': 'ClassificationExampleLogger', # name of our logging environment 'port': 9999 # port on which our visdom server is alive } logger_cls = PytorchVisdomLogger # configure logging module (and root logger) logging.basicConfig(level=logging.INFO, handlers=[TrixiHandler(logger_cls, **logger_kwargs)]) # derive logger from root logger # (don't do `logger = logging.Logger("...")` since this will create a new # logger which is unrelated to the root logger logger = logging.getLogger("Test Logger") Since a single visdom server can run multiple environments, we need to specify a (unique) name for our environment and need to tell the logger, on which port it can find the visdom server. Data Praparation ---------------- Loading ~~~~~~~ Next we will create a small train and validation set (in this case they will be the same to show the overfitting capability of the UNet). Our data is a brain MR-image thankfully provided by the `FSL `__ in their `introduction `__. We first download the data and extract the T1 image and the corresponding segmentation: .. code:: ipython3 from io import BytesIO from zipfile import ZipFile from urllib.request import urlopen resp = urlopen("http://www.fmrib.ox.ac.uk/primers/intro_primer/ExBox3/ExBox3.zip") zipfile = ZipFile(BytesIO(resp.read())) #zipfile_list = zipfile.namelist() #print(zipfile_list) img_file = zipfile.extract("ExBox3/T1_brain.nii.gz") mask_file = zipfile.extract("ExBox3/T1_brain_seg.nii.gz") Now, we load the image and the mask (they are both 3D), convert them to a 32-bit floating point numpy array and ensure, they have the same shape (i.e. that for each voxel in the image, there is a voxel in the mask): .. code:: ipython3 import SimpleITK as sitk import numpy as np # load image and mask img = sitk.GetArrayFromImage(sitk.ReadImage(img_file)) img = img.astype(np.float32) mask = mask = sitk.GetArrayFromImage(sitk.ReadImage(mask_file)) mask = mask.astype(np.float32) assert mask.shape == img.shape print(img.shape) By querying the unique values in the mask, we get the following: .. code:: ipython3 np.unique(mask) This means, there are 4 classes (background and 3 types of tissue) in our sample. To load the data, we have to use a ``Dataset``. The following defines a very simple dataset, accepting an image slice, a mask slice and the number of samples. It always returns the same sample until ``num_samples`` samples have been returned. .. code:: ipython3 from delira.data_loading import AbstractDataset class CustomDataset(AbstractDataset): def __init__(self, img, mask, num_samples=1000): super().__init__(None, None, None, None) self.data = {"data": img.reshape(1, *img.shape), "label": mask.reshape(1, *mask.shape)} self.num_samples = num_samples def __getitem__(self, index): return self.data def __len__(self): return self.num_samples Now, we can finally instantiate our datasets: .. code:: ipython3 dataset_train = CustomDataset(img, mask, num_samples=10000) dataset_val = CustomDataset(img, mask, num_samples=1) Augmentation ~~~~~~~~~~~~ For Data-Augmentation we will apply a few transformations: .. 
code:: ipython3 from batchgenerators.transforms import ContrastAugmentationTransform, Compose from batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform transforms = Compose([ ContrastAugmentationTransform(), # randomly adjust contrast MeanStdNormalizationTransform(mean=[img.mean()], std=[img.std()])]) # use concrete values since we only have one sample (have to estimate it over whole dataset otherwise) With these transformations we can now wrap our datasets into datamanagers: .. code:: ipython3 from delira.data_loading import DataManager, SequentialSampler, RandomSampler manager_train = DataManager(dataset_train, params.nested_get("batch_size"), transforms=transforms, sampler_cls=RandomSampler, n_process_augmentation=4) manager_val = DataManager(dataset_val, params.nested_get("batch_size"), transforms=transforms, sampler_cls=SequentialSampler, n_process_augmentation=4) Training -------- After we have done that, we can finally specify our experiment and run it. We will therfore use the already implemented ``UNet3dPytorch``: .. code:: ipython3 import warnings warnings.simplefilter("ignore", UserWarning) # ignore UserWarnings raised by dependency code warnings.simplefilter("ignore", FutureWarning) # ignore FutureWarnings raised by dependency code from delira.training import PyTorchExperiment from delira.training.train_utils import create_optims_default_pytorch from delira.models.segmentation import UNet3dPyTorch if logger: logger.info("Init Experiment") experiment = PyTorchExperiment(params, UNet3dPyTorch, name="Segmentation3dExample", save_path="./tmp/delira_Experiments", optim_builder=create_optims_default_pytorch, gpu_ids=[0], mixed_precision=True) experiment.save() model = experiment.run(manager_train, manager_val) See Also -------- For a more detailed explanation have a look at \* `the introduction tutorial `__ \* `the classification example `__ \* `the 2d segmentation example `__ \* `the generative adversarial example `__ ================================================ FILE: docs/tutorial_delira.rst ================================================ Delira Introduction =================== *Last updated: 09.05.2019* Authors: Justus Schock, Christoph Haarburger Loading Data ------------ To train your network you first need to load your training data (and probably also your validation data). This chapter will therefore deal with ``delira``'s capabilities to load your data (and apply some augmentation). The Dataset ~~~~~~~~~~~ There are mainly two ways to load your data: Lazy or non-lazy. Loading in a lazy way means that you load the data just in time and keep the used memory to a bare minimum. This has, however, the disadvantage that your loading function could be a bottleneck since all postponed operations may have to wait until the needed data samples are loaded. In a no-lazy way, one would preload all data to RAM before starting any other operations. This has the advantage that there cannot be a loading bottleneck during latter operations. This advantage comes at cost of a higher memory usage and a (possibly) huge latency at the beginning of each experiment. Both ways to load your data are implemented in ``delira`` and they are named ``BaseLazyDataset``\ and ``BaseCacheDataset``. In the following steps you will only see the ``BaseLazyDataset`` since exchanging them is trivial. All Datasets (including the ones you might want to create yourself later) must be derived of ``delira.data_loading.AbstractDataset`` to ensure a minimum common API. 
The dataset's ``__init__`` has the following signature: .. code:: python def __init__(self, data_path, load_fn, **load_kwargs): This means, you have to pass the path to the directory containing your data (``data_path``), a function to load a single sample of your data (``load_fn``). To get a single sample of your dataset after creating it, you can index it like this: ``dataset[0]``. Additionally you can iterate over your dataset just like over any other ``python`` iterator via .. code:: python for sample in dataset: # do your stuff here or enumerate it via .. code:: python for idx, sample in enumerate(dataset): # do your stuff here . The missing argument ``**load_kwargs`` accepts an arbitrary amount of additional keyword arguments which are directly passed to your loading function. An example of how loading your data may look like is given below: .. code:: python from delira.data_loading import BaseLazyDataset, default_load_fn_2d dataset_train = BaseLazyDataset("/images/datasets/external/mnist/train", default_load_fn_2d, img_shape=(224, 224)) In this case all data lying in ``/images/datasets/external/mnist/train`` is loaded by ``default_load_fn_2d``. The files containing the data must be PNG-files, while the groundtruth is defined in TXT-files. The ``default_load_fn_2d`` needs the additional argument ``img_shape`` which is passed as keyword argument via ``**load_kwargs``. **Note:** for reproducability we decided to use some wrapped PyTorch datasets for this introduction. Now, let's just initialize our trainset: .. code:: ipython3 from delira.data_loading import TorchvisionClassificationDataset dataset_train = TorchvisionClassificationDataset("mnist", train=True, img_shape=(224, 224)) Getting a single sample of your dataset with dataset\_train[0] will produce: .. code:: ipython3 dataset_train[0] which means, that our data is stored in a dictionary containing the keys ``data`` and ``label``, each of them holding the corresponding numpy arrays. The dataloading works on ``numpy`` purely and is thus backend agnostic. It does not matter in which format or with which library you load/preprocess your data, but at the end it must be converted to numpy arrays For validation purposes another dataset could be created with the test data like this: .. code:: ipython3 dataset_val = TorchvisionClassificationDataset("mnist", train=False, img_shape=(224, 224)) The Dataloader ~~~~~~~~~~~~~~ The Dataloader wraps your dataset to privode the ability to load whole batches with an abstract interface. To create a dataloader, one would have to pass the following arguments to it's ``__init__``: the previously created ``dataset``.Additionally, it is possible to pass the ``batch_size`` defining the number of samples per batch, the total number of batches (``num_batches``), which will be the number of samples in your dataset devided by the batchsize per default, a random ``seed``\ for always getting the same behaviour of random number generators and a ```sampler`` <>`__ defining your sampling strategy. This would create a dataloader for your ``dataset_train``: .. code:: ipython3 from delira.data_loading import DataLoader batch_size = 32 loader_train = DataLoader(dataset_train, batch_size) Since the batch\_size has been set to 32, the loader will load 32 samples as one batch. 
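Putting the arguments mentioned above together, a loader with an explicit sampling strategy could be created roughly like this (a sketch only; ``batch_size`` and ``sampler_cls`` are the argument names described in this tutorial, so double-check them against the actual signature):

.. code:: python

    from delira.data_loading import DataLoader, SequentialSampler

    # sketch: draw deterministic batches of 32 samples from the validation set
    loader_val = DataLoader(dataset_val,
                            batch_size=32,                  # samples per batch
                            sampler_cls=SequentialSampler)  # deterministic order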
Even though it would be possible to train your network with an instance of ``DataLoader``, ``delira`` also offers a different approach that covers multithreaded data loading and augmentation: The Datamanager ~~~~~~~~~~~~~~~ The data manager is implemented as ``delira.data_loading.DataManager`` and wraps a ``DataLoader``. It also encapsulates augmentations. Looking at the ``DataManager``'s signature, it becomes obvious that it accepts the same arguments as the ```DataLoader`` <#The-Dataloader>`__. You can either pass a ``dataset`` or a combination of path, dataset class and load function. Additionally, you can pass a custom dataloader class if necessary and a sampler class to choose a sampling algorithm. The parameter ``transforms`` accepts augmentation transformations as implemented in ``batchgenerators``. Augmentation is applied on the fly using ``n_process_augmentation`` threads. All in all, the DataManager is the recommended way to generate batches from your dataset. The following example shows how to create a data manager instance: .. code:: ipython3 from delira.data_loading import DataManager from batchgenerators.transforms.abstract_transforms import Compose from batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform batchsize = 64 transforms = Compose([MeanStdNormalizationTransform(mean=1*[0], std=1*[1])]) data_manager_train = DataManager(dataset_train, # dataset to use batchsize, # batchsize n_process_augmentation=1, # number of augmentation processes transforms=transforms) # augmentation transforms The approach to initialize a DataManager from a datapath takes more arguments since, in contrast to initialization from a dataset, it needs all the arguments which are necessary to internally create a dataset. Since we want to validate our model, we have to create a second manager containing our ``dataset_val``: .. code:: ipython3 data_manager_val = DataManager(dataset_val, batchsize, n_process_augmentation=1, transforms=transforms) That's it - we just finished loading our data! Iterating over a DataManager is possible in simple loops: .. code:: ipython3 from tqdm.auto import tqdm # utility for progress bars # create actual batch generator from DataManager batchgen = data_manager_val.get_batchgen() for data in tqdm(batchgen): pass # here you can access the data of the current batch Sampler ~~~~~~~ In the previous section samplers have already been mentioned but not yet explained. A sampler implements an algorithm defining how a batch should be assembled from single samples in a dataset. ``delira`` provides the following sampler classes in its subpackage ``delira.data_loading.sampler``: - ``AbstractSampler`` - ``SequentialSampler`` - ``PrevalenceSequentialSampler`` - ``RandomSampler`` - ``PrevalenceRandomSampler`` - ``WeightedRandomSampler`` - ``LambdaSampler`` The ``AbstractSampler`` implements no sampling algorithm but defines a sampling API and thus all custom samplers must inherit from this class. The ``SequentialSampler`` builds batches by just iterating over the samples' indices in a sequential way. Following this, the ``RandomSampler`` builds batches by randomly drawing the samples' indices with replacement. If the class each sample belongs to is known for each sample at the beginning, the ``PrevalenceSequentialSampler`` and the ``PrevalenceRandomSampler`` perform per-class sequential or random sampling and build each batch with exactly the same number of samples from each class.
The ``WeightedRandomSampler`` accepts custom weights to give specific samples a higher probability than others during random sampling. The ``LambdaSampler`` is a wrapper for a custom sampling function, which can be passed to the wrapper during its initialization, to ensure API conformity. It can be passed to the DataLoader or DataManager as class (argument ``sampler_cls``) or as instance (argument ``sampler``). Models ------ Since the purpose of this framework is to use machine learning algorithms, there has to be a way to define them. Defining models is straightforward. ``delira`` provides a class ``delira.models.AbstractNetwork``. *All models must inherit from this class*. To inherit from this class, four functions must be implemented in the subclass: - ``__init__`` - ``closure`` - ``prepare_batch`` - ``__call__`` ``__init__`` ~~~~~~~~~~~~ The ``__init__`` function is the class's constructor. In our case it builds the entire model (maybe using some helper functions). If writing your own custom model, you have to override this method. **Note:** If you want the best experience for saving your model and completely recreating it during the loading process you need to take care of a few things: \* if using ``torchvision.models`` to build your model, always import it with ``from torchvision import models as t_models`` \* register all arguments in your custom ``__init__`` in the abstract class. An ``__init__`` prototype could look like this: .. code:: python def __init__(self, in_channels: int, n_outputs: int, **kwargs): """ Parameters ---------- in_channels: int number of input_channels n_outputs: int number of outputs (usually same as number of classes) """ # register params by passing them as kwargs to parent class __init__ # only params registered like this will be saved! super().__init__(in_channels=in_channels, n_outputs=n_outputs, **kwargs) ``closure`` ~~~~~~~~~~~ The ``closure`` function defines one batch iteration to train the network. This function is needed for the framework to provide a generic trainer function which works with all kinds of networks and loss functions. The closure function must implement all steps from forwarding, over loss calculation, metric calculation and logging (for which ``delira.logging_handlers`` provides some extensions for Python's logging module), to the actual backpropagation. For evaluation it is called with an empty optimizer dict and should thus also work without optimizers. ``prepare_batch`` ~~~~~~~~~~~~~~~~~ The ``prepare_batch`` function defines the transformation from loaded data to match the network's input and output shape and pushes everything to the right device. Abstract Networks for specific Backends --------------------------------------- PyTorch ~~~~~~~ At the time of writing, PyTorch is the only backend which is supported, but other backends are planned. In PyTorch every network should be implemented as a subclass of ``torch.nn.Module``, which also provides a ``__call__`` method. This results in slightly different requirements for PyTorch networks: instead of implementing a ``__call__`` method, we simply call ``torch.nn.Module.__call__`` and therefore have to implement the ``forward`` method, which defines the module's behaviour and is internally called by ``torch.nn.Module.__call__`` (among other things). To provide a default behaviour suiting most cases and to avoid having to care about internals, ``delira`` provides the ``AbstractPyTorchNetwork``, which is a more specific case of the ``AbstractNetwork`` for PyTorch modules.
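Putting these requirements together, a minimal (hypothetical) subclass could be sketched as below; ``TinyClassifier`` is an illustrative name, and a real model would additionally have to provide a ``closure`` (and possibly a custom ``prepare_batch``) as discussed in the following sections:

.. code:: python

    import torch
    from delira.models import AbstractPyTorchNetwork


    class TinyClassifier(AbstractPyTorchNetwork):
        """Hypothetical example: a single linear layer wrapped for delira"""

        def __init__(self, in_channels: int, n_outputs: int, **kwargs):
            # register the arguments in the abstract class so the model
            # can be recreated when loading a checkpoint
            super().__init__(in_channels=in_channels, n_outputs=n_outputs,
                             **kwargs)
            self.fc = torch.nn.Linear(in_channels, n_outputs)

        def forward(self, x):
            # networks return a dict of predictions
            return {"pred": self.fc(x)}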
``forward`` ^^^^^^^^^^^ The ``forward`` function defines what has to be done to forward your input through your network and must return a dictionary. Assuming your network has three convolutional layers stored in ``self.conv1``, ``self.conv2`` and ``self.conv3`` and a ReLU stored in ``self.relu``, a simple ``forward`` function could look like this: .. code:: python def forward(self, input_batch: torch.Tensor): out_1 = self.relu(self.conv1(input_batch)) out_2 = self.relu(self.conv2(out_1)) out_3 = self.conv3(out2) return {"pred": out_3} ``prepare_batch`` ^^^^^^^^^^^^^^^^^ The default ``prepare_batch`` function for PyTorch networks looks like this: .. code:: python @staticmethod def prepare_batch(batch: dict, input_device, output_device): """ Helper Function to prepare Network Inputs and Labels (convert them to correct type and shape and push them to correct devices) Parameters ---------- batch : dict dictionary containing all the data input_device : torch.device device for network inputs output_device : torch.device device for network outputs Returns ------- dict dictionary containing data in correct type and shape and on correct device """ return_dict = {"data": torch.from_numpy(batch.pop("data")).to( input_device)} for key, vals in batch.items(): return_dict[key] = torch.from_numpy(vals).to(output_device) return return_dict and can be customized by subclassing the ``AbstractPyTorchNetwork``. ``closure example`` ^^^^^^^^^^^^^^^^^^^ A simple closure function for a PyTorch module could look like this: .. code:: python @staticmethod def closure(model: AbstractPyTorchNetwork, data_dict: dict, optimizers: dict, criterions={}, metrics={}, fold=0, **kwargs): """ closure method to do a single backpropagation step Parameters ---------- model : :class:`ClassificationNetworkBasePyTorch` trainable model data_dict : dict dictionary containing the data optimizers : dict dictionary of optimizers to optimize model's parameters criterions : dict dict holding the criterions to calculate errors (gradients from different criterions will be accumulated) metrics : dict dict holding the metrics to calculate fold : int Current Fold in Crossvalidation (default: 0) **kwargs: additional keyword arguments Returns ------- dict Metric values (with same keys as input dict metrics) dict Loss values (with same keys as input dict criterions) list Arbitrary number of predictions as torch.Tensor Raises ------ AssertionError if optimizers or criterions are empty or the optimizers are not specified """ assert (optimizers and criterions) or not optimizers, \ "Criterion dict cannot be emtpy, if optimizers are passed" loss_vals = {} metric_vals = {} total_loss = 0 # choose suitable context manager: if optimizers: context_man = torch.enable_grad else: context_man = torch.no_grad with context_man(): inputs = data_dict.pop("data") # obtain outputs from network preds = model(inputs)["pred"] if data_dict: for key, crit_fn in criterions.items(): _loss_val = crit_fn(preds, *data_dict.values()) loss_vals[key] = _loss_val.detach() total_loss += _loss_val with torch.no_grad(): for key, metric_fn in metrics.items(): metric_vals[key] = metric_fn( preds, *data_dict.values()) if optimizers: optimizers['default'].zero_grad() total_loss.backward() optimizers['default'].step() else: # add prefix "val" in validation mode eval_loss_vals, eval_metrics_vals = {}, {} for key in loss_vals.keys(): eval_loss_vals["val_" + str(key)] = loss_vals[key] for key in metric_vals: eval_metrics_vals["val_" + str(key)] = metric_vals[key] loss_vals = eval_loss_vals 
            metric_vals = eval_metrics_vals

        for key, val in {**metric_vals, **loss_vals}.items():
            logging.info({"value": {"value": val.item(), "name": key,
                                    "env_appendix": "_%02d" % fold
                                    }})

        logging.info({'image_grid': {"images": inputs, "name": "input_images",
                                     "env_appendix": "_%02d" % fold}})

        return metric_vals, loss_vals, preds

**Note:** This closure is taken from the
``delira.models.classification.ClassificationNetworkBasePyTorch``.

Other examples
~~~~~~~~~~~~~~

In ``delira.models`` you can find exemplary implementations of generative
adversarial networks, classification and regression approaches and
segmentation networks.

Training
--------

Parameters
~~~~~~~~~~

Training parameters (often called hyperparameters) can be defined in the
``delira.training.Parameters`` class.

The class accepts the parameters ``batch_size`` and ``num_epochs`` to define
the batchsize and the number of epochs to train, the parameters
``optimizer_cls`` and ``optimizer_params`` to create an optimizer for
training, the parameter ``criterions`` to specify the training criterions
(whose gradients will be accumulated by default), the parameters
``lr_sched_cls`` and ``lr_sched_params`` to define the learning rate
scheduling and the parameter ``metrics`` to specify evaluation metrics.

Additionally, it is possible to pass an arbitrary number of keyword arguments
to the class.

It is good practice to create a ``Parameters`` object at the beginning and
then use it for creating other objects which are needed for training, since
you can use the class's attributes and changes in hyperparameters only have
to be done once:

.. code:: ipython3

    import torch
    from delira.training import Parameters
    from delira.data_loading import RandomSampler, SequentialSampler

    params = Parameters(fixed_params={
        "model": {},
        "training": {
            "batch_size": 64, # batchsize to use
            "num_epochs": 2, # number of epochs to train
            "optimizer_cls": torch.optim.Adam, # optimization algorithm to use
            "optimizer_params": {'lr': 1e-3}, # initialization parameters for this algorithm
            "criterions": {"CE": torch.nn.CrossEntropyLoss()}, # the loss function
            "lr_sched_cls": None,  # the learning rate scheduling algorithm to use
            "lr_sched_params": {}, # the corresponding initialization parameters
            "metrics": {} # and some evaluation metrics
        }
    })

    # recreating the data managers with the batchsize of the params object
    manager_train = DataManager(dataset_train, params.nested_get("batch_size"),
                                1, transforms=None,
                                sampler_cls=RandomSampler,
                                n_process_loading=4)
    manager_val = DataManager(dataset_val, params.nested_get("batch_size"),
                              3, transforms=None,
                              sampler_cls=SequentialSampler,
                              n_process_loading=4)

Trainer
~~~~~~~

The ``delira.training.NetworkTrainer`` class provides functions to train a
single network by passing attributes from your parameter object, a
``save_freq`` to specify how often your model should be saved
(``save_freq=1`` indicates every epoch, ``save_freq=2`` every second epoch
etc.) and ``gpu_ids``. If you don't pass any ids at all, your network will be
trained on the CPU (and probably take a lot of time). If you specify one id,
the network will be trained on the GPU with the corresponding index, and if
you pass multiple ``gpu_ids``, your network will be trained on multiple GPUs
in parallel.

**Note:** The GPU indices refer to the devices listed in
``CUDA_VISIBLE_DEVICES``. E.g. if ``CUDA_VISIBLE_DEVICES`` lists GPUs 3, 4, 5
then gpu\_id 0 will be the index for GPU 3 etc.
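This remapping is standard CUDA behaviour rather than anything
``delira``-specific. A minimal sketch of how it interacts with ``gpu_ids``
(assuming the environment variable is set before any CUDA context is created;
the value ``"3,4,5"`` is just an example) could look like this:

.. code:: python

    import os

    # make only the physical GPUs 3, 4 and 5 visible to this process;
    # they are re-indexed as logical devices 0, 1 and 2
    os.environ["CUDA_VISIBLE_DEVICES"] = "3,4,5"

    # gpu_ids=[0] therefore selects physical GPU 3,
    # gpu_ids=[0, 1] would train on physical GPUs 3 and 4 in parallel
    gpu_ids = [0]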
**Note:** Training on multiple GPUs is not recommended for simple and small
networks, since for these networks the synchronization overhead is far
greater than the parallelization benefit.

Training your network might look like this:

.. code:: ipython3

    from delira.training import PyTorchNetworkTrainer
    from delira.models.classification import ClassificationNetworkBasePyTorch

    # path where checkpoints should be saved
    save_path = "./results/checkpoints"

    model = ClassificationNetworkBasePyTorch(in_channels=1, n_outputs=10)

    trainer = PyTorchNetworkTrainer(network=model,
                                    save_path=save_path,
                                    criterions=params.nested_get("criterions"),
                                    optimizer_cls=params.nested_get("optimizer_cls"),
                                    optimizer_params=params.nested_get("optimizer_params"),
                                    metrics=params.nested_get("metrics"),
                                    lr_scheduler_cls=params.nested_get("lr_sched_cls"),
                                    lr_scheduler_params=params.nested_get("lr_sched_params"),
                                    gpu_ids=[0]
                                    )

    #trainer.train(params.nested_get("num_epochs"), manager_train, manager_val)

Experiment
~~~~~~~~~~

The ``delira.training.AbstractExperiment`` class needs an experiment name, a
path to save its results to, a parameter object, a model class and the
keyword arguments to create an instance of this class. It provides methods to
perform a single training and also a method for running a k-fold
cross-validation.

In order to create it, you must choose the ``PyTorchExperiment``, which is
basically just a subclass of the ``AbstractExperiment`` providing a general
setup for PyTorch modules. Running an experiment could look like this:

.. code:: ipython3

    from delira.training import PyTorchExperiment
    from delira.training.train_utils import create_optims_default_pytorch

    # Add model parameters to Parameter class
    params.fixed.model = {"in_channels": 1, "n_outputs": 10}

    experiment = PyTorchExperiment(params=params,
                                   model_cls=ClassificationNetworkBasePyTorch,
                                   name="TestExperiment",
                                   save_path="./results",
                                   optim_builder=create_optims_default_pytorch,
                                   gpu_ids=[0])

    experiment.run(manager_train, manager_val)

An ``Experiment`` is the most abstract (and recommended) way to define, train
and validate your network.

Logging
-------

Previous class and function definitions used Python's ``logging`` library. As
an extension to this library, ``delira`` provides a package
(``delira.logging``) containing handlers to realize different logging
methods.

To use these handlers, simply add them to your logger like this:

.. code:: python

    logger.addHandler(logging.StreamHandler())

Nowadays, delira mainly relies on `trixi `__ for logging and provides only a
``MultiStreamHandler`` and a ``TrixiHandler``, which is a binding to
``trixi``'s loggers and integrates them into the Python ``logging`` module.

``MultiStreamHandler``
~~~~~~~~~~~~~~~~~~~~~~

The ``MultiStreamHandler`` accepts an arbitrary number of streams during
initialization and writes the message to all of its streams during logging
(a minimal usage sketch is shown further below, right before the logging
example).

Logging with ``Visdom`` - The ``trixi`` Loggers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

`Visdom `__ is a tool designed to visualize your logs. To use this tool you
need to open a port on the machine you want to train on via
``visdom -port YOUR_PORTNUMBER``. Afterwards, just add the handler of your
choice to the logger. For more detailed information and customization have a
look at `this `__ website.
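As a quick aside before the ``Visdom`` example below: the
``MultiStreamHandler`` mentioned above can be attached like any other handler
from the standard ``logging`` library. The following is only a minimal
sketch, assuming the handler is importable from ``delira.logging`` and takes
its target streams as positional arguments, as described above:

.. code:: python

    import logging
    import sys

    from delira.logging import MultiStreamHandler  # assumed import location

    # one stream writes to the console, the other one to a plain text file
    log_file = open("training.log", "w")
    handler = MultiStreamHandler(sys.stdout, log_file)

    # attach it to the root logger like any standard logging handler
    logging.basicConfig(level=logging.INFO, handlers=[handler])

    logging.getLogger("Test Logger").info("this message goes to both streams")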
Logging the scalar tensors containing ``1``, ``2``, ``3``, ``4`` (at the
beginning; they will increase to show epochwise logging) with the
corresponding keys ``"one"``, ``"two"``, ``"three"``, ``"four"`` and two
random images with the keys ``"prediction"`` and ``"groundtruth"`` would look
like this:

.. code:: ipython3

    NUM_ITERS = 4

    # import logging handler and logging module
    from delira.logging import TrixiHandler
    from trixi.logger import PytorchVisdomLogger
    import logging

    # specify the logger arguments
    logger_kwargs = {
        'name': 'test_env', # name of logging environment
        'port': 9999 # visdom port to connect to
    }

    logger_cls = PytorchVisdomLogger

    # configure logging module (and root logger)
    logging.basicConfig(level=logging.INFO,
                        handlers=[TrixiHandler(logger_cls, **logger_kwargs)])

    # derive logger from root logger
    # (don't do `logger = logging.Logger("...")` since this will create a new
    # logger which is unrelated to the root logger)
    logger = logging.getLogger("Test Logger")

    # create dict containing the scalar numbers as torch.Tensor
    scalars = {"one": torch.Tensor([1]),
               "two": torch.Tensor([2]),
               "three": torch.Tensor([3]),
               "four": torch.Tensor([4])}

    # create dict containing the images as torch.Tensor
    # pytorch expects tensor dimensionality of
    # batchsize x image channels x height x width
    images = {"prediction": torch.rand(1, 3, 224, 224),
              "groundtruth": torch.rand(1, 3, 224, 224)}

    # Simulate 4 Epochs
    for i in range(4*NUM_ITERS):
        logger.info({"image_grid": {"images": images["prediction"],
                                    "name": "predictions"}})

        for key, val_tensor in scalars.items():
            logger.info({"value": {"value": val_tensor.item(), "name": key}})
            scalars[key] += 1

More Examples
-------------

More Examples can be found in

* `the classification example `__
* `the 2d segmentation example `__
* `the 3d segmentation example `__
* `the generative adversarial example `__

================================================
FILE: notebooks/classification_examples/chainer.ipynb
================================================
{ "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "# Classification with Delira and Chainer - A very short introduction\n", "*Author: Justus Schock* \n", "\n", "*Date: 31.07.2019*\n", "\n", "This Example shows how to set up a basic classification model and experiment using Chainer.\n", "\n", "Let\u0027s first setup the essential hyperparameters.
We will use `delira`\u0027s `Parameters`-class for this:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\pywt\\_utils.py:6: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " from collections import Iterable\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\google\\protobuf\\descriptor.py:47: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " from google.protobuf.pyext import _message\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\util\\nest.py:1286: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " _pywrap_tensorflow.RegisterType(\"Mapping\", _collections.Mapping)\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\util\\nest.py:1287: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " _pywrap_tensorflow.RegisterType(\"Sequence\", _collections.Sequence)\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:516: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint8 \u003d np.dtype([(\"qint8\", np.int8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:517: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint8 \u003d np.dtype([(\"quint8\", np.uint8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:518: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint16 \u003d np.dtype([(\"qint16\", np.int16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:519: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint16 \u003d np.dtype([(\"quint16\", np.uint16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:520: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint32 \u003d np.dtype([(\"qint32\", np.int32, 
1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:525: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " np_resource \u003d np.dtype([(\"resource\", np.ubyte, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\training\\tracking\\object_identity.py:61: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " class ObjectIdentityDictionary(collections.MutableMapping):\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\training\\tracking\\object_identity.py:112: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " class ObjectIdentitySet(collections.MutableSet):\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:541: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint8 \u003d np.dtype([(\"qint8\", np.int8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:542: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint8 \u003d np.dtype([(\"quint8\", np.uint8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:543: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint16 \u003d np.dtype([(\"qint16\", np.int16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:544: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint16 \u003d np.dtype([(\"quint16\", np.uint16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:545: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint32 \u003d np.dtype([(\"qint32\", np.int32, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:550: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " np_resource \u003d np.dtype([(\"resource\", np.ubyte, 1)])\n", "WARNING: Logging before flag parsing goes to 
stderr.\n", "W0731 14:01:15.852783 27416 deprecation_wrapper.py:119] From c:\\users\\jsc7rng\\downloads\\delira\\delira\\models\\backends\\tf_eager\\abstract_network.py:113: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n", "\n", "W0731 14:01:15.869738 27416 deprecation_wrapper.py:119] From c:\\users\\jsc7rng\\downloads\\delira\\delira\\models\\backends\\tf_graph\\abstract_network.py:20: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.\n", "\n" ] } ], "source": [ "logger \u003d None\n", "import chainer\n", "from delira.training import Parameters\n", "params \u003d Parameters(fixed_params\u003d{\n", " \"model\": {\n", " \"in_channels\": 1, \n", " \"n_outputs\": 10\n", " },\n", " \"training\": {\n", " \"batch_size\": 64, # batchsize to use\n", " \"num_epochs\": 10, # number of epochs to train\n", " \"optimizer_cls\": chainer.optimizers.Adam, # optimization algorithm to use\n", " \"optimizer_params\": {\u0027lr\u0027: 1e-3}, # initialization parameters for this algorithm\n", " \"losses\": {\"L1\": chainer.functions.mean_absolute_error}, # the loss function\n", " \"lr_sched_cls\": None, # the learning rate scheduling algorithm to use\n", " \"lr_sched_params\": {}, # the corresponding initialization parameters\n", " \"metrics\": {} # and some evaluation metrics\n", " }\n", "}) " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since we did not specify any metric, only the `CrossEntropyLoss` will be calculated for each batch. Since we have a classification task, this should be sufficient. We will train our network with a batchsize of 64 by using `Adam` as optimizer of choice.\n", "\n", "## Logging and Visualization\n", "To get a visualization of our results, we should monitor them somehow. For logging we will use `Tensorboard`. Per default the logging directory will be the same as our experiment directory." ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "\n", "## Data Preparation\n", "### Loading\n", "Next we will create some fake data. For this we use the `ClassificationFakeData`-Dataset, which is already implemented in `deliravision`. To avoid getting the exact same data from both datasets, we use a random offset." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": [ "from deliravision.data.fakedata import ClassificationFakeData\n", "dataset_train \u003d ClassificationFakeData(num_samples\u003d10000, \n", " img_size\u003d(3, 32, 32), \n", " num_classes\u003d10)\n", "dataset_val \u003d ClassificationFakeData(num_samples\u003d1000, \n", " img_size\u003d(3, 32, 32), \n", " num_classes\u003d10,\n", " rng_offset\u003d10001\n", " )" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Augmentation\n", "For Data-Augmentation we will apply a few transformations:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": "from batchgenerators.transforms import RandomCropTransform, \\\n ContrastAugmentationTransform, Compose\nfrom batchgenerators.transforms.spatial_transforms import ResizeTransform\nfrom batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform\n\ntransforms \u003d Compose([\n RandomCropTransform(24), # Perform Random Crops of Size 24 x 24 pixels\n ResizeTransform(32), # Resample these crops back to 32 x 32 pixels\n ContrastAugmentationTransform(), # randomly adjust contrast\n MeanStdNormalizationTransform(mean\u003d[0.5], std\u003d[0.5])]) \n\n" }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "With these transformations we can now wrap our datasets into datamanagers:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": [ "from delira.data_loading import DataManager, SequentialSampler, RandomSampler\n", "\n", "manager_train \u003d DataManager(dataset_train, params.nested_get(\"batch_size\"),\n", " transforms\u003dtransforms,\n", " sampler_cls\u003dRandomSampler,\n", " n_process_augmentation\u003d4)\n", "\n", "manager_val \u003d DataManager(dataset_val, params.nested_get(\"batch_size\"),\n", " transforms\u003dtransforms,\n", " sampler_cls\u003dSequentialSampler,\n", " n_process_augmentation\u003d4)\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## Model\n", "\n", "After we have done that, we can specify our model: We will use a smaller version of a [VGG-Network](https://arxiv.org/pdf/1409.1556.pdf) in this case. We will use more convolutions to reduce the feature dimensionality and reduce the number of units in the linear layers to save up memory (and we only have to deal with 10 classes, not the 1000 imagenet classes)." 
] }, { "cell_type": "code", "execution_count": 3, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.models import AbstractChainerNetwork\n", "import chainer\n", "from functools import partial\n", " \n", " \n", "class SmallVGGChainer(AbstractChainerNetwork):\n", " def __init__(self, in_channels, num_classes):\n", " super().__init__()\n", " \n", " self.model \u003d chainer.Sequential(\n", " chainer.links.Convolution2d(in_channels, 64, 3, padding\u003d1), # 28 x 28\n", " chainer.functions.relu,\n", " partial(chainer.functions.max_pooling_2d, ksize\u003d2), # 14 x 14\n", " chainer.links.Convolution2d(64, 128, 3, padding\u003d1),\n", " chainer.functions.relu,\n", " partial(chainer.functions.max_pooling_2d, ksize\u003d2), # 7 x 7\n", " chainer.links.Convolution2d(128, 256, 3), # 6 x 6\n", " chainer.functions.relu,\n", " partial(chainer.functions.max_pooling_2d, ksize\u003d2), # 3 x 3\n", " chainer.links.Convolution2d(256, 512, 3), # 1 x 1\n", " chainer.functions.flatten,\n", " chainer.links.Linear(1*1*512, num_classes)\n", " )\n", " \n", " def forward(self, x):\n", " return {\"pred\": self.model(x)}\n", " \n", " @staticmethod\n", " def prepare_batch(data_dict, input_device, output_device):\n", " new_batch \u003d {k: chainer.as_variable(v.astype(np.float32))\n", " for k, v in batch.items()}\n", "\n", " for k, v in new_batch.items():\n", " if k \u003d\u003d \"data\":\n", " device \u003d input_device\n", " else:\n", " device \u003d output_device\n", "\n", " # makes modification inplace!\n", " v.to_device(device)\n", "\n", " return new_batch\n", " \n", " @staticmethod\n", " def closure(model, data_dict: dict, optimizers: dict, losses: dict,\n", " fold\u003d0, **kwargs):\n", "\n", " loss_vals \u003d {}\n", " metric_vals \u003d {}\n", " total_loss \u003d 0\n", "\n", " inputs \u003d data_dict[\"data\"]\n", " preds \u003d model(inputs)\n", "\n", " with chainer.using_config(\"train\", True):\n", " for key, crit_fn in losses.items():\n", " _loss_val \u003d crit_fn(preds[\"pred\"], data_dict[\"label\"])\n", " loss_vals[key] \u003d _loss_val.item()\n", " total_loss +\u003d _loss_val\n", "\n", " model.cleargrads()\n", " total_loss.backward()\n", " optimizers[\u0027default\u0027].update()\n", " \n", " return loss_vals, {k: v.unchain()\n", " for k, v in preds.items()}\n", "\n", " \n", " " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "So let\u0027s evisit, what we have just done.\n", "\n", "In `delira` all networks must be derived from `delira.models.AbstractNetwork`. For each backend there is a class derived from this class, handling some backend-specific function calls and registrations. For the `Chainer` Backend this class is `AbstractChainerNetwork` and all Chainer Networks should be derived from it.\n", "\n", "First we defined the network itself (this is the part simply concatenating the layers into a sequential model). Next, we defined the logic to apply, when we want to predict from the model (this is the `forward` method).\n", "\n", "So far this was plain `Chainer`. The `prepare_batch` function is not plain Chainer anymore, but allows us to ensure the data is in the correct shape, has the correct data-type and lies on the correct device. The function above is the standard `prepare_batch` function, which is also implemented in the `AbstractChainerNetwork` and just re-implemented here for the sake of completeness.\n", "\n", "Same goes for the `closure` function. This function defines the update rule for our parameters (and how to calculate the losses). 
These funcitons are good to go for many simple networks but can be overwritten for customization when training more complex networks.\n", "\n", "\n", "## Training\n", "Now that we have defined our network, we can finally specify our experiment and run it." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": true } }, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(\"ignore\", UserWarning) # ignore UserWarnings raised by dependency code\n", "warnings.simplefilter(\"ignore\", FutureWarning) # ignore FutureWarnings raised by dependency code\n", "\n", "\n", "from delira.training import ChainerExperiment\n", "\n", "if logger is not None:\n", " logger.info(\"Init Experiment\")\n", "experiment \u003d PyTorchExperiment(params, SmallVGGChainer,\n", " name\u003d\"ClassificationExample\",\n", " save_path\u003d\"./tmp/delira_Experiments\",\n", " key_mapping\u003d{\"x\": \"data\"}\n", " gpu_ids\u003d[0])\n", "experiment.save()\n", "\n", "model \u003d experiment.run(manager_train, manager_val)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Congratulations, you have now trained your first Classification Model using `delira`, we will now predict a few samples from the testset to show, that the networks predictions are valid (for now, this is done manually, but we also have a `Predictor` class to automate stuff like this):" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import numpy as np\n", "from tqdm.auto import tqdm # utility for progress bars\n", "\n", "device \u003d \"@numpy\"\n", "model \u003d model.to(device) # push model to device\n", "preds, labels \u003d [], []\n", "\n", "with torch.no_grad():\n", " for i in tqdm(range(len(dataset_val))):\n", " img \u003d dataset_val[i][\"data\"] # get image from current batch\n", " img_tensor \u003d torch.from_numpy(img).unsqueeze(0).to(device).to(torch.float) # create a tensor from image, push it to device and add batch dimension\n", " pred_tensor \u003d model(img_tensor) # feed it through the network\n", " pred \u003d pred_tensor.argmax(1).item() # get index with maximum class confidence\n", " label \u003d np.asscalar(dataset_val[i][\"label\"]) # get label from batch\n", " if i % 1000 \u003d\u003d 0:\n", " print(\"Prediction: %d \\t label: %d\" % (pred, label)) # print result\n", " preds.append(pred)\n", " labels.append(label)\n", " \n", "# calculate accuracy\n", "accuracy \u003d (np.asarray(preds) \u003d\u003d np.asarray(labels)).sum() / len(preds)\n", "print(\"Accuracy: %.3f\" % accuracy)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/classification_examples/pytorch.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "# Classification with Delira and PyTorch - A very short introduction\n", "*Author: Justus Schock* \n", "\n", "*Date: 31.07.2019*\n", "\n", "This Example shows how to set up a basic classification model and experiment using PyTorch.\n", "\n", "Let\u0027s first setup the essential hyperparameters. 
We will use `delira`\u0027s `Parameters`-class for this:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": [ "logger \u003d None\n", "import torch\n", "from delira.training import Parameters\n", "params \u003d Parameters(fixed_params\u003d{\n", " \"model\": {\n", " \"in_channels\": 1, \n", " \"n_outputs\": 10\n", " },\n", " \"training\": {\n", " \"batch_size\": 64, # batchsize to use\n", " \"num_epochs\": 10, # number of epochs to train\n", " \"optimizer_cls\": torch.optim.Adam, # optimization algorithm to use\n", " \"optimizer_params\": {\u0027lr\u0027: 1e-3}, # initialization parameters for this algorithm\n", " \"losses\": {\"CE\": torch.nn.CrossEntropyLoss()}, # the loss function\n", " \"lr_sched_cls\": None, # the learning rate scheduling algorithm to use\n", " \"lr_sched_params\": {}, # the corresponding initialization parameters\n", " \"metrics\": {} # and some evaluation metrics\n", " }\n", "}) " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since we did not specify any metric, only the `CrossEntropyLoss` will be calculated for each batch. Since we have a classification task, this should be sufficient. We will train our network with a batchsize of 64 by using `Adam` as optimizer of choice.\n", "\n", "## Logging and Visualization\n", "To get a visualization of our results, we should monitor them somehow. For logging we will use `Tensorboard`. Per default the logging directory will be the same as our experiment directory." ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "\n", "## Data Preparation\n", "### Loading\n", "Next we will create some fake data. For this we use the `ClassificationFakeData`-Dataset, which is already implemented in `deliravision`. To avoid getting the exact same data from both datasets, we use a random offset." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": [ "from deliravision.data.fakedata import ClassificationFakeData\n", "dataset_train \u003d ClassificationFakeData(num_samples\u003d10000, \n", " img_size\u003d(3, 32, 32), \n", " num_classes\u003d10)\n", "dataset_val \u003d ClassificationFakeData(num_samples\u003d1000, \n", " img_size\u003d(3, 32, 32), \n", " num_classes\u003d10,\n", " rng_offset\u003d10001\n", " )" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Augmentation\n", "For Data-Augmentation we will apply a few transformations:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": "from batchgenerators.transforms import RandomCropTransform, \\\n ContrastAugmentationTransform, Compose\nfrom batchgenerators.transforms.spatial_transforms import ResizeTransform\nfrom batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform\n\ntransforms \u003d Compose([\n RandomCropTransform(24), # Perform Random Crops of Size 24 x 24 pixels\n ResizeTransform(32), # Resample these crops back to 32 x 32 pixels\n ContrastAugmentationTransform(), # randomly adjust contrast\n MeanStdNormalizationTransform(mean\u003d[0.5], std\u003d[0.5])]) \n\n" }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "With these transformations we can now wrap our datasets into datamanagers:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": [ "from delira.data_loading import DataManager, SequentialSampler, RandomSampler\n", "\n", "manager_train \u003d DataManager(dataset_train, params.nested_get(\"batch_size\"),\n", " transforms\u003dtransforms,\n", " sampler_cls\u003dRandomSampler,\n", " n_process_augmentation\u003d4)\n", "\n", "manager_val \u003d DataManager(dataset_val, params.nested_get(\"batch_size\"),\n", " transforms\u003dtransforms,\n", " sampler_cls\u003dSequentialSampler,\n", " n_process_augmentation\u003d4)\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## Model\n", "\n", "After we have done that, we can specify our model: We will use a smaller version of a [VGG-Network](https://arxiv.org/pdf/1409.1556.pdf) in this case. We will use more convolutions to reduce the feature dimensionality and reduce the number of units in the linear layers to save up memory (and we only have to deal with 10 classes, not the 1000 imagenet classes)." 
] }, { "cell_type": "code", "execution_count": 3, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.models import AbstractPyTorchNetwork\n", "import torch\n", "\n", "class Flatten(torch.nn.Module):\n", " \n", " def forward(self, x):\n", " return x.view(x.size(0), -1)\n", "\n", "class SmallVGGPyTorch(AbstractPyTorchNetwork):\n", " def __init__(self, in_channels, num_classes):\n", " super().__init__()\n", " \n", " self.model \u003d torch.nn.Sequential(\n", " torch.nn.Conv2d(in_channels, 64, 3, padding\u003d1), # 32 x 32\n", " torch.nn.ReLU(),\n", " torch.nn.MaxPool2d(2), # 16 x 16\n", " torch.nn.Conv2d(64, 128, 3, padding\u003d1),\n", " torch.nn.ReLU(),\n", " torch.nn.MaxPool2d(2), # 8 x 8\n", " torch.nn.Conv2d(128, 256, 3, padding\u003d1), # 4 x 4\n", " torch.nn.ReLU(),\n", " torch.nn.MaxPool2d(2), # 4 x 4\n", " torch.nn.Conv2d(256, 512, 3, padding\u003d1),\n", " torch.nn.ReLU(),\n", " torch.nn.MaxPool2d(), # 2 x 2\n", " torch.nn.Conv2d(512, 512, 3, padding\u003d1),\n", " torch.nn.ReLU(),\n", " torch.nn.MaxPool2d(), # 1 x 1\n", " Flatten(),\n", " torch.nn.Linear(1*1*512, num_classes),\n", " )\n", " \n", " def forward(self, x: torch.Tensor):\n", " return {\"pred\": self.model(x)}\n", " \n", " @staticmethod\n", " def prepare_batch(data_dict, input_device, output_device):\n", " return_dict \u003d {\"data\": torch.from_numpy(batch[\"data\"]).to(\n", " input_device).to(torch.float)}\n", "\n", " for key, vals in batch.items():\n", " if key \u003d\u003d \"data\": \n", " continue\n", " return_dict[key] \u003d torch.from_numpy(vals).to(output_device).to(\n", " torch.float)\n", "\n", " return return_dict\n", " \n", " @staticmethod\n", " def closure(model, data_dict: dict, optimizers: dict, losses: dict,\n", " fold\u003d0, **kwargs):\n", "\n", " loss_vals \u003d {}\n", " total_loss \u003d 0\n", "\n", "\n", " # predict\n", " inputs \u003d data_dict.pop(\"data\")\n", " preds \u003d model(inputs)\n", "\n", " # calculate losses\n", " for key, crit_fn in losses.items():\n", " _loss_val \u003d crit_fn(preds[\"pred\"], data_dict[\"label\"])\n", " loss_vals[key] \u003d _loss_val.item()\n", " total_loss +\u003d _loss_val\n", "\n", " optimizers[\u0027default\u0027].zero_grad()\n", " # perform loss scaling via apex if half precision is enabled\n", " with scale_loss(total_loss, optimizers[\"default\"]) as scaled_loss:\n", " scaled_loss.backward()\n", " optimizers[\u0027default\u0027].step()\n", "\n", " return loss_vals, {k: v.detach()\n", " for k, v in preds.items()}\n", " \n", " " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "So let\u0027s evisit, what we have just done.\n", "\n", "In `delira` all networks must be derived from `delira.models.AbstractNetwork`. For each backend there is a class derived from this class, handling some backend-specific function calls and registrations. For the `PyTorch` Backend this class is `AbstractPyTorchNetwork` and all PyTorch Networks should be derived from it.\n", "\n", "First we defined the network itself (this is the part simply concatenating the layers into a sequential model). Next, we defined the logic to apply, when we want to predict from the model (this is the `forward` method).\n", "\n", "So far this was plain `PyTorch`. The `prepare_batch` function is not plain PyTorch anymore, but allows us to ensure the data is in the correct shape, has the correct data-type and lies on the correct device. 
The function above is the standard `prepare_batch` function, which is also implemented in the `AbstractPyTorchNetwork` and just re-implemented here for the sake of completeness.\n", "\n", "Same goes for the `closure` function. This function defines the update rule for our parameters (and how to calculate the losses). These funcitons are good to go for many simple networks but can be overwritten for customization when training more complex networks.\n", "\n", "\n", "## Training\n", "Now that we have defined our network, we can finally specify our experiment and run it." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": true } }, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(\"ignore\", UserWarning) # ignore UserWarnings raised by dependency code\n", "warnings.simplefilter(\"ignore\", FutureWarning) # ignore FutureWarnings raised by dependency code\n", "\n", "\n", "from delira.training import PyTorchExperiment\n", "from delira.training.train_utils import create_optims_default_pytorch\n", "\n", "if logger is not None:\n", " logger.info(\"Init Experiment\")\n", "experiment \u003d PyTorchExperiment(params, SmallVGGPyTorch,\n", " name\u003d\"ClassificationExample\",\n", " save_path\u003d\"./tmp/delira_Experiments\",\n", " optim_builder\u003dcreate_optims_default_pytorch,\n", " key_mapping\u003d{\"x\": \"data\"}\n", " gpu_ids\u003d[0])\n", "experiment.save()\n", "\n", "model \u003d experiment.run(manager_train, manager_val)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Congratulations, you have now trained your first Classification Model using `delira`, we will now predict a few samples from the testset to show, that the networks predictions are valid (for now, this is done manually, but we also have a `Predictor` class to automate stuff like this):" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import numpy as np\n", "from tqdm.auto import tqdm # utility for progress bars\n", "\n", "device \u003d torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # set device (use GPU if available)\n", "model \u003d model.to(device) # push model to device\n", "preds, labels \u003d [], []\n", "\n", "with torch.no_grad():\n", " for i in tqdm(range(len(dataset_val))):\n", " img \u003d dataset_val[i][\"data\"] # get image from current batch\n", " img_tensor \u003d torch.from_numpy(img).unsqueeze(0).to(device).to(torch.float) # create a tensor from image, push it to device and add batch dimension\n", " pred_tensor \u003d model(img_tensor) # feed it through the network\n", " pred \u003d pred_tensor.argmax(1).item() # get index with maximum class confidence\n", " label \u003d np.asscalar(dataset_val[i][\"label\"]) # get label from batch\n", " if i % 1000 \u003d\u003d 0:\n", " print(\"Prediction: %d \\t label: %d\" % (pred, label)) # print result\n", " preds.append(pred)\n", " labels.append(label)\n", " \n", "# calculate accuracy\n", "accuracy \u003d (np.asarray(preds) \u003d\u003d np.asarray(labels)).sum() / len(preds)\n", "print(\"Accuracy: %.3f\" % accuracy)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 } 
================================================ FILE: notebooks/classification_examples/sklearn.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "# Classification with Delira and SciKit-Learn - A very short introduction\n", "*Author: Justus Schock* \n", "\n", "*Date: 31.07.2019*\n", "\n", "This Example shows how to set up a basic classification model and experiment using SciKit-Learn.\n", "\n", "Let\u0027s first setup the essential hyperparameters. We will use `delira`\u0027s `Parameters`-class for this:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\pywt\\_utils.py:6: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " from collections import Iterable\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\google\\protobuf\\descriptor.py:47: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " from google.protobuf.pyext import _message\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\util\\nest.py:1286: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " _pywrap_tensorflow.RegisterType(\"Mapping\", _collections.Mapping)\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\util\\nest.py:1287: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " _pywrap_tensorflow.RegisterType(\"Sequence\", _collections.Sequence)\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:516: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint8 \u003d np.dtype([(\"qint8\", np.int8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:517: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint8 \u003d np.dtype([(\"quint8\", np.uint8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:518: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint16 \u003d np.dtype([(\"qint16\", np.int16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:519: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is 
deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint16 \u003d np.dtype([(\"quint16\", np.uint16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:520: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint32 \u003d np.dtype([(\"qint32\", np.int32, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:525: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " np_resource \u003d np.dtype([(\"resource\", np.ubyte, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\training\\tracking\\object_identity.py:61: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " class ObjectIdentityDictionary(collections.MutableMapping):\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\training\\tracking\\object_identity.py:112: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " class ObjectIdentitySet(collections.MutableSet):\n" ] } ], "source": [ "logger \u003d None\n", "from delira.training import Parameters\n", "import sklearn\n", "params \u003d Parameters(fixed_params\u003d{\n", " \"model\": {},\n", " \"training\": {\n", " \"batch_size\": 64, # batchsize to use\n", " \"num_epochs\": 10, # number of epochs to train\n", " \"optimizer_cls\": None, # optimization algorithm to use\n", " \"optimizer_params\": {}, # initialization parameters for this algorithm\n", " \"losses\": {}, # the loss function\n", " \"lr_sched_cls\": None, # the learning rate scheduling algorithm to use\n", " \"lr_sched_params\": {}, # the corresponding initialization parameters\n", " \"metrics\": {\"mae\": mean_absolute_error} # and some evaluation metrics\n", " }\n", "}) " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since we did not specify any metric, only the `CrossEntropyLoss` will be calculated for each batch. Since we have a classification task, this should be sufficient. We will train our network with a batchsize of 64 by using `Adam` as optimizer of choice.\n", "\n", "## Logging and Visualization\n", "To get a visualization of our results, we should monitor them somehow. For logging we will use `Tensorboard`. Per default the logging directory will be the same as our experiment directory." ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "\n", "## Data Preparation\n", "### Loading\n", "Next we will create some fake data. For this we use the `ClassificationFakeData`-Dataset, which is already implemented in `deliravision`. To avoid getting the exact same data from both datasets, we use a random offset." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": [ "from deliravision.data.fakedata import ClassificationFakeData\n", "dataset_train \u003d ClassificationFakeData(num_samples\u003d10000, \n", " img_size\u003d(3, 32, 32), \n", " num_classes\u003d10)\n", "dataset_val \u003d ClassificationFakeData(num_samples\u003d1000, \n", " img_size\u003d(3, 32, 32), \n", " num_classes\u003d10,\n", " rng_offset\u003d10001\n", " )" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Augmentation\n", "For Data-Augmentation we will apply a few transformations:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": "from batchgenerators.transforms import RandomCropTransform, \\\n ContrastAugmentationTransform, Compose\nfrom batchgenerators.transforms.spatial_transforms import ResizeTransform\nfrom batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform\n\ntransforms \u003d Compose([\n RandomCropTransform(24), # Perform Random Crops of Size 24 x 24 pixels\n ResizeTransform(32), # Resample these crops back to 32 x 32 pixels\n ContrastAugmentationTransform(), # randomly adjust contrast\n MeanStdNormalizationTransform(mean\u003d[0.5], std\u003d[0.5])]) \n\n" }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "With these transformations we can now wrap our datasets into datamanagers:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": [ "from delira.data_loading import DataManager, SequentialSampler, RandomSampler\n", "\n", "manager_train \u003d DataManager(dataset_train, params.nested_get(\"batch_size\"),\n", " transforms\u003dtransforms,\n", " sampler_cls\u003dRandomSampler,\n", " n_process_augmentation\u003d4)\n", "\n", "manager_val \u003d DataManager(dataset_val, params.nested_get(\"batch_size\"),\n", " transforms\u003dtransforms,\n", " sampler_cls\u003dSequentialSampler,\n", " n_process_augmentation\u003d4)\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## Model\n", "\n", "After we have done that, we can specify our model: We will use a very simple MultiLayer Perceptron here. \n", "In opposite to other backends, we don\u0027t need to provide a custom implementation of our model, but we can simply use it as-is. It will be automatically wrapped by `SklearnEstimator`, which can be subclassed for more advanced usage.\n", "\n", "## Training\n", "Now that we have defined our network, we can finally specify our experiment and run it." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": true } }, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(\"ignore\", UserWarning) # ignore UserWarnings raised by dependency code\n", "warnings.simplefilter(\"ignore\", FutureWarning) # ignore FutureWarnings raised by dependency code\n", "from sklearn.neural_network import MLPClassifier\n", "\n", "from delira.training import SklearnExperiment\n", "\n", "if logger is not None:\n", " logger.info(\"Init Experiment\")\n", "experiment \u003d PyTorchExperiment(params, MLPClassifier,\n", " name\u003d\"ClassificationExample\",\n", " save_path\u003d\"./tmp/delira_Experiments\",\n", " key_mapping\u003d{\"X\": \"X\"}\n", " gpu_ids\u003d[0])\n", "experiment.save()\n", "\n", "model \u003d experiment.run(manager_train, manager_val)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Congratulations, you have now trained your first Classification Model using `delira`, we will now predict a few samples from the testset to show, that the networks predictions are valid (for now, this is done manually, but we also have a `Predictor` class to automate stuff like this):" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import numpy as np\n", "from tqdm.auto import tqdm # utility for progress bars\n", "\n", "preds, labels \u003d [], []\n", "\n", "with torch.no_grad():\n", " for i in tqdm(range(len(dataset_val))):\n", " img \u003d dataset_val[i][\"data\"] # get image from current batch\n", " img_tensor \u003d img.astype(np.float) # create a tensor from image, push it to device and add batch dimension\n", " pred_tensor \u003d model(img_tensor) # feed it through the network\n", " pred \u003d pred_tensor.argmax(1).item() # get index with maximum class confidence\n", " label \u003d np.asscalar(dataset_val[i][\"label\"]) # get label from batch\n", " if i % 1000 \u003d\u003d 0:\n", " print(\"Prediction: %d \\t label: %d\" % (pred, label)) # print result\n", " preds.append(pred)\n", " labels.append(label)\n", " \n", "# calculate accuracy\n", "accuracy \u003d (np.asarray(preds) \u003d\u003d np.asarray(labels)).sum() / len(preds)\n", "print(\"Accuracy: %.3f\" % accuracy)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/classification_examples/tf_eager.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "# Classification with Delira and TensorFlow Eager Execution- A very short introduction\n", "*Author: Justus Schock* \n", "\n", "*Date: 31.07.2019*\n", "\n", "This Example shows how to set up a basic classification model and experiment using TensorFlow\u0027s Eager Execution Mode.\n", "\n", "Let\u0027s first setup the essential hyperparameters. 
We will use `delira`\u0027s `Parameters`-class for this:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:516: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint8 \u003d np.dtype([(\"qint8\", np.int8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:517: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint8 \u003d np.dtype([(\"quint8\", np.uint8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:518: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint16 \u003d np.dtype([(\"qint16\", np.int16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:519: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint16 \u003d np.dtype([(\"quint16\", np.uint16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:520: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint32 \u003d np.dtype([(\"qint32\", np.int32, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:525: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " np_resource \u003d np.dtype([(\"resource\", np.ubyte, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:541: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint8 \u003d np.dtype([(\"qint8\", np.int8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:542: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint8 \u003d np.dtype([(\"quint8\", np.uint8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:543: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future 
version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint16 \u003d np.dtype([(\"qint16\", np.int16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:544: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint16 \u003d np.dtype([(\"quint16\", np.uint16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:545: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint32 \u003d np.dtype([(\"qint32\", np.int32, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:550: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " np_resource \u003d np.dtype([(\"resource\", np.ubyte, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\pywt\\_utils.py:6: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " from collections import Iterable\n", "WARNING: Logging before flag parsing goes to stderr.\n", "W0731 13:38:30.713174 21496 deprecation_wrapper.py:119] From c:\\users\\jsc7rng\\downloads\\delira\\delira\\models\\backends\\tf_eager\\abstract_network.py:113: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n", "\n", "W0731 13:38:30.727135 21496 deprecation_wrapper.py:119] From c:\\users\\jsc7rng\\downloads\\delira\\delira\\models\\backends\\tf_graph\\abstract_network.py:20: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.\n", "\n" ] } ], "source": [ "logger \u003d None\n", "import tensorflow as tf\n", "tf.enable_eager_execution()\n", "from delira.training import Parameters\n", "params \u003d Parameters(fixed_params\u003d{\n", " \"model\": {\n", " \"in_channels\": 1, \n", " \"n_outputs\": 10\n", " },\n", " \"training\": {\n", " \"batch_size\": 64, # batchsize to use\n", " \"num_epochs\": 10, # number of epochs to train\n", " \"optimizer_cls\": tf.train.AdamOptimizer, # optimization algorithm to use\n", " \"optimizer_params\": {\u0027lr\u0027: 1e-3}, # initialization parameters for this algorithm\n", " \"losses\": {\"L1\": tf.losses.absolute_difference}, # the loss function\n", " \"lr_sched_cls\": None, # the learning rate scheduling algorithm to use\n", " \"lr_sched_params\": {}, # the corresponding initialization parameters\n", " \"metrics\": {} # and some evaluation metrics\n", " }\n", "}) " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since we did not specify any metric, only the `L1-Loss` will be calculated for each batch. Since this is just a toy example, this should be sufficient. We will train our network with a batchsize of 64 by using `Adam` as optimizer of choice.\n", "\n", "## Logging and Visualization\n", "To get a visualization of our results, we should monitor them somehow. 
For logging we will use `Tensorboard`. Per default the logging directory will be the same as our experiment directory." ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "\n", "## Data Preparation\n", "### Loading\n", "Next we will create some fake data. For this we use the `ClassificationFakeData`-Dataset, which is already implemented in `deliravision`. To avoid getting the exact same data from both datasets, we use a random offset." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named \u0027deliravision\u0027", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u003cipython-input-2-c638229a3dc2\u003e\u001b[0m in \u001b[0;36m\u003cmodule\u003e\u001b[1;34m\u001b[0m\n\u001b[1;32m----\u003e 1\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mdeliravision\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfakedata\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mClassificationFakeData\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m dataset_train \u003d ClassificationFakeData(num_samples\u003d10000, \n\u001b[0;32m 3\u001b[0m \u001b[0mimg_size\u001b[0m\u001b[1;33m\u003d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m224\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m224\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m num_classes\u003d10)\n\u001b[0;32m 5\u001b[0m dataset_val \u003d ClassificationFakeData(num_samples\u003d1000, \n", "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named \u0027deliravision\u0027" ] } ], "source": [ "from deliravision.data.fakedata import ClassificationFakeData\n", "dataset_train \u003d ClassificationFakeData(num_samples\u003d10000, \n", " img_size\u003d(3, 32, 43), \n", " num_classes\u003d10)\n", "dataset_val \u003d ClassificationFakeData(num_samples\u003d1000, \n", " img_size\u003d(3, 32, 32), \n", " num_classes\u003d10,\n", " rng_offset\u003d10001\n", " )" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Augmentation\n", "For Data-Augmentation we will apply a few transformations:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": "from batchgenerators.transforms import RandomCropTransform, \\\n ContrastAugmentationTransform, Compose\nfrom batchgenerators.transforms.spatial_transforms import ResizeTransform\nfrom batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform\n\ntransforms \u003d Compose([\n RandomCropTransform(24), # Perform Random Crops of Size 24 x 24 pixels\n ResizeTransform(32), # Resample these crops back to 32 x 32 pixels\n ContrastAugmentationTransform(), # randomly adjust contrast\n MeanStdNormalizationTransform(mean\u003d[0.5], std\u003d[0.5])]) \n\n" }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "With these transformations we can now wrap our datasets into datamanagers:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": [ "from delira.data_loading import DataManager, 
SequentialSampler, RandomSampler\n", "\n", "manager_train \u003d DataManager(dataset_train, params.nested_get(\"batch_size\"),\n", " transforms\u003dtransforms,\n", " sampler_cls\u003dRandomSampler,\n", " n_process_augmentation\u003d4)\n", "\n", "manager_val \u003d DataManager(dataset_val, params.nested_get(\"batch_size\"),\n", " transforms\u003dtransforms,\n", " sampler_cls\u003dSequentialSampler,\n", " n_process_augmentation\u003d4)\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## Model\n", "\n", "After we have done that, we can specify our model: We will use a smaller version of a [VGG-Network](https://arxiv.org/pdf/1409.1556.pdf) in this case. We will use more convolutions to reduce the feature dimensionality and reduce the number of units in the linear layers to save up memory (and we only have to deal with 10 classes, not the 1000 imagenet classes)." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.models import AbstractTfEagerNetwork\n", "import tensorflow as tf\n", "import numpy as np\n", "\n", "class SmallVGGTfEager(AbstractTfEagerNetwork):\n", " def __init__(self, in_channels, num_classes, data_format\u003d\"channels_last\"):\n", " if data_format \u003d\u003d \"channels_last\":\n", " input_shape \u003d (32, 32, 3)\n", " else:\n", " input_shape \u003d (3, 32, 32)\n", " super().__init__(data_format\u003ddata_format)\n", " \n", " self.model \u003d tf.keras.models.Sequential(\n", " tf.keras.layers.Conv2d(in_channels, 64, 3, padding\u003d1, input_shape\u003dinput_shape), # 32, 32\n", " tf.keras.layers.ReLU(),\n", " tf.keras.layers.MaxPool2d(2), # 16 x 16\n", " tf.keras.layers.Conv2d(128, 3, padding\u003d1),\n", " tf.keras.layers.ReLU(),\n", " tf.keras.layers.MaxPool2d(2), # 8 x 8\n", " tf.keras.layers.Conv2d(256, 3, padding\u003d1),\n", " tf.keras.layers.ReLU(),\n", " tf.keras.layers.MaxPool2d(2), # 4 x 4\n", " tf.keras.layers.Conv2d(512, 3, padding\u003d1),\n", " tf.keras.layers.ReLU(),\n", " tf.keras.layers.MaxPool2d(), # 2 x 2\n", " tf.keras.layers.Conv2d(512, 3, padding\u003d1),\n", " tf.keras.layers.ReLU(),\n", " tf.keras.layers.MaxPool2d(), # 1 x 1\n", " tf.keras.layers.Flatten(),\n", " tf.keras.layers.Dense(num_classes),\n", " )\n", " \n", " def call(self, x: tf.Tensor):\n", " return {\"pred\": self.model(x)}\n", " \n", " @staticmethod\n", " def prepare_batch(data_dict, input_device, output_device):\n", " with tf.device(input_device):\n", " return_dict \u003d {\"data\": tf.convert.to.tensor(\n", " batch[\"data\"].astype(np.float32))}\n", " \n", " with tf.device(output_device):\n", " for key, vals in batch.items():\n", " if key \u003d\u003d \"data\": \n", " continue\n", " return_dict[key] \u003d tf.convert_to_tensor(\n", " vals.astype(np.float32))\n", "\n", " return return_dict\n", " \n", " @staticmethod\n", " def closure(model, data_dict: dict, optimizers: dict, losses: dict,\n", " fold\u003d0, **kwargs):\n", "\n", " loss_vals \u003d {}\n", " total_loss \u003d 0\n", "\n", " # calculate loss with graph created by gradient taping\n", " with tf.GradientTape() as tape:\n", " preds \u003d model(data_dict[\"data\"])\n", " total_loss \u003d None\n", " for k, loss_fn in losses.items():\n", " _loss_val \u003d loss_fn(preds[\"pred\"],\n", " data_dict[\"label\"])\n", " loss_vals[k] \u003d _loss_val.numpy()\n", " if total_loss is None:\n", " total_loss \u003d _loss_val\n", " else:\n", " total_loss +\u003d _loss_val\n", " \n", " return loss_vals, preds\n", " \n", " " ] }, { "cell_type": 
"markdown", "metadata": { "pycharm": {} }, "source": [ "So let\u0027s evisit, what we have just done.\n", "\n", "In `delira` all networks must be derived from `delira.models.AbstractNetwork`. For each backend there is a class derived from this class, handling some backend-specific function calls and registrations. For the `Tensorflow Eager` Backend this class is `AbstractTfEagerNetwork` and all TensorFlow Eager Execution Networks should be derived from it.\n", "\n", "First we defined the network itself (this is the part simply concatenating the layers into a sequential model). Next, we defined the logic to apply, when we want to predict from the model (this is the `call` method).\n", "\n", "So far this was plain `TensorFlow`. The `prepare_batch` function is not plain TF anymore, but allows us to ensure the data is in the correct shape, has the correct data-type and lies on the correct device. The function above is the standard `prepare_batch` function, which is also implemented in the `AbstractTfEagerNetwork` and just re-implemented here for the sake of completeness.\n", "\n", "Same goes for the `closure` function. This function defines the update rule for our parameters (and how to calculate the losses). These funcitons are good to go for many simple networks but can be overwritten for customization when training more complex networks.\n", "\n", "\n", "## Training\n", "Now that we have defined our network, we can finally specify our experiment and run it." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": true } }, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(\"ignore\", UserWarning) # ignore UserWarnings raised by dependency code\n", "warnings.simplefilter(\"ignore\", FutureWarning) # ignore FutureWarnings raised by dependency code\n", "\n", "\n", "from delira.training import TfEagerExperiment\n", "from delira.training.train_utils import create_tf_eager_optims_default\n", "\n", "if logger is not None:\n", " logger.info(\"Init Experiment\")\n", "experiment \u003d PyTorchExperiment(params, SmallVGGTfEager,\n", " name\u003d\"ClassificationExample\",\n", " save_path\u003d\"./tmp/delira_Experiments\",\n", " optim_builder\u003dcreate_tf_eager_optims_default,\n", " key_mapping\u003d{\"x\": \"data\"}\n", " gpu_ids\u003d[0])\n", "experiment.save()\n", "\n", "model \u003d experiment.run(manager_train, manager_val)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Congratulations, you have now trained your first Classification Model using `delira`, we will now predict a few samples from the testset to show, that the networks predictions are valid (for now, this is done manually, but we also have a `Predictor` class to automate stuff like this):" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import numpy as np\n", "from tqdm.auto import tqdm # utility for progress bars\n", "import tensorflow as tf\n", "\n", "device \u003d \"/cpu:0\"\n", "preds, labels \u003d [], []\n", "\n", "with tf.device(device):\n", " for i in tqdm(range(len(dataset_val))):\n", " img \u003d dataset_val[i][\"data\"] # get image from current batch\n", " img_tensor \u003d tf.convert_to_tensor(img[None, ...].astype(np.float)) # create a tensor from image, push it to device and add batch dimension\n", " pred_tensor \u003d model(img_tensor) # feed it through the network\n", " pred \u003d pred_tensor.argmax(1).item() # get index with maximum class confidence\n", " label 
\u003d np.asscalar(dataset_val[i][\"label\"]) # get label from batch\n", " if i % 1000 \u003d\u003d 0:\n", " print(\"Prediction: %d \\t label: %d\" % (pred, label)) # print result\n", " preds.append(pred)\n", " labels.append(label)\n", "\n", "# calculate accuracy\n", "accuracy \u003d (np.asarray(preds) \u003d\u003d np.asarray(labels)).sum() / len(preds)\n", "print(\"Accuracy: %.3f\" % accuracy)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/classification_examples/tf_graph.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "# Classification with Delira and TensorFlow Graph Execution- A very short introduction\n", "*Author: Justus Schock* \n", "\n", "*Date: 31.07.2019*\n", "\n", "This Example shows how to set up a basic classification model and experiment using TensorFlow\u0027s Graph Execution Mode.\n", "\n", "Let\u0027s first setup the essential hyperparameters. We will use `delira`\u0027s `Parameters`-class for this:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:516: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint8 \u003d np.dtype([(\"qint8\", np.int8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:517: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint8 \u003d np.dtype([(\"quint8\", np.uint8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:518: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint16 \u003d np.dtype([(\"qint16\", np.int16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:519: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint16 \u003d np.dtype([(\"quint16\", np.uint16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:520: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint32 \u003d np.dtype([(\"qint32\", np.int32, 1)])\n", 
"c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorflow\\python\\framework\\dtypes.py:525: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " np_resource \u003d np.dtype([(\"resource\", np.ubyte, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:541: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint8 \u003d np.dtype([(\"qint8\", np.int8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:542: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint8 \u003d np.dtype([(\"quint8\", np.uint8, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:543: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint16 \u003d np.dtype([(\"qint16\", np.int16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:544: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_quint16 \u003d np.dtype([(\"quint16\", np.uint16, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:545: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " _np_qint32 \u003d np.dtype([(\"qint32\", np.int32, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\tensorboard\\compat\\tensorflow_stub\\dtypes.py:550: FutureWarning: Passing (type, 1) or \u00271type\u0027 as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / \u0027(1,)type\u0027.\n", " np_resource \u003d np.dtype([(\"resource\", np.ubyte, 1)])\n", "c:\\users\\jsc7rng\\appdata\\local\\conda\\conda\\envs\\delira-dev\\lib\\site-packages\\pywt\\_utils.py:6: DeprecationWarning: Using or importing the ABCs from \u0027collections\u0027 instead of from \u0027collections.abc\u0027 is deprecated, and in 3.8 it will stop working\n", " from collections import Iterable\n", "WARNING: Logging before flag parsing goes to stderr.\n", "W0731 13:38:30.713174 21496 deprecation_wrapper.py:119] From c:\\users\\jsc7rng\\downloads\\delira\\delira\\models\\backends\\tf_eager\\abstract_network.py:113: The name tf.train.Optimizer is deprecated. 
Please use tf.compat.v1.train.Optimizer instead.\n", "\n", "W0731 13:38:30.727135 21496 deprecation_wrapper.py:119] From c:\\users\\jsc7rng\\downloads\\delira\\delira\\models\\backends\\tf_graph\\abstract_network.py:20: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.\n", "\n" ] } ], "source": [ "logger \u003d None\n", "import tensorflow as tf\n", "tf.disable_eager_execution()\n", "from delira.training import Parameters\n", "params \u003d Parameters(fixed_params\u003d{\n", " \"model\": {\n", " \"in_channels\": 1, \n", " \"n_outputs\": 10\n", " },\n", " \"training\": {\n", " \"batch_size\": 64, # batchsize to use\n", " \"num_epochs\": 10, # number of epochs to train\n", " \"optimizer_cls\": tf.train.AdamOptimizer, # optimization algorithm to use\n", " \"optimizer_params\": {\u0027lr\u0027: 1e-3}, # initialization parameters for this algorithm\n", " \"losses\": {\"L1\": tf.losses.absolute_difference}, # the loss function\n", " \"lr_sched_cls\": None, # the learning rate scheduling algorithm to use\n", " \"lr_sched_params\": {}, # the corresponding initialization parameters\n", " \"metrics\": {} # and some evaluation metrics\n", " }\n", "}) " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since we did not specify any metric, only the `L1-Loss` will be calculated for each batch. Since this is just a toy example, this should be sufficient. We will train our network with a batchsize of 64 by using `Adam` as optimizer of choice.\n", "\n", "## Logging and Visualization\n", "To get a visualization of our results, we should monitor them somehow. For logging we will use `Tensorboard`. Per default the logging directory will be the same as our experiment directory." ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "\n", "## Data Preparation\n", "### Loading\n", "Next we will create some fake data. For this we use the `ClassificationFakeData`-Dataset, which is already implemented in `deliravision`. To avoid getting the exact same data from both datasets, we use a random offset." 
] }, { "cell_type": "code", "execution_count": 2, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named \u0027deliravision\u0027", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u003cipython-input-2-c638229a3dc2\u003e\u001b[0m in \u001b[0;36m\u003cmodule\u003e\u001b[1;34m\u001b[0m\n\u001b[1;32m----\u003e 1\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mdeliravision\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfakedata\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mClassificationFakeData\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m dataset_train \u003d ClassificationFakeData(num_samples\u003d10000, \n\u001b[0;32m 3\u001b[0m \u001b[0mimg_size\u001b[0m\u001b[1;33m\u003d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m224\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m224\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m num_classes\u003d10)\n\u001b[0;32m 5\u001b[0m dataset_val \u003d ClassificationFakeData(num_samples\u003d1000, \n", "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named \u0027deliravision\u0027" ] } ], "source": [ "from deliravision.data.fakedata import ClassificationFakeData\n", "dataset_train \u003d ClassificationFakeData(num_samples\u003d10000, \n", " img_size\u003d(3, 32, 32), \n", " num_classes\u003d10)\n", "dataset_val \u003d ClassificationFakeData(num_samples\u003d1000, \n", " img_size\u003d(3, 32, 32), \n", " num_classes\u003d10,\n", " rng_offset\u003d10001\n", " )" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Augmentation\n", "For Data-Augmentation we will apply a few transformations:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": "from batchgenerators.transforms import RandomCropTransform, \\\n ContrastAugmentationTransform, Compose\nfrom batchgenerators.transforms.spatial_transforms import ResizeTransform\nfrom batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform\n\ntransforms \u003d Compose([\n RandomCropTransform(24), # Perform Random Crops of Size 24 x 24 pixels\n ResizeTransform(32), # Resample these crops back to 32 x 32 pixels\n ContrastAugmentationTransform(), # randomly adjust contrast\n MeanStdNormalizationTransform(mean\u003d[0.5], std\u003d[0.5])]) \n\n" }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "With these transformations we can now wrap our datasets into datamanagers:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": [ "from delira.data_loading import DataManager, SequentialSampler, RandomSampler\n", "\n", "manager_train \u003d DataManager(dataset_train, params.nested_get(\"batch_size\"),\n", " transforms\u003dtransforms,\n", " sampler_cls\u003dRandomSampler,\n", " n_process_augmentation\u003d4)\n", "\n", "manager_val \u003d DataManager(dataset_val, params.nested_get(\"batch_size\"),\n", " transforms\u003dtransforms,\n", " sampler_cls\u003dSequentialSampler,\n", " n_process_augmentation\u003d4)\n" ] }, { "cell_type": 
"markdown", "metadata": { "pycharm": {} }, "source": [ "## Model\n", "\n", "After we have done that, we can specify our model: We will use a smaller version of a [VGG-Network](https://arxiv.org/pdf/1409.1556.pdf) in this case. We will use more convolutions to reduce the feature dimensionality and reduce the number of units in the linear layers to save up memory (and we only have to deal with 10 classes, not the 1000 imagenet classes)." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.models import AbstractTfGraphNetwork\n", "import tensorflow as tf\n", "import numpy as np\n", "\n", "class SmallVGGTfEager(AbstractTfGraphNetwork):\n", " def __init__(self, in_channels, num_classes, data_format\u003d\"channels_last\"):\n", " if data_format \u003d\u003d \"channels_last\":\n", " input_shape \u003d (32, 32, 3)\n", " else:\n", " input_shape \u003d (3, 32, 32)\n", " super().__init__()\n", " \n", " self.model \u003d tf.keras.models.Sequential(\n", " tf.keras.layers.Conv2d(in_channels, 64, 3, padding\u003d1, input_shape\u003dinput_shape), # 32, 32\n", " tf.keras.layers.ReLU(),\n", " tf.keras.layers.MaxPool2d(2), # 16 x 16\n", " tf.keras.layers.Conv2d(128, 3, padding\u003d1),\n", " tf.keras.layers.ReLU(),\n", " tf.keras.layers.MaxPool2d(2), # 8 x 8\n", " tf.keras.layers.Conv2d(256, 3, padding\u003d1),\n", " tf.keras.layers.ReLU(),\n", " tf.keras.layers.MaxPool2d(2), # 4 x 4\n", " tf.keras.layers.Conv2d(512, 3, padding\u003d1),\n", " tf.keras.layers.ReLU(),\n", " tf.keras.layers.MaxPool2d(), # 2 x 2\n", " tf.keras.layers.Conv2d(512, 3, padding\u003d1),\n", " tf.keras.layers.ReLU(),\n", " tf.keras.layers.MaxPool2d(), # 1 x 1\n", " tf.keras.layers.Flatten(),\n", " tf.keras.layers.Dense(num_classes),\n", " )\n", " \n", " # create computation graph\n", " data \u003d tf.placeholder(shape\u003d[None, 32], dtype\u003dtf.float32)\n", " labels \u003d tf.placeholder_with_default(\n", " tf.zeros([tf.shape(data)[0], 1]), shape\u003d[None, 1])\n", "\n", " preds_train \u003d self.model(data)\n", " preds_eval \u003d self.model(data)\n", "\n", " self.inputs[\"data\"] \u003d data\n", " self.inputs[\"label\"] \u003d labels\n", " self.outputs_train[\"pred\"] \u003d preds_train\n", " self.outputs_eval[\"pred\"] \u003d preds_eval\n", " \n", " @staticmethod\n", " def prepare_batch(data_dict, input_device, output_device):\n", " with tf.device(input_device):\n", " return_dict \u003d {\"data\": tf.convert.to.tensor(\n", " batch[\"data\"].astype(np.float32))}\n", " \n", " with tf.device(output_device):\n", " for key, vals in batch.items():\n", " if key \u003d\u003d \"data\": \n", " continue\n", " return_dict[key] \u003d tf.convert_to_tensor(\n", " vals.astype(np.float32))\n", "\n", " return return_dict\n", " \n", " @staticmethod\n", " def closure(model, data_dict: dict, optimizers: dict, losses: dict,\n", " fold\u003d0, **kwargs):\n", "\n", " outputs \u003d model.run(data\u003dinputs, label\u003ddata_dict[\u0027label\u0027])\n", " preds \u003d outputs[\u0027pred\u0027]\n", " loss_vals \u003d outputs[\u0027losses\u0027]\n", " \n", " return loss_vals, preds\n", " \n", " " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "So let\u0027s evisit, what we have just done.\n", "\n", "In `delira` all networks must be derived from `delira.models.AbstractNetwork`. For each backend there is a class derived from this class, handling some backend-specific function calls and registrations. 
For the `Tensorflow Graph` Backend this class is `AbstractTfGraphNetwork` and all TensorFlow Graph Execution Networks should be derived from it.\n", "\n", "First we defined the network itself (this is the part simply concatenating the layers into a sequential model). Next, we defined the logic to apply when we want to predict from the model (in the graph backend this is the static computation graph built at the end of `__init__`).\n", "\n", "So far this was plain `TensorFlow`. The `prepare_batch` function is not plain TF anymore, but allows us to ensure the data is in the correct shape, has the correct data-type and lies on the correct device. The function above is the standard `prepare_batch` function, which is also implemented in the `AbstractTfGraphNetwork` and just re-implemented here for the sake of completeness.\n", "\n", "Same goes for the `closure` function. This function defines the update rule for our parameters (and how to calculate the losses). These functions are good to go for many simple networks but can be overwritten for customization when training more complex networks.\n", "\n", "\n", "## Training\n", "Now that we have defined our network, we can finally specify our experiment and run it." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": true } }, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(\"ignore\", UserWarning) # ignore UserWarnings raised by dependency code\n", "warnings.simplefilter(\"ignore\", FutureWarning) # ignore FutureWarnings raised by dependency code\n", "\n", "\n", "from delira.training import TfGraphExperiment\n", "\n", "if logger is not None:\n", " logger.info(\"Init Experiment\")\n", "experiment \u003d TfGraphExperiment(params, SmallVGGTfGraph,\n", " name\u003d\"ClassificationExample\",\n", " save_path\u003d\"./tmp/delira_Experiments\",\n", " key_mapping\u003d{\"x\": \"data\"},\n", " gpu_ids\u003d[0])\n", "experiment.save()\n", "\n", "model \u003d experiment.run(manager_train, manager_val)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Congratulations, you have now trained your first Classification Model using `delira`. We will now predict a few samples from the test set to show that the network\u0027s predictions are valid (for now, this is done manually, but we also have a `Predictor` class to automate tasks like this):" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import numpy as np\n", "from tqdm.auto import tqdm # utility for progress bars\n", "import tensorflow as tf\n", "\n", "device \u003d \"/cpu:0\"\n", "preds, labels \u003d [], []\n", "\n", "with tf.device(device):\n", " for i in tqdm(range(len(dataset_val))):\n", " img \u003d dataset_val[i][\"data\"] # get image from current batch\n", " img_tensor \u003d tf.convert_to_tensor(img[None, ...].astype(np.float32)) # create a tensor from image, push it to device and add batch dimension\n", " pred_tensor \u003d model(img_tensor) # feed it through the network\n", " pred \u003d pred_tensor.argmax(1).item() # get index with maximum class confidence\n", " label \u003d np.asscalar(dataset_val[i][\"label\"]) # get label from batch\n", " if i % 1000 \u003d\u003d 0:\n", " print(\"Prediction: %d \\t label: %d\" % (pred, label)) # print result\n", " preds.append(pred)\n", " labels.append(label)\n", "\n", "# calculate accuracy\n", "accuracy \u003d (np.asarray(preds) \u003d\u003d np.asarray(labels)).sum() / len(preds)\n", "print(\"Accuracy: %.3f\" % accuracy)" ] } ], "metadata": { "kernelspec": { "display_name": 
"Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/classification_examples/torchscript.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "# Classification with Delira and TorchScript - A very short introduction\n", "*Author: Justus Schock* \n", "\n", "*Date: 04.12.2018*\n", "\n", "This Example shows how to set up a basic classification `TorchScript` model and experiment.\n", "`TorchScript` is basically `PyTorch` with a static computation graph. Thus, we require only minor changes compared to the `PyTorch`-example. These changes will be highlighted.\n", "\n", "Let\u0027s first setup the essential hyperparameters. We will use `delira`\u0027s `Parameters`-class for this:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": [ "logger \u003d None\n", "import torch\n", "from delira.training import Parameters\n", "params \u003d Parameters(fixed_params\u003d{\n", " \"model\": {\n", " \"in_channels\": 1, \n", " \"n_outputs\": 10\n", " },\n", " \"training\": {\n", " \"batch_size\": 64, # batchsize to use\n", " \"num_epochs\": 10, # number of epochs to train\n", " \"optimizer_cls\": torch.optim.Adam, # optimization algorithm to use\n", " \"optimizer_params\": {\u0027lr\u0027: 1e-3}, # initialization parameters for this algorithm\n", " \"losses\": {\"CE\": torch.nn.CrossEntropyLoss()}, # the loss function\n", " \"lr_sched_cls\": None, # the learning rate scheduling algorithm to use\n", " \"lr_sched_params\": {}, # the corresponding initialization parameters\n", " \"metrics\": {} # and some evaluation metrics\n", " }\n", "}) " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since we did not specify any metric, only the `CrossEntropyLoss` will be calculated for each batch. Since we have a classification task, this should be sufficient. We will train our network with a batchsize of 64 by using `Adam` as optimizer of choice.\n", "\n", "## Logging and Visualization\n", "To get a visualization of our results, we should monitor them somehow. For logging we will use `Tensorboard`. Per default the logging directory will be the same as our experiment directory." ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "\n", "## Data Preparation\n", "### Loading\n", "Next we will create some fake data. For this we use the `ClassificationFakeData`-Dataset, which is already implemented in `deliravision`. To avoid getting the exact same data from both datasets, we use a random offset." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": [ "from deliravision.data.fakedata import ClassificationFakeData\n", "dataset_train \u003d ClassificationFakeData(num_samples\u003d10000, \n", " img_size\u003d(3, 32, 32), \n", " num_classes\u003d10)\n", "dataset_val \u003d ClassificationFakeData(num_samples\u003d1000, \n", " img_size\u003d(3, 32, 32), \n", " num_classes\u003d10,\n", " rng_offset\u003d10001\n", " )" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Augmentation\n", "For Data-Augmentation we will apply a few transformations:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": "from batchgenerators.transforms import RandomCropTransform, \\\n ContrastAugmentationTransform, Compose\nfrom batchgenerators.transforms.spatial_transforms import ResizeTransform\nfrom batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform\n\ntransforms \u003d Compose([\n RandomCropTransform(24), # Perform Random Crops of Size 24 x 24 pixels\n ResizeTransform(32), # Resample these crops back to 32 x 32 pixels\n ContrastAugmentationTransform(), # randomly adjust contrast\n MeanStdNormalizationTransform(mean\u003d[0.5], std\u003d[0.5])]) \n\n" }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "With these transformations we can now wrap our datasets into datamanagers:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": false } }, "outputs": [], "source": [ "from delira.data_loading import DataManager, SequentialSampler, RandomSampler\n", "\n", "manager_train \u003d DataManager(dataset_train, params.nested_get(\"batch_size\"),\n", " transforms\u003dtransforms,\n", " sampler_cls\u003dRandomSampler,\n", " n_process_augmentation\u003d4)\n", "\n", "manager_val \u003d DataManager(dataset_val, params.nested_get(\"batch_size\"),\n", " transforms\u003dtransforms,\n", " sampler_cls\u003dSequentialSampler,\n", " n_process_augmentation\u003d4)\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## Model\n", "\n", "After we have done that, we can specify our model: We will use a smaller version of a [VGG11](https://arxiv.org/pdf/1409.1556.pdf) in this case. We will use more convolutions to reduce the feature dimensionality and reduce the number of units in the linear layers to save up memory (and we only have to deal with 10 classes, not the 1000 imagenet classes)." 
] }, { "cell_type": "code", "execution_count": 2, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.models import AbstractTorchScriptNetwork\n", "import torch\n", "\n", "class Flatten(torch.nn.Module):\n", " \n", " def forward(self, x):\n", " return x.view(x.size(0), -1)\n", "\n", "class VGG11TorchScript(AbstractTorchScriptNetwork):\n", " def __init__(self, in_channels, num_classes):\n", " super().__init__()\n", " \n", " self.model \u003d torch.nn.Sequential(\n", " torch.nn.Conv2d(in_channels, 64, 3, padding\u003d1), # 32 x 32\n", " torch.nn.ReLU(),\n", " torch.nn.MaxPool2d(2), # 16 x 16\n", " torch.nn.Conv2d(64, 128, 3, padding\u003d1),\n", " torch.nn.ReLU(),\n", " torch.nn.MaxPool2d(2), # 8 x 8\n", " torch.nn.Conv2d(128, 256, 3, padding\u003d1), # 4 x 4\n", " torch.nn.ReLU(),\n", " torch.nn.MaxPool2d(2), # 4 x 4\n", " torch.nn.Conv2d(256, 512, 3, padding\u003d1),\n", " torch.nn.ReLU(),\n", " torch.nn.MaxPool2d(), # 2 x 2\n", " torch.nn.Conv2d(512, 512, 3, padding\u003d1),\n", " torch.nn.ReLU(),\n", " torch.nn.MaxPool2d(), # 1 x 1\n", " Flatten(),\n", " torch.nn.Linear(1*1*512, num_classes),\n", " )\n", " \n", " @torch.jit.script_method \n", " def forward(self, x: torch.Tensor):\n", " return {\"pred\": self.model(x)}\n", " \n", " @staticmethod\n", " def prepare_batch(data_dict, input_device, output_device):\n", " return_dict \u003d {\"data\": torch.from_numpy(batch[\"data\"]).to(\n", " input_device).to(torch.float)}\n", "\n", " for key, vals in batch.items():\n", " if key \u003d\u003d \"data\": \n", " continue\n", " return_dict[key] \u003d torch.from_numpy(vals).to(output_device).to(\n", " torch.float)\n", "\n", " return return_dict\n", " \n", " @staticmethod\n", " def closure(model, data_dict: dict, optimizers: dict, losses: dict,\n", " fold\u003d0, **kwargs):\n", "\n", " loss_vals \u003d {}\n", " total_loss \u003d 0\n", "\n", "\n", " # predict\n", " inputs \u003d data_dict[\"data\"]\n", " preds \u003d model(inputs)\n", "\n", " # calculate losses\n", " for key, crit_fn in losses.items():\n", " _loss_val \u003d crit_fn(preds[\"pred\"], data_dict[\"label\"])\n", " loss_vals[key] \u003d _loss_val.item()\n", " total_loss +\u003d _loss_val\n", "\n", " optimizers[\u0027default\u0027].zero_grad()\n", " total_loss.backward()\n", " optimizers[\u0027default\u0027].step()\n", "\n", " return loss_vals, {k: v.detach()\n", " for k, v in preds.items()}\n", " \n", " " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "So let\u0027s evisit, what we have just done.\n", "\n", "In `delira` all networks must be derived from `delira.models.AbstractNetwork`. For each backend there is a class derived from this class, handling some backend-specific function calls and registrations. For the `TorchScript` Backend this class is `AbstractTorchScriptNetwork` and all TorchScript Networks should be derived from it.\n", "\n", "\u003e **Note:** This is different from `PyTorch`, where the base class has to be `AbstractPyTorchNetwork`\n", "\n", "First we defined the network itself (this is the part simply concatenating the layers into a sequential model). Next, we defined the logic to apply, when we want to predict from the model (this is the `forward` method).\n", "\n", "\u003e **Note:** In `TorchScript` all methods adding options to the computation graph must be decorated with `torch.jit.script_method`. See [here](https://pytorch.org/docs/stable/jit.html#creating-torchscript-code) for more details\n", "\n", "So far this was plain `TorchScript`. 
The `prepare_batch` function is not plain TorchScript anymore, but allows us to ensure the data is in the correct shape, has the correct data-type and lies on the correct device. The function above is the standard `prepare_batch` function, which is also implemented in the `AbstractTorchScriptNetwork` and just re-implemented here for the sake of completeness.\n", "\n", "Same goes for the `closure` function. This function defines the update rule for our parameters (and how to calculate the losses). These functions are good to go for many simple networks but can be overwritten for customization when training more complex networks.\n", "\n", "## Training\n", "Now that we have defined our network, we can finally specify our experiment and run it." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": { "is_executing": true } }, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(\"ignore\", UserWarning) # ignore UserWarnings raised by dependency code\n", "warnings.simplefilter(\"ignore\", FutureWarning) # ignore FutureWarnings raised by dependency code\n", "\n", "\n", "from delira.training import TorchScriptExperiment\n", "from delira.training.train_utils import create_optims_default_pytorch\n", "\n", "if logger is not None:\n", " logger.info(\"Init Experiment\")\n", "experiment \u003d TorchScriptExperiment(params, VGG11TorchScript,\n", " name\u003d\"ClassificationExample\",\n", " save_path\u003d\"./tmp/delira_Experiments\",\n", " optim_builder\u003dcreate_optims_default_pytorch,\n", " key_mapping\u003d{\"x\": \"data\"},\n", " gpu_ids\u003d[0])\n", "experiment.save()\n", "\n", "model \u003d experiment.run(manager_train, manager_val)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Congratulations, you have now trained your first Classification Model using `delira`. We will now predict a few samples from the test set to show that the network\u0027s predictions are valid:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import numpy as np\n", "from tqdm.auto import tqdm # utility for progress bars\n", "\n", "device \u003d torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # set device (use GPU if available)\n", "model \u003d model.to(device) # push model to device\n", "preds, labels \u003d [], []\n", "\n", "with torch.no_grad():\n", " for i in tqdm(range(len(dataset_val))):\n", " img \u003d dataset_val[i][\"data\"] # get image from current batch\n", " img_tensor \u003d torch.from_numpy(img).unsqueeze(0).to(device).to(torch.float) # create a tensor from image, push it to device and add batch dimension\n", " pred_tensor \u003d model(img_tensor) # feed it through the network\n", " pred \u003d pred_tensor[\"pred\"].argmax(1).item() # get index with maximum class confidence\n", " label \u003d np.asscalar(dataset_val[i][\"label\"]) # get label from batch\n", " if i % 1000 \u003d\u003d 0:\n", " print(\"Prediction: %d \\t label: %d\" % (pred, label)) # print result\n", " preds.append(pred)\n", " labels.append(label)\n", " \n", "# calculate accuracy\n", "accuracy \u003d (np.asarray(preds) \u003d\u003d np.asarray(labels)).sum() / len(preds)\n", "print(\"Accuracy: %.3f\" % accuracy)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", 
"pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/custom_backend.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# How To: Integrate your own Computation Backend\n", "\n", "*Author: Justus Schock*\n", "\n", "*Date: 15.05.2019*\n", "\n", "This howto will take you on a trip through the `delira` internals, while we will see, how to add a custom computation backend on the examplaric case of the `torch.jit` or `TorchScript` backend\n", "\n", "## Model Definitions\n", "In order to implement a network, we will first have to define the network itself. In `delira` there is a single backend-specific implementation of an abstract network class for each of the backends. These interface classes are all based on the `AbstractNetwork`-class, defining the major API.\n", "\n", "So let's start having a look at this class to see, what we will have to implement for our own backend.\n", "\n", "Of course we will have to implement an `__init__` defining our class. The `__init__` of `AbstractNetwork` (which should be called during our the `__init__` of our baseclass) accepts a number of kwargs and simply registers them to be `init_kwargs`, so there is nothing we have to take care of.\n", "\n", "The next function to inspect is the `__call__` function, which makes the class callable and the docstrings indicate, that it should take care of our model's forward-pass.\n", "\n", "After the `__call__` we now have the `closure` function, which defines a single training step (including, but not limited to, forward-pass, calculation of losses and train-metrics, backward-pass and optimization).\n", "\n", "The last method to implement is the `prepare_batch` function which converts the input to a suitable format and the correct data-type and device.\n", "\n", "### TorchScript Limitations\n", "Since we want to implement an abstract network class for this specific backend, we should have a look on how to generally implement models in this backend.\n", "\n", "According the the [PyTorch docs](https://pytorch.org/docs/stable/jit.html) this works as follows:\n", "\n", "> You can write TorchScript code directly using Python syntax. You do this using the `torch.jit.script` decorator (for functions) or `torch.jit.script_method` decorator (for methods) on subclasses of `ScriptModule`. With this decorator the body of the annotated function is directly translated into TorchScript. 
TorchScript itself is a subset of the Python language, so not all features in Python work, but we provide enough functionality to compute on tensors and do control-dependent operations.\n", "\n", "Since our use-case is to implement the interface class for networks, we want to use the way of subclassing `torch.jit.ScriptModule`, implement it's `forward` and use the `torch.jit.script_method` decorator on it.\n", "\n", "The example given in the very same docs for this case is:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([0.4997, 0.2955, 0.1588, 0.1873, 0.4753], grad_fn=)" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import torch\n", "class MyScriptModule(torch.jit.ScriptModule):\n", " def __init__(self, N, M):\n", " super().__init__()\n", " self.weight = torch.nn.Parameter(torch.rand(N, M))\n", "\n", " @torch.jit.script_method\n", " def forward(self, input):\n", " return self.weight.mv(input)\n", " \n", "my_script_module = MyScriptModule(5, 3)\n", "input_tensor = torch.rand(3)\n", "my_script_module(input_tensor)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Merging TorchScript into our Abstract Class\n", "\n", "This little example gives us a few things, we have to do for a successful definition of our base class:\n", "\n", "**1.)** Our class has to subclass both, the `AbstractNetwork` and the `torch.jit.ScriptModule` classes.\n", "\n", "**2.)** We need to implement a `forward` method, which takes care of the forward-pass (as it's name indicates).\n", "\n", "**3.)** We don't have to take care of the backward-pass (thanks to `PyTorch`'s and `TorchScript`'s AutoGrad (which is a framework for automatic differentiation).\n", "\n", "**4.)** Since `torch.jit.ScriptModule` is callable (seen in the example), it already implements a `__call__` method and we may simply use this one.\n", "\n", "**5.)** The `closure` is completely network-dependent and thus has to remain an abstract method here.\n", "\n", "**6.)** The `prepare_batch` function also depends on the combination of network, inputs and loss functions to use, but we can at least give a prototype of such an function, which handles the devices correctly and converts everything to `float`\n", "\n", "\n", "### Actual Implementation\n", "\n", "Now, let's start with the actual implementation and do one function by another and keep the things in mind, we just discovered.\n", "\n", "#### Class Signature and `__init__`-Method\n", "To subclass both networks, we cannot use the simple `super().__init__` approach, because we have to init both parent classes, so we do \n", "\n", "\n", "```python\n", "\n", " class AbstractTorchScriptNetwork(AbstractNetwork, torch.jit.ScriptModule):\n", "\n", " @abc.abstractmethod\n", " def __init__(self, optimize=True, **kwargs):\n", " \"\"\"\n", "\n", " Parameters\n", " ----------\n", " optimize : bool\n", " whether to optimize the network graph or not; default: True\n", " **kwargs :\n", " additional keyword arguments (passed to :class:`AbstractNetwork`)\n", " \"\"\"\n", " torch.jit.ScriptModule.__init__(self, optimize=optimize)\n", " AbstractNetwork.__init__(self, **kwargs)\n", " \n", "```\n", "instead. 
This ensures all parent classes to be initialized correctly.\n", "\n", "#### `__call__`-Method\n", "As mentioned above, the `__call__` method is very easy to implement, because we can simply use the implementation of our `TorchScript` base class like this:\n", "\n", "```python\n", "\n", " def __call__(self, *args, **kwargs):\n", " \"\"\"\n", " Calls Forward method\n", "\n", " Parameters\n", " ----------\n", " *args :\n", " positional arguments (passed to `forward`)\n", " **kwargs :\n", " keyword arguments (passed to `forward`)\n", "\n", " Returns\n", " -------\n", " Any\n", " result: module results of arbitrary type and number\n", "\n", " \"\"\"\n", " return torch.jit.ScriptModule.__call__(self, *args, **kwargs)\n", " \n", "```\n", "\n", "This also ensures, that we can pass an arbitrary number or positional and keyword arguments of arbitrary types to it (which are all passed to the `forward`-function). The advantage over directly calling the `forward` method here, is that the `ScriptModule.__call__` already does the handling of [forward-pre-hooks](https://pytorch.org/docs/stable/nn.html#torch.nn.Module.register_forward_pre_hook), [forward-hooks](https://pytorch.org/docs/stable/nn.html#torch.nn.Module.register_forward_hook) and [backward-hooks](https://pytorch.org/docs/stable/nn.html#torch.nn.Module.register_backward_hook).\n", "\n", "#### `closure`-Method\n", "Since this method is highly model-dependant, we just don't implement it, which forces the user to implement it (since it is marked as an `abstractmethod` in `AbstractExperiment`).\n", "\n", "#### `prepare_batch`-Method\n", "The above mentioned prototype of pushing everything to the correct device and convert it to float looks like this:\n", "\n", "```python\n", "\n", " @staticmethod\n", " def prepare_batch(batch: dict, input_device, output_device):\n", " \"\"\"\n", " Helper Function to prepare Network Inputs and Labels (convert them to\n", " correct type and shape and push them to correct devices)\n", "\n", " Parameters\n", " ----------\n", " batch : dict\n", " dictionary containing all the data\n", " input_device : torch.device\n", " device for network inputs\n", " output_device : torch.device\n", " device for network outputs\n", "\n", " Returns\n", " -------\n", " dict\n", " dictionary containing data in correct type and shape and on correct\n", " device\n", "\n", " \"\"\"\n", " return_dict = {\"data\": torch.from_numpy(batch.pop(\"data\")).to(\n", " input_device).to(torch.float)}\n", "\n", " for key, vals in batch.items():\n", " return_dict[key] = torch.from_numpy(vals).to(output_device).to(\n", " torch.float)\n", "\n", " return return_dict\n", "\n", "```\n", "\n", "Since we don't want to use any of the model's attributes here (and for conformity with the `AbstractNetwork` class), this method is defined as `staticmethod`, meaning it is class-bound, not instance-bound. The `closure` method has to be a `staticmethod` too.\n", "\n", "\n", "#### `forward`-Method\n", "The only thing left now, is the `forward` method, which is internally called by `ScriptModule.__call__`. The bad news is: We currently can't implement it. 
Subclassing a `ScriptModule` to overwrite a function decorated with `torch.jit.script_method` is not (yet) supported, but will be soon, once [this PR](https://github.com/pytorch/pytorch/pull/20503) is merged and released.\n", "\n", "For now: you simply have to implement this method in your own network despite the missing of an abstract interface-method.\n", "\n", "#### Putting it all together\n", "If we combine all the function implementations to one class, it looks like this:\n", "\n", "```python\n", "\n", " class AbstractTorchScriptNetwork(AbstractNetwork, torch.jit.ScriptModule):\n", "\n", " \"\"\"\n", " Abstract Interface Class for TorchScript Networks. For more information\n", " have a look at https://pytorch.org/docs/stable/jit.html#torchscript\n", "\n", " Warnings\n", " --------\n", " In addition to the here defined API, a forward function must be\n", " implemented and decorated with ``@torch.jit.script_method``\n", "\n", " \"\"\"\n", " @abc.abstractmethod\n", " def __init__(self, optimize=True, **kwargs):\n", " \"\"\"\n", "\n", " Parameters\n", " ----------\n", " optimize : bool\n", " whether to optimize the network graph or not; default: True\n", " **kwargs :\n", " additional keyword arguments (passed to :class:`AbstractNetwork`)\n", " \"\"\"\n", " torch.jit.ScriptModule.__init__(self, optimize=optimize)\n", " AbstractNetwork.__init__(self, **kwargs)\n", "\n", " def __call__(self, *args, **kwargs):\n", " \"\"\"\n", " Calls Forward method\n", "\n", " Parameters\n", " ----------\n", " *args :\n", " positional arguments (passed to `forward`)\n", " **kwargs :\n", " keyword arguments (passed to `forward`)\n", "\n", " Returns\n", " -------\n", " Any\n", " result: module results of arbitrary type and number\n", "\n", " \"\"\"\n", " return torch.jit.ScriptModule.__call__(self, *args, **kwargs)\n", "\n", " @staticmethod\n", " def prepare_batch(batch: dict, input_device, output_device):\n", " \"\"\"\n", " Helper Function to prepare Network Inputs and Labels (convert them to\n", " correct type and shape and push them to correct devices)\n", "\n", " Parameters\n", " ----------\n", " batch : dict\n", " dictionary containing all the data\n", " input_device : torch.device\n", " device for network inputs\n", " output_device : torch.device\n", " device for network outputs\n", "\n", " Returns\n", " -------\n", " dict\n", " dictionary containing data in correct type and shape and on correct\n", " device\n", "\n", " \"\"\"\n", " return_dict = {\"data\": torch.from_numpy(batch.pop(\"data\")).to(\n", " input_device).to(torch.float)}\n", "\n", " for key, vals in batch.items():\n", " return_dict[key] = torch.from_numpy(vals).to(output_device).to(\n", " torch.float)\n", "\n", " return return_dict\n", " \n", "```\n", "\n", "## Saving and loading\n", "Now that we have the ability to implement `delira`-suitable TorchScript models, we want to store them on disk and load them again, so that we don't have to retrain them every time we want to use them. These I/O functions are usually located in `delira.io`. 
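Before moving on to saving and loading, here is a brief illustration of the point made above about the `forward` method: since the abstract class cannot predefine it yet, every concrete network implements and decorates it itself. A minimal sketch, assuming the `AbstractTorchScriptNetwork` defined above is in scope; the class and layer are purely illustrative and not part of `delira`:

```python
import torch

class TinyTorchScriptNet(AbstractTorchScriptNetwork):  # hypothetical example class
    def __init__(self, in_features, num_classes):
        super().__init__()
        self.fc = torch.nn.Linear(in_features, num_classes)

    @torch.jit.script_method
    def forward(self, x: torch.Tensor):
        # return a dict so the closure can look up predictions by name
        return {"pred": self.fc(x)}
```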
\n", "\n", "### Saving\n", "Our saving function utilizes multiple functions: `torch.jit.save` to simply save the model (including it's graph) and the `save_checkpoint_torch` function implemented for the `PyTorch` backend to store the trainer state, since `TorchScript` allows us to use plain `PyTorch` optimizers.\n", "\n", "The implementation of the function looks like this:\n", "\n", "```python\n", "\n", " def save_checkpoint_torchscript(file: str, model=None, optimizers={},\n", " epoch=None, **kwargs):\n", " \"\"\"\n", " Save current checkpoint to two different files:\n", " 1.) ``file + \"_model.ptj\"``: Will include the state of the model\n", " (including the graph; this is the opposite to\n", " :func:`save_checkpoint`)\n", " 2.) ``file + \"_trainer_state.pt\"``: Will include the states of all\n", " optimizers and the current epoch (if given)\n", "\n", " Parameters\n", " ----------\n", " file : str\n", " filepath the model should be saved to\n", " model : AbstractPyTorchJITNetwork or None\n", " the model which should be saved\n", " if None: empty dict will be saved as state dict\n", " optimizers : dict\n", " dictionary containing all optimizers\n", " epoch : int\n", " current epoch (will also be pickled)\n", "\n", " \"\"\"\n", "\n", " # remove file extension if given\n", " if any([file.endswith(ext) for ext in [\".pth\", \".pt\", \".ptj\"]]):\n", " file = file.rsplit(\".\", 1)[0]\n", "\n", " if isinstance(model, AbstractPyTorchJITNetwork):\n", " torch.jit.save(model, file + \"_model.ptj\")\n", "\n", " if optimizers or epoch is not None:\n", " save_checkpoint_torch(file + \"_trainer_state.pt\", None,\n", " optimizers=optimizers, epoch=epoch, **kwargs)\n", " \n", "```\n", "\n", "### Loading\n", "To load a model, which has been saved to disk by this function we have to revert each part of it. We do this by using `torch.jit.load` for the model (and the graph) and `load_checkpoint_torch` by the `PyTorch` backend.\n", "The actual implementation is given here:\n", "\n", "```python\n", "\n", " def load_checkpoint_torchscript(file: str, **kwargs):\n", " \"\"\"\n", " Loads a saved checkpoint consisting of 2 files\n", " (see :func:`save_checkpoint_jit` for details)\n", "\n", " Parameters\n", " ----------\n", " file : str\n", " filepath to a file containing a saved model\n", " **kwargs:\n", " Additional keyword arguments (passed to torch.load)\n", " Especially \"map_location\" is important to change the device the\n", " state_dict should be loaded to\n", "\n", " Returns\n", " -------\n", " OrderedDict\n", " checkpoint state_dict\n", "\n", " \"\"\"\n", " # remove file extensions\n", " if any([file.endswith(ext) for ext in [\".pth\", \".pt\", \".ptj\"]]):\n", " file = file.rsplit(\".\", 1)[0]\n", "\n", " # load model\n", " if os.path.isfile(file + \".ptj\"):\n", " model_file = file\n", " elif os.path.isfile(file + \"_model.ptj\"):\n", " model_file = file + \"_model.ptj\"\n", " else:\n", " raise ValueError(\"No Model File found for %s\" % file)\n", "\n", " # load trainer state (if possible)\n", " trainer_file = model_file.replace(\"_model.ptj\", \"_trainer_state.pt\")\n", " if os.path.isfile(trainer_file):\n", " trainer_state = load_checkpoint_torch(trainer_file, **kwargs)\n", "\n", " else:\n", " trainer_state = {\"optimizer\": {},\n", " \"epoch\": None}\n", "\n", " trainer_state.update({\"model\": torch.jit.load(model_file)})\n", "\n", " return trainer_state\n", " \n", "```\n", "\n", "\n", "## A Trainer to train\n", "Now, that we can define and save/load our models, we want to train them. 
Luckily `delira` has already implemented a very modular backend-agnostic trainer (the `BaseNetworkTrainer`) and, built upon it, a `PyTorchNetworkTrainer`. Since the training process in PyTorch and TorchScript is nearly the same, we can just extend the `PyTorchNetworkTrainer`. Usually one would have to extend the `BaseNetworkTrainer` to provide some backend-specific functions (like necessary initializations, optimizer setup, seeding etc.). To see how this is done, you could either have a look at the `PyTorchNetworkTrainer` or the `TfNetworkTrainer` for tensorflow, which both follow this principle. Usually the only things that have to be changed completely are the loading/saving behavior and the `_setup` function, which defines the backend-specific initialization. Some other functions may have to be extended (by implementing the extension and calling the parent class's function).\n", "\n", "### Things to change:\n", "\n", "By subclassing the `PyTorchNetworkTrainer` we have to change the following things:\n", "\n", "* The trainer's default arguments\n", "\n", "* The behavior for trying to resume a previous training\n", "\n", "* The saving, loading and updating behavior\n", "\n", "We will address these one by one:\n", "\n", "#### The Default Arguments\n", "\n", "We want to use `AbstractTorchScriptNetwork`s instead of `AbstractPyTorchNetwork`s here and we have to change the behavior when passing multiple GPUs, because multi-GPU training is currently not supported by `TorchScript`.\n", "\n", "To do this, we implement the `__init__` function, apply our changes and forward the arguments to the base class's `__init__` like this (docstrings omitted for brevity):\n", "\n", "```python\n", "\n", "class TorchScriptNetworkTrainer(PyTorchNetworkTrainer):\n", " def __init__(self,\n", " network: AbstractTorchScriptNetwork,\n", " save_path: str,\n", " key_mapping,\n", " losses=None,\n", " optimizer_cls=None,\n", " optimizer_params={},\n", " train_metrics={},\n", " val_metrics={},\n", " lr_scheduler_cls=None,\n", " lr_scheduler_params={},\n", " gpu_ids=[],\n", " save_freq=1,\n", " optim_fn=create_optims_default,\n", " logging_type=\"tensorboardx\",\n", " logging_kwargs={},\n", " fold=0,\n", " callbacks=[],\n", " start_epoch=1,\n", " metric_keys=None,\n", " convert_batch_to_npy_fn=convert_torch_tensor_to_npy,\n", " criterions=None,\n", " val_freq=1,\n", " **kwargs):\n", " \n", " if len(gpu_ids) > 1:\n", " # only use first GPU due to\n", " # https://github.com/pytorch/pytorch/issues/15421\n", " gpu_ids = [gpu_ids[0]]\n", " logging.warning(\"Multiple GPUs specified. Torch JIT currently \"\n", " \"supports only single-GPU training. 
\"\n", " \"Switching to use only the first GPU for now...\")\n", "\n", " super().__init__(network=network, save_path=save_path,\n", " key_mapping=key_mapping, losses=losses,\n", " optimizer_cls=optimizer_cls,\n", " optimizer_params=optimizer_params,\n", " train_metrics=train_metrics,\n", " val_metrics=val_metrics,\n", " lr_scheduler_cls=lr_scheduler_cls,\n", " lr_scheduler_params=lr_scheduler_params,\n", " gpu_ids=gpu_ids, save_freq=save_freq,\n", " optim_fn=optim_fn, logging_type=logging_type,\n", " logging_kwargs=logging_kwargs, fold=fold,\n", " callbacks=callbacks,\n", " start_epoch=start_epoch, metric_keys=metric_keys,\n", " convert_batch_to_npy_fn=convert_batch_to_npy_fn,\n", " mixed_precision=False, mixed_precision_kwargs={},\n", " criterions=criterions, val_freq=val_freq, **kwargs\n", " )\n", " \n", "```\n", "\n", "#### Resuming Training\n", "\n", "For resuming the training, we have to completely change the `try_resume_training` function and cannot reuse the parent's implementation of it. Thus, we don't call `super().try_resume_training` here, but completely reimplement it from scratch:\n", "\n", "```python\n", "\n", " def try_resume_training(self):\n", " \"\"\"\n", " Load the latest state of a previous training if possible\n", "\n", " \"\"\"\n", " # Load latest epoch file if available\n", " if os.path.isdir(self.save_path):\n", " # check all files in directory starting with \"checkpoint\" and\n", " # not ending with \"_best.pth\"\n", " files = [x for x in os.listdir(self.save_path)\n", " if os.path.isfile(os.path.join(self.save_path, x))\n", " and x.startswith(\"checkpoint\")\n", " and not x.endswith(\"_best.ptj\")\n", " ]\n", "\n", " # if list is not empty: load previous state\n", " if files:\n", "\n", " latest_epoch = max([\n", " int(x.rsplit(\"_\", 1)[-1].rsplit(\".\", 1)[0])\n", " for x in files])\n", "\n", " latest_state_path = os.path.join(self.save_path,\n", " \"checkpoint_epoch_%d.ptj\"\n", " % latest_epoch)\n", "\n", " # if pth file does not exist, load pt file instead\n", " if not os.path.isfile(latest_state_path):\n", " latest_state_path = latest_state_path[:-1]\n", "\n", " logger.info(\"Attempting to load state from previous \\\n", " training from %s\" % latest_state_path)\n", " try:\n", " self.update_state(latest_state_path)\n", " except KeyError:\n", " logger.warning(\"Previous State could not be loaded, \\\n", " although it exists.Training will be \\\n", " restarted\")\n", "\n", "```\n", "\n", "#### Saving and Loading\n", "Now we need to change the saving and loading behavior. 
As always we try to reuse as much code as possible to avoid code duplication.\n", "\n", "##### Saving\n", "To save the current training state, we simply call the `save_checkpoint_torchscript` function:\n", "\n", "```python\n", "\n", " def save_state(self, file_name, epoch, **kwargs):\n", " \"\"\"\n", " saves the current state via\n", " :func:`delira.io.torch.save_checkpoint_jit`\n", "\n", " Parameters\n", " ----------\n", " file_name : str\n", " filename to save the state to\n", " epoch : int\n", " current epoch (will be saved for mapping back)\n", " **kwargs :\n", " keyword arguments\n", "\n", " \"\"\"\n", " if file_name.endswith(\".pt\") or file_name.endswith(\".pth\"):\n", " file_name = file_name.rsplit(\".\", 1)[0]\n", "\n", " save_checkpoint_torchscript(file_name, self.module, self.optimizers,\n", " **kwargs)\n", " \n", "```\n", "\n", "##### Loading\n", "\n", "To load the training state, we simply return the state loaded by `load_checkpoint_torchscript`.\n", "Since we don't use any arguments of the trainer itself here, the function is a `staticmethod`:\n", "\n", "```python\n", "\n", " @staticmethod\n", " def load_state(file_name, **kwargs):\n", " \"\"\"\n", " Loads the new state from file via\n", " :func:`delira.io.torch.load_checkpoint:jit`\n", "\n", " Parameters\n", " ----------\n", " file_name : str\n", " the file to load the state from\n", " **kwargs : keyword arguments\n", "\n", " Returns\n", " -------\n", " dict\n", " new state\n", "\n", " \"\"\"\n", " return load_checkpoint_torchscript(file_name, **kwargs)\n", " \n", "```\n", " \n", "##### Updating\n", "\n", "After we loaded the new state, we need to update the trainer's internal state by this new state.\n", "\n", "We do this by directly assigning the model here (since the graph was stored/loaded too) instead of only updating the state_dict and calling the parent-classes method afterwards:\n", " \n", "```python\n", "\n", " def _update_state(self, new_state):\n", " \"\"\"\n", " Update the state from a given new state\n", "\n", " Parameters\n", " ----------\n", " new_state : dict\n", " new state to update internal state from\n", "\n", " Returns\n", " -------\n", " :class:`PyTorchNetworkJITTrainer`\n", " the trainer with a modified state\n", "\n", " \"\"\"\n", " if \"model\" in new_state:\n", " self.module = new_state.pop(\"model\").to(self.input_device)\n", "\n", " return super()._update_state(new_state)\n", "\n", "```\n", " \n", "### A Whole Trainer\n", " \n", "After combining all the changes above, we finally get our new trainer as:\n", " \n", "```python\n", "\n", " class TorchScriptNetworkTrainer(PyTorchNetworkTrainer):\n", " def __init__(self,\n", " network: AbstractTorchScriptNetwork,\n", " save_path: str,\n", " key_mapping,\n", " losses=None,\n", " optimizer_cls=None,\n", " optimizer_params={},\n", " train_metrics={},\n", " val_metrics={},\n", " lr_scheduler_cls=None,\n", " lr_scheduler_params={},\n", " gpu_ids=[],\n", " save_freq=1,\n", " optim_fn=create_optims_default,\n", " logging_type=\"tensorboardx\",\n", " logging_kwargs={},\n", " fold=0,\n", " callbacks=[],\n", " start_epoch=1,\n", " metric_keys=None,\n", " convert_batch_to_npy_fn=convert_torch_tensor_to_npy,\n", " criterions=None,\n", " val_freq=1,\n", " **kwargs):\n", " \"\"\"\n", "\n", " Parameters\n", " ----------\n", " network : :class:`AbstractPyTorchJITNetwork`\n", " the network to train\n", " save_path : str\n", " path to save networks to\n", " key_mapping : dict\n", " a dictionary containing the mapping from the ``data_dict`` to\n", " the actual model's 
inputs.\n", " E.g. if a model accepts one input named 'x' and the data_dict\n", " contains one entry named 'data' this argument would have to\n", " be ``{'x': 'data'}``\n", " losses : dict\n", " dictionary containing the training losses\n", " optimizer_cls : subclass of tf.train.Optimizer\n", " optimizer class implementing the optimization algorithm of\n", " choice\n", " optimizer_params : dict\n", " keyword arguments passed to optimizer during construction\n", " train_metrics : dict, optional\n", " metrics, which will be evaluated during train phase\n", " (should work on framework's tensor types)\n", " val_metrics : dict, optional\n", " metrics, which will be evaluated during test phase\n", " (should work on numpy arrays)\n", " lr_scheduler_cls : Any\n", " learning rate schedule class: must implement step() method\n", " lr_scheduler_params : dict\n", " keyword arguments passed to lr scheduler during construction\n", " gpu_ids : list\n", " list containing ids of GPUs to use; if empty: use cpu instead\n", " Currently ``torch.jit`` only supports single GPU-Training,\n", " thus only the first GPU will be used if multiple GPUs are passed\n", " save_freq : int\n", " integer specifying how often to save the current model's state.\n", " State is saved every state_freq epochs\n", " optim_fn : function\n", " creates a dictionary containing all necessary optimizers\n", " logging_type : str or callable\n", " the type of logging. If string: it must be one of\n", " [\"visdom\", \"tensorboardx\"]\n", " If callable: it must be a logging handler class\n", " logging_kwargs : dict\n", " dictionary containing all logging keyword arguments\n", " fold : int\n", " current cross validation fold (0 per default)\n", " callbacks : list\n", " initial callbacks to register\n", " start_epoch : int\n", " epoch to start training at\n", " metric_keys : dict\n", " dict specifying which batch_dict entry to use for which metric as\n", " target; default: None, which will result in key \"label\" for all\n", " metrics\n", " convert_batch_to_npy_fn : type, optional\n", " function converting a batch-tensor to numpy, per default this is\n", " a function, which detaches the tensor, moves it to cpu and the\n", " calls ``.numpy()`` on it\n", " mixed_precision : bool\n", " whether to use mixed precision or not (False per default)\n", " mixed_precision_kwargs : dict\n", " additional keyword arguments for mixed precision\n", " val_freq : int\n", " validation frequency specifying how often to validate the trained\n", " model (a value of 1 denotes validating every epoch,\n", " a value of 2 denotes validating every second epoch etc.);\n", " defaults to 1\n", " **kwargs :\n", " additional keyword arguments\n", "\n", " \"\"\"\n", "\n", " if len(gpu_ids) > 1:\n", " # only use first GPU due to\n", " # https://github.com/pytorch/pytorch/issues/15421\n", " gpu_ids = [gpu_ids[0]]\n", " logging.warning(\"Multiple GPUs specified. Torch JIT currently \"\n", " \"supports only single-GPU training. 
\"\n", " \"Switching to use only the first GPU for now...\")\n", "\n", " super().__init__(network=network, save_path=save_path,\n", " key_mapping=key_mapping, losses=losses,\n", " optimizer_cls=optimizer_cls,\n", " optimizer_params=optimizer_params,\n", " train_metrics=train_metrics,\n", " val_metrics=val_metrics,\n", " lr_scheduler_cls=lr_scheduler_cls,\n", " lr_scheduler_params=lr_scheduler_params,\n", " gpu_ids=gpu_ids, save_freq=save_freq,\n", " optim_fn=optim_fn, logging_type=logging_type,\n", " logging_kwargs=logging_kwargs, fold=fold,\n", " callbacks=callbacks,\n", " start_epoch=start_epoch, metric_keys=metric_keys,\n", " convert_batch_to_npy_fn=convert_batch_to_npy_fn,\n", " mixed_precision=False, mixed_precision_kwargs={},\n", " criterions=criterions, val_freq=val_freq, **kwargs\n", " )\n", "\n", " def try_resume_training(self):\n", " \"\"\"\n", " Load the latest state of a previous training if possible\n", "\n", " \"\"\"\n", " # Load latest epoch file if available\n", " if os.path.isdir(self.save_path):\n", " # check all files in directory starting with \"checkpoint\" and\n", " # not ending with \"_best.pth\"\n", " files = [x for x in os.listdir(self.save_path)\n", " if os.path.isfile(os.path.join(self.save_path, x))\n", " and x.startswith(\"checkpoint\")\n", " and not x.endswith(\"_best.ptj\")\n", " ]\n", "\n", " # if list is not empty: load previous state\n", " if files:\n", "\n", " latest_epoch = max([\n", " int(x.rsplit(\"_\", 1)[-1].rsplit(\".\", 1)[0])\n", " for x in files])\n", "\n", " latest_state_path = os.path.join(self.save_path,\n", " \"checkpoint_epoch_%d.ptj\"\n", " % latest_epoch)\n", "\n", " # if pth file does not exist, load pt file instead\n", " if not os.path.isfile(latest_state_path):\n", " latest_state_path = latest_state_path[:-1]\n", "\n", " logger.info(\"Attempting to load state from previous \\\n", " training from %s\" % latest_state_path)\n", " try:\n", " self.update_state(latest_state_path)\n", " except KeyError:\n", " logger.warning(\"Previous State could not be loaded, \\\n", " although it exists.Training will be \\\n", " restarted\")\n", "\n", " def save_state(self, file_name, epoch, **kwargs):\n", " \"\"\"\n", " saves the current state via\n", " :func:`delira.io.torch.save_checkpoint_jit`\n", "\n", " Parameters\n", " ----------\n", " file_name : str\n", " filename to save the state to\n", " epoch : int\n", " current epoch (will be saved for mapping back)\n", " **kwargs :\n", " keyword arguments\n", "\n", " \"\"\"\n", " if file_name.endswith(\".pt\") or file_name.endswith(\".pth\"):\n", " file_name = file_name.rsplit(\".\", 1)[0]\n", "\n", " save_checkpoint_torchscript(file_name, self.module, self.optimizers,\n", " **kwargs)\n", "\n", " @staticmethod\n", " def load_state(file_name, **kwargs):\n", " \"\"\"\n", " Loads the new state from file via\n", " :func:`delira.io.torch.load_checkpoint:jit`\n", "\n", " Parameters\n", " ----------\n", " file_name : str\n", " the file to load the state from\n", " **kwargs : keyword arguments\n", "\n", " Returns\n", " -------\n", " dict\n", " new state\n", "\n", " \"\"\"\n", " return load_checkpoint_torchscript(file_name, **kwargs)\n", "\n", " def _update_state(self, new_state):\n", " \"\"\"\n", " Update the state from a given new state\n", "\n", " Parameters\n", " ----------\n", " new_state : dict\n", " new state to update internal state from\n", "\n", " Returns\n", " -------\n", " :class:`PyTorchNetworkJITTrainer`\n", " the trainer with a modified state\n", "\n", " \"\"\"\n", " if \"model\" in new_state:\n", " 
self.module = new_state.pop(\"model\").to(self.input_device)\n", "\n", " return super()._update_state(new_state)\n", " \n", "```\n", "\n", "## Wrapping it all in an Experiment\n", "To have access to methods like a K-Fold and the (not yet finished) hyperparameter tuning, we need to wrap the trainer in an Experiment. We will use the same approach as we did for implementing the trainer: extending an already provided class.\n", "\n", "This time we extend the `PyTorchExperiment`, which itself extends the `BaseExperiment` with some backend-specific defaults, types and seeds.\n", "\n", "Our whole class definition just changes the default arguments of the `PyTorchExperiment` and thus we only have to implement its `__init__`:\n", "\n", "```python\n", "\n", "class TorchScriptExperiment(PyTorchExperiment):\n", " def __init__(self,\n", " params: typing.Union[str, Parameters],\n", " model_cls: AbstractTorchScriptNetwork, # not AbstractPyTorchNetwork anymore\n", " n_epochs=None,\n", " name=None,\n", " save_path=None,\n", " key_mapping=None,\n", " val_score_key=None,\n", " optim_builder=create_optims_default_pytorch,\n", " checkpoint_freq=1,\n", " trainer_cls=TorchScriptNetworkTrainer, # not PyTorchNetworkTrainer anymore\n", " **kwargs):\n", " \"\"\"\n", "\n", " Parameters\n", " ----------\n", " params : :class:`Parameters` or str\n", " the training parameters, if string is passed,\n", " it is treated as a path to a pickle file, where the\n", " parameters are loaded from\n", " model_cls : Subclass of :class:`AbstractTorchScriptNetwork`\n", " the class implementing the model to train\n", " n_epochs : int or None\n", " the number of epochs to train, if None: can be specified later\n", " during actual training\n", " name : str or None\n", " the Experiment's name\n", " save_path : str or None\n", " the path to save the results and checkpoints to.\n", " if None: Current working directory will be used\n", " key_mapping : dict\n", " mapping between data_dict and model inputs (necessary for\n", " prediction with :class:`Predictor`-API), if no keymapping is\n", " given, a default key_mapping of {\"x\": \"data\"} will be used here\n", " val_score_key : str or None\n", " key defining which metric to use for validation (determining\n", " best model and scheduling lr); if None: No validation-based\n", " operations will be done (model might still get validated,\n", " but validation metrics can only be logged and not used further)\n", " optim_builder : function\n", " Function returning a dict of backend-specific optimizers.\n", " defaults to :func:`create_optims_default_pytorch`\n", " checkpoint_freq : int\n", " frequency of saving checkpoints (1 denotes saving every epoch,\n", " 2 denotes saving every second epoch etc.); default: 1\n", " trainer_cls : subclass of :class:`TorchScriptNetworkTrainer`\n", " the trainer class to use for training the model, defaults to\n", " :class:`TorchScriptNetworkTrainer`\n", " **kwargs :\n", " additional keyword arguments\n", "\n", " \"\"\"\n", " super().__init__(params=params, model_cls=model_cls,\n", " n_epochs=n_epochs, name=name, save_path=save_path,\n", " key_mapping=key_mapping,\n", " val_score_key=val_score_key,\n", " optim_builder=optim_builder,\n", " checkpoint_freq=checkpoint_freq,\n", " trainer_cls=trainer_cls,\n", " **kwargs)\n", " \n", "```\n", "\n", "## Testing it\n", "Now that we finished the implementation of the backend (which is the outermost wrapper; Congratulations!), we can just test it. We'll use a very simple network and test it with dummy data. 
We also only test the `run` and `test` functionality of our experiment, since everything else is just used for setting up the internal state or is a composition of these two methods and thus already tested.\n", "Now, let's just define our dataset, instantiate it three times (for training, validation and testing) and wrap each of them into a `DataManager`:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "from delira.data_loading import AbstractDataset\n", "from delira.data_loading import DataManager\n", "\n", "\n", "class DummyDataset(AbstractDataset):\n", " def __init__(self, length):\n", " super().__init__(None, None)\n", " self.length = length\n", "\n", " def __getitem__(self, index):\n", " return {\"data\": np.random.rand(32),\n", " \"label\": np.random.randint(0, 1, 1)}\n", "\n", " def __len__(self):\n", " return self.length\n", "\n", " def get_sample_from_index(self, index):\n", " return self.__getitem__(index)\n", " \n", "dset_train = DummyDataset(500)\n", "dset_val = DummyDataset(50)\n", "dset_test = DummyDataset(10)\n", "\n", "# training, validation and testing with\n", "# a batchsize of 16, 1 loading thread and no transformations.\n", "dmgr_train = DataManager(dset_train, 16, 1, None)\n", "dmgr_val = DataManager(dset_val, 16, 1, None)\n", "dmgr_test = DataManager(dset_test, 16, 1, None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now that we have created three datasets, we need to define our small dummy network. We do this by subclassing `delira.models.AbstractTorchScriptNetwork` (which is exactly the implementation given above, but we need to use the internal one, because there are some type checks against it)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from delira.models import AbstractTorchScriptNetwork\n", "import torch\n", "\n", "\n", "class DummyNetworkTorchScript(AbstractTorchScriptNetwork):\n", " __constants__ = [\"module\"]\n", "\n", " def __init__(self):\n", " super().__init__()\n", " self.module = self._build_model(32, 1)\n", "\n", " @torch.jit.script_method\n", " def forward(self, x):\n", " return {\"pred\": self.module(x)}\n", "\n", " @staticmethod\n", " def prepare_batch(batch_dict, input_device, output_device):\n", " return {\"data\": torch.from_numpy(batch_dict[\"data\"]\n", " ).to(input_device,\n", " torch.float),\n", " \"label\": torch.from_numpy(batch_dict[\"label\"]\n", " ).to(output_device,\n", " torch.float)}\n", "\n", " @staticmethod\n", " def closure(model: AbstractTorchScriptNetwork, data_dict: dict,\n", " optimizers: dict, losses={}, metrics={},\n", " fold=0, **kwargs):\n", " \"\"\"\n", " closure method to do a single backpropagation step\n", "\n", "\n", " Parameters\n", " ----------\n", " model : \n", " trainable model\n", " data_dict : dict\n", " dictionary containing the data\n", " optimizers : dict\n", " dictionary of optimizers to optimize model's parameters\n", " losses : dict\n", " dict holding the losses to calculate errors\n", " (gradients from different losses will be accumulated)\n", " metrics : dict\n", " dict holding the metrics to calculate\n", " fold : int\n", " Current Fold in Crossvalidation (default: 0)\n", " **kwargs:\n", " additional keyword arguments\n", "\n", " Returns\n", " -------\n", " dict\n", " Metric values (with same keys as input dict metrics)\n", " dict\n", " Loss values (with same keys as input dict losses)\n", " list\n", " Arbitrary number of predictions as torch.Tensor\n", "\n", " Raises\n", " ------\n", " 
AssertionError\n", " if optimizers or losses are empty or the optimizers are not\n", " specified\n", "\n", " \"\"\"\n", "\n", " assert (optimizers and losses) or not optimizers, \\\n", " \"Criterion dict cannot be empty, if optimizers are passed\"\n", "\n", " loss_vals = {}\n", " metric_vals = {}\n", " total_loss = 0\n", "\n", " # choose suitable context manager:\n", " if optimizers:\n", " context_man = torch.enable_grad\n", "\n", " else:\n", " context_man = torch.no_grad\n", "\n", " with context_man():\n", "\n", " inputs = data_dict.pop(\"data\")\n", " preds = model(inputs)\n", "\n", " if data_dict:\n", "\n", " for key, crit_fn in losses.items():\n", " _loss_val = crit_fn(preds[\"pred\"], *data_dict.values())\n", " loss_vals[key] = _loss_val.item()\n", " total_loss += _loss_val\n", "\n", " with torch.no_grad():\n", " for key, metric_fn in metrics.items():\n", " metric_vals[key] = metric_fn(\n", " preds[\"pred\"], *data_dict.values()).item()\n", "\n", " if optimizers:\n", " optimizers['default'].zero_grad()\n", " # perform loss scaling via apex if half precision is enabled\n", " with optimizers[\"default\"].scale_loss(total_loss) as scaled_loss:\n", " scaled_loss.backward()\n", " optimizers['default'].step()\n", "\n", " else:\n", "\n", " # add prefix \"val\" in validation mode\n", " eval_loss_vals, eval_metrics_vals = {}, {}\n", " for key in loss_vals.keys():\n", " eval_loss_vals[\"val_\" + str(key)] = loss_vals[key]\n", "\n", " for key in metric_vals:\n", " eval_metrics_vals[\"val_\" + str(key)] = metric_vals[key]\n", "\n", " loss_vals = eval_loss_vals\n", " metric_vals = eval_metrics_vals\n", "\n", " return metric_vals, loss_vals, {k: v.detach()\n", " for k, v in preds.items()}\n", "\n", " @staticmethod\n", " def _build_model(in_channels, n_outputs):\n", " return torch.nn.Sequential(\n", " torch.nn.Linear(in_channels, 64),\n", " torch.nn.ReLU(),\n", " torch.nn.Linear(64, n_outputs)\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now that we have defined our model, let's just test if we really can forward some tensors through it. We will just use some random `torch.Tensors` (created by `torch.rand`). Since our model accepts 1d inputs of length 32, we need to pass 2d tensors to it (the additional dimension is the batch-dimension)." 
] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'single': tensor([[-0.1934]], grad_fn=),\n", " 'batched': tensor([[-0.0525],\n", " [-0.0884],\n", " [-0.1492],\n", " [-0.0431]], grad_fn=)}" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "input_tensor_single = torch.rand(1, 32) # use a single-sample batch (batchsize=1) here\n", "input_tensor_batched = torch.rand(4, 32) # use a batch with batchsize 4 here\n", "\n", "# create model instance\n", "model = DummyNetworkTorchScript()\n", "\n", "outputs = {\"single\": model(input_tensor_single)[\"pred\"], \"batched\": model(input_tensor_batched)[\"pred\"]}\n", "outputs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import mean_absolute_error\n", "from delira.training.callbacks import ReduceLROnPlateauCallbackPyTorch\n", "from delira.training import Parameters\n", "params = Parameters(fixed_params={\n", " \"model\": {},\n", " \"training\": {\n", " \"losses\": {\"CE\": torch.nn.BCEWithLogitsLoss()},\n", " \"optimizer_cls\": torch.optim.Adam,\n", " \"optimizer_params\": {\"lr\": 1e-3},\n", " \"num_epochs\": 2,\n", " \"val_metrics\": {\"mae\": mean_absolute_error},\n", " \"lr_sched_cls\": ReduceLROnPlateauCallbackPyTorch,\n", " \"lr_sched_params\": {\"mode\": \"min\"}\n", " }\n", " }\n", " )\n", "\n", "from delira.training import TorchScriptExperiment\n", "\n", "exp = TorchScriptExperiment(params, DummyNetworkTorchScript,\n", " key_mapping={\"x\": \"data\"},\n", " val_score_key=\"mae\",\n", " val_score_mode=\"min\")\n", "\n", "trained_model = exp.run(dmgr_train, dmgr_val)\n", "exp.test(trained_model, dmgr_test, params.nested_get(\"val_metrics\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Congratulations. You have implemented your first fully-workable `delira`-Backend. Wasn't that hard, was it?\n", "\n", "Before you start implementing backends for all the other frameworks out there, let me just give you some advices:\n", "\n", "* You should test everything you implement or extend\n", "\n", "* Make sure, to keep your backend-specification in mind\n", "\n", "* Always follow the API of already existing backends. If this is not possible: test this extensively\n", "\n", "* If you extend another backend (like we did here; we extended the `PyTorch`-backend for `TorchScript`), make sure, that the \"base-backend\" is always installed (best if they can only be installed together)\n", "\n", "* If you have questions regarding the implementation, don't hestiate to contact us." 
] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/gan_pytorch.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "# Generative Adversarial Nets with Delira - A very short introduction\n", "*Author: Justus Schock* \n", "\n", "*Date: 04.12.2018*\n", "\n", "This Example shows how to set up a basic GAN PyTorch experiment and\n", "Visdom Logging Environment.\n", "\n", "## HyperParameters\n", "Let's first setup the essential hyperparameters. We will use `delira`'s `Parameters`-class for this:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "logger = None\n", "import torch\n", "from delira.training import Parameters\n", "params = Parameters(fixed_params={\n", " \"model\": {\n", " \"n_channels\": 1, \n", " \"noise_length\": 10\n", " },\n", " \"training\": {\n", " \"batch_size\": 64, # batchsize to use\n", " \"num_epochs\": 10, # number of epochs to train\n", " \"optimizer_cls\": torch.optim.Adam, # optimization algorithm to use\n", " \"optimizer_params\": {'lr': 1e-3}, # initialization parameters for this algorithm\n", " \"losses\": {\"L1\": torch.nn.L1Loss()}, # the loss function\n", " \"lr_sched_cls\": None, # the learning rate scheduling algorithm to use\n", " \"lr_sched_params\": {}, # the corresponding initialization parameters\n", " \"metrics\": {} # and some evaluation metrics\n", " }\n", "}) " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since we specified `torch.nn.L1Loss` as criterion and `torch.nn.MSELoss` as metric, they will be both calculated for each batch, but only the criterion will be used for backpropagation. Since we have a simple generative task, this should be sufficient. We will train our network with a batchsize of 64 by using `Adam` as optimizer of choice.\n", "\n", "## Logging and Visualization\n", "To get a visualization of our results, we should monitor them somehow. For logging we will use `Visdom`. To start a visdom server you need to execute the following command inside an environment which has visdom installed: \n", "```shell\n", "visdom -port=9999\n", "```\n", "This will start a visdom server on port 9999 of your machine and now we can start to configure our logging environment. To view your results you can open [http://localhost:9999](http://localhost:9999) in your browser." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from trixi.logger import PytorchVisdomLogger\n", "from delira.logging import TrixiHandler\n", "import logging\n", "\n", "logger_kwargs = {\n", " 'name': 'GANExampleLogger', # name of our logging environment\n", " 'port': 9999 # port on which our visdom server is alive\n", "}\n", "\n", "logger_cls = PytorchVisdomLogger\n", "\n", "# configure logging module (and root logger)\n", "logging.basicConfig(level=logging.INFO,\n", " handlers=[TrixiHandler(logger_cls, **logger_kwargs)])\n", "\n", "\n", "# derive logger from root logger\n", "# (don't do `logger = logging.Logger(\"...\")` since this will create a new\n", "# logger which is unrelated to the root logger\n", "logger = logging.getLogger(\"Test Logger\")\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since a single visdom server can run multiple environments, we need to specify a (unique) name for our environment and need to tell the logger, on which port it can find the visdom server.\n", "\n", "## Data Preparation\n", "### Loading\n", "Next we will create a small train and validation set (based on `torchvision` MNIST):" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.data_loading import TorchvisionClassificationDataset\n", "\n", "dataset_train = TorchvisionClassificationDataset(\"mnist\", # which dataset to use\n", " train=True, # use trainset\n", " img_shape=(224, 224) # resample to 224 x 224 pixels\n", " )\n", "dataset_val = TorchvisionClassificationDataset(\"mnist\", \n", " train=False,\n", " img_shape=(224, 224)\n", " )" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Augmentation\n", "For Data-Augmentation we will apply a few transformations:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from batchgenerators.transforms import RandomCropTransform, \\\n", " ContrastAugmentationTransform, Compose\n", "from batchgenerators.transforms.spatial_transforms import ResizeTransform\n", "from batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform\n", "\n", "transforms = Compose([\n", " RandomCropTransform(200), # Perform Random Crops of Size 200 x 200 pixels\n", " ResizeTransform(224), # Resample these crops back to 224 x 224 pixels\n", " ContrastAugmentationTransform(), # randomly adjust contrast\n", " MeanStdNormalizationTransform(mean=[0.5], std=[0.5])]) \n", "\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "With these transformations we can now wrap our datasets into datamanagers:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.data_loading import DataManager, SequentialSampler, RandomSampler\n", "\n", "manager_train = DataManager(dataset_train, params.nested_get(\"batch_size\"),\n", " transforms=transforms,\n", " sampler_cls=RandomSampler,\n", " n_process_augmentation=4)\n", "\n", "manager_val = DataManager(dataset_val, params.nested_get(\"batch_size\"),\n", " transforms=transforms,\n", " sampler_cls=SequentialSampler,\n", " n_process_augmentation=4)\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## Training\n", "\n", "After we have done that, we can finally specify our experiment and run it. 
We will therfore use the already implemented `GenerativeAdversarialNetworkBasePyTorch` which is basically a vanilla DCGAN:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(\"ignore\", UserWarning) # ignore UserWarnings raised by dependency code\n", "warnings.simplefilter(\"ignore\", FutureWarning) # ignore FutureWarnings raised by dependency code\n", "\n", "\n", "from delira.training import PyTorchExperiment\n", "from delira.training.train_utils import create_optims_gan_default_pytorch\n", "from delira.models.gan import GenerativeAdversarialNetworkBasePyTorch\n", "\n", "if logger is not None:\n", " logger.info(\"Init Experiment\")\n", "experiment = PyTorchExperiment(params, GenerativeAdversarialNetworkBasePyTorch,\n", " name=\"GANExample\",\n", " save_path=\"./tmp/delira_Experiments\",\n", " optim_builder=create_optims_gan_default_pytorch,\n", " gpu_ids=[0])\n", "experiment.save()\n", "\n", "model = experiment.run(manager_train, manager_val)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Congratulations, you have now trained your first Generative Adversarial Model using `delira`." ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## See Also\n", "For a more detailed explanation have a look at \n", "* [the introduction tutorial](tutorial_delira.ipynb, \"Introduction\")\n", "* [the 2d segmentation example](segmentation_2d_pytorch.ipynb, \"Segmentation 2D\")\n", "* [the 3d segmentation example](segmentation_3d_pytorch.ipynb, \"Segmentation 3D\")\n", "* [the classification example](classification_pytorch.ipynb, \"GAN\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/segmentation_2d_pytorch.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "# Segmentation in 2D using U-Nets with Delira - A very short introduction\n", "\n", "*Author: Justus Schock, Alexander Moriz* \n", "\n", "*Date: 17.12.2018*\n", " \n", "This Example shows how use the U-Net implementation in Delira with PyTorch.\n", "\n", "Let's first setup the essential hyperparameters. 
We will use `delira`'s `Parameters`-class for this:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "logger = None\n", "import torch\n", "from delira.training import Parameters\n", "params = Parameters(fixed_params={\n", " \"model\": {\n", " \"in_channels\": 1, \n", " \"num_classes\": 4\n", " },\n", " \"training\": {\n", " \"batch_size\": 64, # batchsize to use\n", " \"num_epochs\": 10, # number of epochs to train\n", " \"optimizer_cls\": torch.optim.Adam, # optimization algorithm to use\n", " \"optimizer_params\": {'lr': 1e-3}, # initialization parameters for this algorithm\n", " \"losses\": {\"CE\": torch.nn.CrossEntropyLoss()}, # the loss function\n", " \"lr_sched_cls\": None, # the learning rate scheduling algorithm to use\n", " \"lr_sched_params\": {}, # the corresponding initialization parameters\n", " \"metrics\": {} # and some evaluation metrics\n", " }\n", "}) " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since we did not specify any metric, only the `CrossEntropyLoss` will be calculated for each batch. Since we have a classification task, this should be sufficient. We will train our network with a batchsize of 64 by using `Adam` as optimizer of choice.\n", "\n", "## Logging and Visualization\n", "To get a visualization of our results, we should monitor them somehow. For logging we will use `Visdom`. To start a visdom server you need to execute the following command inside an environment which has visdom installed: \n", "```shell\n", "visdom -port=9999\n", "```\n", "This will start a visdom server on port 9999 of your machine and now we can start to configure our logging environment. To view your results you can open [http://localhost:9999](http://localhost:9999) in your browser." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from trixi.logger import PytorchVisdomLogger\n", "from delira.logging import TrixiHandler\n", "import logging\n", "\n", "logger_kwargs = {\n", " 'name': 'ClassificationExampleLogger', # name of our logging environment\n", " 'port': 9999 # port on which our visdom server is alive\n", "}\n", "\n", "logger_cls = PytorchVisdomLogger\n", "\n", "# configure logging module (and root logger)\n", "logging.basicConfig(level=logging.INFO,\n", " handlers=[TrixiHandler(logger_cls, **logger_kwargs)])\n", "\n", "\n", "# derive logger from root logger\n", "# (don't do `logger = logging.Logger(\"...\")` since this will create a new\n", "# logger which is unrelated to the root logger\n", "logger = logging.getLogger(\"Test Logger\")\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since a single visdom server can run multiple environments, we need to specify a (unique) name for our environment and need to tell the logger, on which port it can find the visdom server.\n", "\n", "## Data Praparation\n", "### Loading\n", "Next we will create a small train and validation set (in this case they will be the same to show the overfitting capability of the UNet).\n", "\n", "Our data is a brain MR-image thankfully provided by the [FSL](https://fsl.fmrib.ox.ac.uk/fsl/fslwiki) in their [introduction](http://www.fmrib.ox.ac.uk/primers/intro_primer/ExBox3/IntroBox3.html).\n", "\n", "We first download the data and extract the T1 image and the corresponding segmentation:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from io import BytesIO\n", "from zipfile import ZipFile\n", "from urllib.request import urlopen\n", "\n", "resp = urlopen(\"http://www.fmrib.ox.ac.uk/primers/intro_primer/ExBox3/ExBox3.zip\")\n", "zipfile = ZipFile(BytesIO(resp.read()))\n", "#zipfile_list = zipfile.namelist()\n", "#print(zipfile_list)\n", "img_file = zipfile.extract(\"ExBox3/T1_brain.nii.gz\")\n", "mask_file = zipfile.extract(\"ExBox3/T1_brain_seg.nii.gz\")" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Now, we load the image and the mask (they are both 3D), convert them to a 32-bit floating point numpy array and ensure, they have the same shape (i.e. 
that for each voxel in the image, there is a voxel in the mask):" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import SimpleITK as sitk\n", "import numpy as np\n", "\n", "# load image and mask\n", "img = sitk.GetArrayFromImage(sitk.ReadImage(img_file))\n", "img = img.astype(np.float32)\n", "mask = mask = sitk.GetArrayFromImage(sitk.ReadImage(mask_file))\n", "mask = mask.astype(np.float32)\n", "\n", "assert mask.shape == img.shape\n", "print(img.shape)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "By querying the unique values in the mask, we get the following:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "np.unique(mask)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "This means, there are 4 classes (background and 3 types of tissue) in our sample.\n", "\n", "Since we want to do a 2D segmentation, we extract a single slice out of the image and the mask (we choose slice 100 here) and plot it:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "\n", "# load single slice\n", "img_slice = img[:, :, 100]\n", "mask_slice = mask[:, :, 100]\n", "\n", "# plot slices\n", "plt.figure(1, figsize=(15,10))\n", "plt.subplot(121)\n", "plt.imshow(img_slice, cmap=\"gray\")\n", "plt.colorbar(fraction=0.046, pad=0.04)\n", "plt.subplot(122)\n", "plt.imshow(mask_slice, cmap=\"gray\")\n", "plt.colorbar(fraction=0.046, pad=0.04)\n", "plt.show()\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "To load the data, we have to use a `Dataset`. The following defines a very simple dataset, accepting an image slice, a mask slice and the number of samples. It always returns the same sample until `num_samples` samples have been returned." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.data_loading import AbstractDataset\n", "\n", "class CustomDataset(AbstractDataset):\n", " def __init__(self, img, mask, num_samples=1000):\n", " super().__init__(None, None, None, None)\n", " self.data = {\"data\": img.reshape(1, *img.shape), \"label\": mask.reshape(1, *mask.shape)}\n", " self.num_samples = num_samples\n", " \n", " def __getitem__(self, index):\n", " return self.data\n", " \n", " def __len__(self):\n", " return self.num_samples" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Now, we can finally instantiate our datasets:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "dataset_train = CustomDataset(img_slice, mask_slice, num_samples=10000)\n", "dataset_val = CustomDataset(img_slice, mask_slice, num_samples=1)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Augmentation\n", "For Data-Augmentation we will apply a few transformations:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from batchgenerators.transforms import RandomCropTransform, \\\n", " ContrastAugmentationTransform, Compose\n", "from batchgenerators.transforms.spatial_transforms import ResizeTransform\n", "from batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform\n", "\n", "transforms = Compose([\n", " RandomCropTransform(150, label_key=\"label\"), # Perform Random Crops of Size 150 x 150 pixels\n", " ResizeTransform(224, label_key=\"label\"), # Resample these crops back to 224 x 224 pixels\n", " ContrastAugmentationTransform(), # randomly adjust contrast\n", " MeanStdNormalizationTransform(mean=[img_slice.mean()], std=[img_slice.std()])]) # use concrete values since we only have one sample (have to estimate it over whole dataset otherwise)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "With these transformations we can now wrap our datasets into datamanagers:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.data_loading import DataManager, SequentialSampler, RandomSampler\n", "\n", "manager_train = DataManager(dataset_train, params.nested_get(\"batch_size\"),\n", " transforms=transforms,\n", " sampler_cls=RandomSampler,\n", " n_process_augmentation=4)\n", "\n", "manager_val = DataManager(dataset_val, params.nested_get(\"batch_size\"),\n", " transforms=transforms,\n", " sampler_cls=SequentialSampler,\n", " n_process_augmentation=4)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## Training\n", "\n", "After we have done that, we can finally specify our experiment and run it. 
We will therfore use the already implemented `UNet2dPytorch`:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(\"ignore\", UserWarning) # ignore UserWarnings raised by dependency code\n", "warnings.simplefilter(\"ignore\", FutureWarning) # ignore FutureWarnings raised by dependency code\n", "\n", "\n", "from delira.training import PyTorchExperiment\n", "from delira.training.train_utils import create_optims_default_pytorch\n", "from delira.models.segmentation import UNet2dPyTorch\n", "\n", "if logger is not None:\n", " logger.info(\"Init Experiment\")\n", "experiment = PyTorchExperiment(params, UNet2dPyTorch,\n", " name=\"Segmentation2dExample\",\n", " save_path=\"./tmp/delira_Experiments\",\n", " optim_builder=create_optims_default_pytorch,\n", " gpu_ids=[0], mixed_precision=True)\n", "experiment.save()\n", "\n", "model = experiment.run(manager_train, manager_val)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## See Also\n", "For a more detailed explanation have a look at \n", "* [the introduction tutorial](tutorial_delira.ipynb, \"Introduction\")\n", "* [the classification example](classification_pytorch.ipynb, \"Classification\")\n", "* [the 3d segmentation example](segmentation_3d_pytorch.ipynb, \"Segmentation 3D\")\n", "* [the generative adversarial example](gan_pytorch.ipynb, \"GAN\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/segmentation_3d_pytorch.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "# Segmentation in 3D using U-Nets with Delira - A very short introduction\n", "\n", "*Author: Justus Schock, Alexander Moriz* \n", "\n", "*Date: 17.12.2018*\n", " \n", "This Example shows how use the U-Net implementation in Delira with PyTorch.\n", "\n", "Let's first setup the essential hyperparameters. 
We will use `delira`'s `Parameters`-class for this:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "logger = None\n", "import torch\n", "from delira.training import Parameters\n", "params = Parameters(fixed_params={\n", " \"model\": {\n", " \"in_channels\": 1, \n", " \"num_classes\": 4\n", " },\n", " \"training\": {\n", " \"batch_size\": 64, # batchsize to use\n", " \"num_epochs\": 10, # number of epochs to train\n", " \"optimizer_cls\": torch.optim.Adam, # optimization algorithm to use\n", " \"optimizer_params\": {'lr': 1e-3}, # initialization parameters for this algorithm\n", " \"losses\": {\"CE\": torch.nn.CrossEntropyLoss()}, # the loss function\n", " \"lr_sched_cls\": None, # the learning rate scheduling algorithm to use\n", " \"lr_sched_params\": {}, # the corresponding initialization parameters\n", " \"metrics\": {} # and some evaluation metrics\n", " }\n", "}) " ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since we did not specify any metric, only the `CrossEntropyLoss` will be calculated for each batch. Since we have a classification task, this should be sufficient. We will train our network with a batchsize of 64 by using `Adam` as optimizer of choice.\n", "\n", "## Logging and Visualization\n", "To get a visualization of our results, we should monitor them somehow. For logging we will use `Visdom`. To start a visdom server you need to execute the following command inside an environment which has visdom installed: \n", "```shell\n", "visdom -port=9999\n", "```\n", "This will start a visdom server on port 9999 of your machine and now we can start to configure our logging environment. To view your results you can open [http://localhost:9999](http://localhost:9999) in your browser." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from trixi.logger import PytorchVisdomLogger\n", "from delira.logging import TrixiHandler\n", "import logging\n", "\n", "logger_kwargs = {\n", " 'name': 'ClassificationExampleLogger', # name of our logging environment\n", " 'port': 9999 # port on which our visdom server is alive\n", "}\n", "\n", "logger_cls = PytorchVisdomLogger\n", "\n", "# configure logging module (and root logger)\n", "logging.basicConfig(level=logging.INFO,\n", " handlers=[TrixiHandler(logger_cls, **logger_kwargs)])\n", "\n", "\n", "# derive logger from root logger\n", "# (don't do `logger = logging.Logger(\"...\")` since this will create a new\n", "# logger which is unrelated to the root logger\n", "logger = logging.getLogger(\"Test Logger\")\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since a single visdom server can run multiple environments, we need to specify a (unique) name for our environment and need to tell the logger, on which port it can find the visdom server.\n", "\n", "## Data Praparation\n", "### Loading\n", "Next we will create a small train and validation set (in this case they will be the same to show the overfitting capability of the UNet).\n", "\n", "Our data is a brain MR-image thankfully provided by the [FSL](https://fsl.fmrib.ox.ac.uk/fsl/fslwiki) in their [introduction](http://www.fmrib.ox.ac.uk/primers/intro_primer/ExBox3/IntroBox3.html).\n", "\n", "We first download the data and extract the T1 image and the corresponding segmentation:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from io import BytesIO\n", "from zipfile import ZipFile\n", "from urllib.request import urlopen\n", "\n", "resp = urlopen(\"http://www.fmrib.ox.ac.uk/primers/intro_primer/ExBox3/ExBox3.zip\")\n", "zipfile = ZipFile(BytesIO(resp.read()))\n", "#zipfile_list = zipfile.namelist()\n", "#print(zipfile_list)\n", "img_file = zipfile.extract(\"ExBox3/T1_brain.nii.gz\")\n", "mask_file = zipfile.extract(\"ExBox3/T1_brain_seg.nii.gz\")" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Now, we load the image and the mask (they are both 3D), convert them to a 32-bit floating point numpy array and ensure, they have the same shape (i.e. that for each voxel in the image, there is a voxel in the mask):" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import SimpleITK as sitk\n", "import numpy as np\n", "\n", "# load image and mask\n", "img = sitk.GetArrayFromImage(sitk.ReadImage(img_file))\n", "img = img.astype(np.float32)\n", "mask = mask = sitk.GetArrayFromImage(sitk.ReadImage(mask_file))\n", "mask = mask.astype(np.float32)\n", "\n", "assert mask.shape == img.shape\n", "print(img.shape)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "By querying the unique values in the mask, we get the following:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "np.unique(mask)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "This means, there are 4 classes (background and 3 types of tissue) in our sample.\n", "\n", "To load the data, we have to use a `Dataset`. The following defines a very simple dataset, accepting an image slice, a mask slice and the number of samples. 
It always returns the same sample until `num_samples` samples have been returned." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.data_loading import AbstractDataset\n", "\n", "class CustomDataset(AbstractDataset):\n", " def __init__(self, img, mask, num_samples=1000):\n", " super().__init__(None, None, None, None)\n", " self.data = {\"data\": img.reshape(1, *img.shape), \"label\": mask.reshape(1, *mask.shape)}\n", " self.num_samples = num_samples\n", " \n", " def __getitem__(self, index):\n", " return self.data\n", " \n", " def __len__(self):\n", " return self.num_samples" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Now, we can finally instantiate our datasets:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "dataset_train = CustomDataset(img, mask, num_samples=10000)\n", "dataset_val = CustomDataset(img, mask, num_samples=1)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Augmentation\n", "For Data-Augmentation we will apply a few transformations:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from batchgenerators.transforms import ContrastAugmentationTransform, Compose\n", "from batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform\n", "\n", "transforms = Compose([\n", " ContrastAugmentationTransform(), # randomly adjust contrast\n", " MeanStdNormalizationTransform(mean=[img.mean()], std=[img.std()])]) # use concrete values since we only have one sample (have to estimate it over whole dataset otherwise)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "With these transformations we can now wrap our datasets into datamanagers:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.data_loading import DataManager, SequentialSampler, RandomSampler\n", "\n", "manager_train = DataManager(dataset_train, params.nested_get(\"batch_size\"),\n", " transforms=transforms,\n", " sampler_cls=RandomSampler,\n", " n_process_augmentation=4)\n", "\n", "manager_val = DataManager(dataset_val, params.nested_get(\"batch_size\"),\n", " transforms=transforms,\n", " sampler_cls=SequentialSampler,\n", " n_process_augmentation=4)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## Training\n", "\n", "After we have done that, we can finally specify our experiment and run it. 
We will therfore use the already implemented `UNet3dPytorch`:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(\"ignore\", UserWarning) # ignore UserWarnings raised by dependency code\n", "warnings.simplefilter(\"ignore\", FutureWarning) # ignore FutureWarnings raised by dependency code\n", "\n", "\n", "from delira.training import PyTorchExperiment\n", "from delira.training.train_utils import create_optims_default_pytorch\n", "from delira.models.segmentation import UNet3dPyTorch\n", "\n", "if logger:\n", " logger.info(\"Init Experiment\")\n", "experiment = PyTorchExperiment(params, UNet3dPyTorch,\n", " name=\"Segmentation3dExample\",\n", " save_path=\"./tmp/delira_Experiments\",\n", " optim_builder=create_optims_default_pytorch,\n", " gpu_ids=[0], mixed_precision=True)\n", "experiment.save()\n", "\n", "model = experiment.run(manager_train, manager_val)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## See Also\n", "For a more detailed explanation have a look at \n", "* [the introduction tutorial](tutorial_delira.ipynb, \"Introduction\")\n", "* [the classification example](classification_pytorch.ipynb, \"Classification\")\n", "* [the 2d segmentation example](segmentation_2d_pytorch.ipynb, \"Segmentation 2D\")\n", "* [the generative adversarial example](gan_pytorch.ipynb, \"GAN\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/tutorial_delira.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "# Delira Introduction\n", "\n", "*Last updated: 09.05.2019*\n", "\n", "Authors: Justus Schock, Christoph Haarburger" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## Loading Data\n", "\n", "To train your network you first need to load your training data (and probably also your validation data). This chapter will therefore deal with `delira`'s capabilities to load your data (and apply some augmentation). \n", "\n", "### The Dataset\n", "There are mainly two ways to load your data: Lazy or non-lazy. Loading in a lazy way means that you load the data just in time and keep the used memory to a bare minimum. This has, however, the disadvantage that your loading function could be a bottleneck since all postponed operations may have to wait until the needed data samples are loaded. In a no-lazy way, one would preload all data to RAM before starting any other operations. This has the advantage that there cannot be a loading bottleneck during latter operations. This advantage comes at cost of a higher memory usage and a (possibly) huge latency at the beginning of each experiment. Both ways to load your data are implemented in `delira` and they are named `BaseLazyDataset`and `BaseCacheDataset`. In the following steps you will only see the `BaseLazyDataset` since exchanging them is trivial. 
All Datasets (including the ones you might want to create yourself later) must be derived from `delira.data_loading.AbstractDataset` to ensure a minimum common API.\n", "\n", "The dataset's `__init__` has the following signature:\n", "\n", "```python\n", "def __init__(self, data_path, load_fn, **load_kwargs):\n", "```\n", "\n", "This means you have to pass the path to the directory containing your data (`data_path`) and a function to load a single sample of your data (`load_fn`). To get a single sample of your dataset after creating it, you can index it like this: `dataset[0]`.\n", "Additionally you can iterate over your dataset just like over any other `python` iterator via\n", "\n", "```python\n", "for sample in dataset:\n", " # do your stuff here\n", "```\n", "\n", "or enumerate it via\n", "\n", "```python\n", "for idx, sample in enumerate(dataset):\n", " # do your stuff here\n", "```\n", "\n", "The remaining argument `**load_kwargs` accepts an arbitrary amount of additional keyword arguments which are directly passed to your loading function.\n", "\n", "An example of how loading your data may look is given below:\n", "```python\n", "from delira.data_loading import BaseLazyDataset, default_load_fn_2d\n", "dataset_train = BaseLazyDataset(\"/images/datasets/external/mnist/train\",\n", " default_load_fn_2d, img_shape=(224, 224))\n", "```\n", "\n", "In this case all data lying in `/images/datasets/external/mnist/train` is loaded by `default_load_fn_2d`. The files containing the data must be PNG-files, while the groundtruth is defined in TXT-files. The `default_load_fn_2d` needs the additional argument `img_shape` which is passed as keyword argument via `**load_kwargs`.\n", "\n", "> **Note:** for reproducibility we decided to use some wrapped PyTorch datasets for this introduction. \n", "\n", "Now, let's just initialize our trainset:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.data_loading import TorchvisionClassificationDataset\n", "dataset_train = TorchvisionClassificationDataset(\"mnist\", train=True,\n", " img_shape=(224, 224))" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Getting a single sample of your dataset with `dataset_train[0]` will produce:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "dataset_train[0]" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "which means that our data is stored in a dictionary containing the keys `data` and `label`, each of them holding the corresponding numpy arrays. The dataloading works on `numpy` purely and is thus backend agnostic. It does not matter in which format or with which library you load/preprocess your data, but at the end it must be converted to numpy arrays.\n", "For validation purposes another dataset could be created with the test data like this:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "dataset_val = TorchvisionClassificationDataset(\"mnist\", train=False,\n", " img_shape=(224, 224))" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### The Dataloader\n", "The Dataloader wraps your dataset to provide the ability to load whole batches with an abstract interface. 
To create a dataloader, one would have to pass the following arguments to its `__init__`: the previously created `dataset`. Additionally, it is possible to pass the `batch_size` defining the number of samples per batch, the total number of batches (`num_batches`), which by default will be the number of samples in your dataset divided by the batchsize, a random `seed` for always getting the same behaviour of random number generators and a [`sampler`](#Sampler) defining your sampling strategy. This would create a dataloader for your `dataset_train`:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.data_loading import DataLoader\n", "\n", "batch_size = 32\n", "\n", "loader_train = DataLoader(dataset_train, batch_size)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Since the `batch_size` has been set to 32, the loader will load 32 samples as one batch.\n", "\n", "Even though it would be possible to train your network with an instance of `DataLoader`, `delira` also offers a different approach that covers multithreaded data loading and augmentation:" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### The Datamanager\n", "\n", "The data manager is implemented as `delira.data_loading.DataManager` and wraps a `DataLoader`. It also encapsulates augmentations. Looking at the `DataManager`'s signature, it becomes obvious that it accepts the same arguments as the [`DataLoader`](#The-Dataloader). You can either pass a `dataset` or a combination of path, dataset class and load function. Additionally, you can pass a custom dataloader class if necessary and a sampler class to choose a sampling algorithm. \n", "\n", "The parameter `transforms` accepts augmentation transformations as implemented in `batchgenerators`. 
Augmentation is applied on the fly using `n_process_augmentation` threads.\n", "\n", "All in all the DataManager is the recommended way to generate batches from your dataset.\n", "\n", "The following example shows how to create a data manager instance:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.data_loading import DataManager\n", "from batchgenerators.transforms.abstract_transforms import Compose\n", "from batchgenerators.transforms.sample_normalization_transforms import MeanStdNormalizationTransform\n", "\n", "batchsize = 64\n", "transforms = Compose([MeanStdNormalizationTransform(mean=1*[0], std=1*[1])])\n", "\n", "data_manager_train = DataManager(dataset_train, # dataset to use\n", " batchsize, # batchsize\n", " n_process_augmentation=1, # number of augmentation processes\n", " transforms=transforms) # augmentation transforms\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "The approach to initialize a DataManager from a data path takes more arguments since, in contrast to initialization from a dataset, it needs all the arguments which are necessary to internally create a dataset.\n", "\n", "Since we want to validate our model we have to create a second manager containing our `dataset_val`:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "data_manager_val = DataManager(dataset_val, \n", " batchsize, \n", " n_process_augmentation=1, \n", " transforms=transforms)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "\n", "That's it - we just finished loading our data!\n", "\n", "Iterating over a DataManager is possible in simple loops:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from tqdm.auto import tqdm # utility for progress bars\n", "\n", "# create actual batch generator from DataManager\n", "batchgen = data_manager_val.get_batchgen()\n", "\n", "for data in tqdm(batchgen):\n", " pass # here you can access the data of the current batch" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Sampler\n", "In the previous section samplers have already been mentioned but not yet explained. A sampler implements an algorithm defining how a batch should be assembled from single samples in a dataset. `delira` provides the following sampler classes in its subpackage `delira.data_loading.sampler`:\n", "\n", "* `AbstractSampler`\n", "* `SequentialSampler`\n", "* `PrevalenceSequentialSampler`\n", "* `RandomSampler`\n", "* `PrevalenceRandomSampler`\n", "* `WeightedRandomSampler`\n", "* `LambdaSampler`\n", "\n", "The `AbstractSampler` implements no sampling algorithm but defines a sampling API and thus all custom samplers must inherit from this class. The `SequentialSampler` builds batches by just iterating over the samples' indices in a sequential way. Following this, the `RandomSampler` builds batches by randomly drawing the samples' indices with replacement. \n", "If the class each sample belongs to is known for each sample at the beginning, the `PrevalenceSequentialSampler` and the `PrevalenceRandomSampler` perform a per-class sequential or random sampling and build each batch with exactly the same number of samples from each class. 
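For instance, such a sampler class could be handed to a `DataManager` via its `sampler_cls` argument, as described at the end of this section (a rough sketch reusing `dataset_train` from above; it assumes the sampler can derive the class of each sample from the dataset's `label` entries):\n", "\n", "```python\n", "from delira.data_loading import DataManager\n", "from delira.data_loading.sampler import PrevalenceRandomSampler\n", "\n", "# sketch: class-balanced random batches of size 64\n", "balanced_manager = DataManager(dataset_train, 64,\n", " n_process_augmentation=1,\n", " transforms=None,\n", " sampler_cls=PrevalenceRandomSampler)\n", "```\n", "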
\n", "The `WeightedRandomSampler` accepts custom weights to give specific samples a higher probability during random sampling than others.\n", "\n", "The `LambdaSampler` is a wrapper for a custom sampling function, which can be passed to the wrapper during its initialization, to ensure API conformity.\n", "\n", "It can be passed to the DataLoader or DataManager as class (argument `sampler_cls`) or as instance (argument `sampler`)." ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## Models\n", "\n", "Since the purpose of this framework is to use machine learning algorithms, there has to be a way to define them. Defining models is straightforward. `delira` provides a class `delira.models.AbstractNetwork`. *All models must inherit from this class*.\n", "\n", "To inherit this class four functions must be implemented in the subclass:\n", "\n", "* `__init__`\n", "* `closure`\n", "* `prepare_batch`\n", "* `__call__`\n", "\n", "\n", "### `__init__`\n", "The `__init__` function is the class's constructor. In our case it builds the entire model (maybe using some helper functions). If writing your own custom model, you have to override this method.\n", "\n", "> **Note:** If you want the best experience for saving your model and completely recreating it during the loading process you need to take care of a few things:\n", "> * if using `torchvision.models` to build your model, always import it with `from torchvision import models as t_models`\n", "> * register all arguments in your custom `__init__` in the abstract class. An `__init__` prototype could look like this:\n", ">\n", "```python\n", "def __init__(self, in_channels: int, n_outputs: int, **kwargs):\n", " \"\"\"\n", "\n", " Parameters\n", " ----------\n", " in_channels: int\n", " number of input_channels\n", " n_outputs: int\n", " number of outputs (usually same as number of classes)\n", " \"\"\"\n", " # register params by passing them as kwargs to parent class __init__\n", " # only params registered like this will be saved!\n", " super().__init__(in_channels=in_channels,\n", " n_outputs=n_outputs,\n", " **kwargs)\n", "```\n", "\n", "\n", "### `closure`\n", "The `closure` function defines one batch iteration to train the network. 
This function is needed for the framework to provide a generic trainer function which works with all kinds of networks and loss functions.\n", "\n", "The closure function must implement all steps from forwarding, over loss calculation, metric calculation and logging (for which `delira.logging_handlers` provides some extensions for Python's logging module), to the actual backpropagation.\n", "\n", "It is called with an empty optimizer-dict to evaluate and should thus work with optional optimizers.\n", "\n", "### `prepare_batch`\n", "The `prepare_batch` function defines the transformation from loaded data to match the network's input and output shape and pushes everything to the right device.\n", "\n", "\n", "## Abstract Networks for specific Backends\n", "### PyTorch\n", "At the time of writing, PyTorch is the only backend which is supported, but other backends are planned.\n", "In PyTorch every network should be implemented as a subclass of `torch.nn.Module`, which also provides a `__call__` method.\n", "\n", "This results in slightly different requirements for PyTorch networks: instead of implementing a `__call__` method, we simply call the `torch.nn.Module.__call__` and therefore have to implement the `forward` method, which defines the module's behaviour and is internally called by `torch.nn.Module.__call__` (among other stuff). To give a default behaviour suiting most cases and not have to care about internals, `delira` provides the `AbstractPyTorchNetwork` which is a more specific case of the `AbstractNetwork` for PyTorch modules.\n", "\n", "#### `forward`\n", "The `forward` function defines what has to be done to forward your input through your network and must return a dictionary. Assuming your network has three convolutional layers stored in `self.conv1`, `self.conv2` and `self.conv3` and a ReLU stored in `self.relu`, a simple `forward` function could look like this:\n", "\n", "```python\n", "def forward(self, input_batch: torch.Tensor):\n", " out_1 = self.relu(self.conv1(input_batch))\n", " out_2 = self.relu(self.conv2(out_1))\n", " out_3 = self.conv3(out_2)\n", " \n", " return {\"pred\": out_3}\n", "```\n", "\n", "#### `prepare_batch`\n", "The default `prepare_batch` function for PyTorch networks looks like this:\n", "\n", "```python\n", " @staticmethod\n", " def prepare_batch(batch: dict, input_device, output_device):\n", " \"\"\"\n", " Helper Function to prepare Network Inputs and Labels (convert them to\n", " correct type and shape and push them to correct devices)\n", "\n", " Parameters\n", " ----------\n", " batch : dict\n", " dictionary containing all the data\n", " input_device : torch.device\n", " device for network inputs\n", " output_device : torch.device\n", " device for network outputs\n", "\n", " Returns\n", " -------\n", " dict\n", " dictionary containing data in correct type and shape and on correct\n", " device\n", "\n", " \"\"\"\n", " return_dict = {\"data\": torch.from_numpy(batch.pop(\"data\")).to(\n", " input_device)}\n", "\n", " for key, vals in batch.items():\n", " return_dict[key] = torch.from_numpy(vals).to(output_device)\n", "\n", " return return_dict\n", "\n", "```\n", "and can be customized by subclassing the `AbstractPyTorchNetwork`.\n", "\n", "#### `closure example`\n", "A simple closure function for a PyTorch module could look like this:\n", "```python\n", " @staticmethod\n", " def closure(model: AbstractPyTorchNetwork, data_dict: dict,\n", " optimizers: dict, criterions={}, metrics={},\n", " fold=0, **kwargs):\n", " \"\"\"\n", " closure method to do a 
single backpropagation step\n", "\n", " Parameters\n", " ----------\n", " model : :class:`ClassificationNetworkBasePyTorch`\n", " trainable model\n", " data_dict : dict\n", " dictionary containing the data\n", " optimizers : dict\n", " dictionary of optimizers to optimize model's parameters\n", " criterions : dict\n", " dict holding the criterions to calculate errors\n", " (gradients from different criterions will be accumulated)\n", " metrics : dict\n", " dict holding the metrics to calculate\n", " fold : int\n", " Current Fold in Crossvalidation (default: 0)\n", " **kwargs:\n", " additional keyword arguments\n", "\n", " Returns\n", " -------\n", " dict\n", " Metric values (with same keys as input dict metrics)\n", " dict\n", " Loss values (with same keys as input dict criterions)\n", " list\n", " Arbitrary number of predictions as torch.Tensor\n", "\n", " Raises\n", " ------\n", " AssertionError\n", " if optimizers or criterions are empty or the optimizers are not\n", " specified\n", "\n", " \"\"\"\n", "\n", " assert (optimizers and criterions) or not optimizers, \\\n", " \"Criterion dict cannot be emtpy, if optimizers are passed\"\n", "\n", " loss_vals = {}\n", " metric_vals = {}\n", " total_loss = 0\n", "\n", " # choose suitable context manager:\n", " if optimizers:\n", " context_man = torch.enable_grad\n", "\n", " else:\n", " context_man = torch.no_grad\n", "\n", " with context_man():\n", "\n", " inputs = data_dict.pop(\"data\")\n", " # obtain outputs from network\n", " preds = model(inputs)[\"pred\"]\n", "\n", " if data_dict:\n", "\n", " for key, crit_fn in criterions.items():\n", " _loss_val = crit_fn(preds, *data_dict.values())\n", " loss_vals[key] = _loss_val.detach()\n", " total_loss += _loss_val\n", "\n", " with torch.no_grad():\n", " for key, metric_fn in metrics.items():\n", " metric_vals[key] = metric_fn(\n", " preds, *data_dict.values())\n", "\n", " if optimizers:\n", " optimizers['default'].zero_grad()\n", " total_loss.backward()\n", " optimizers['default'].step()\n", "\n", " else:\n", "\n", " # add prefix \"val\" in validation mode\n", " eval_loss_vals, eval_metrics_vals = {}, {}\n", " for key in loss_vals.keys():\n", " eval_loss_vals[\"val_\" + str(key)] = loss_vals[key]\n", "\n", " for key in metric_vals:\n", " eval_metrics_vals[\"val_\" + str(key)] = metric_vals[key]\n", "\n", " loss_vals = eval_loss_vals\n", " metric_vals = eval_metrics_vals\n", "\n", " for key, val in {**metric_vals, **loss_vals}.items():\n", " logging.info({\"value\": {\"value\": val.item(), \"name\": key,\n", " \"env_appendix\": \"_%02d\" % fold\n", " }})\n", "\n", " logging.info({'image_grid': {\"images\": inputs, \"name\": \"input_images\",\n", " \"env_appendix\": \"_%02d\" % fold}})\n", "\n", " return metric_vals, loss_vals, preds\n", "```\n", "\n", "> **Note:** This closure is taken from the `delira.models.classification.ClassificationNetworkBasePyTorch`\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Other examples\n", "In `delira.models` you can find exemplaric implementations of generative adversarial networks, classification and regression approaches or segmentation networks." ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## Training\n", "\n", "### Parameters\n", "Training-parameters (often called hyperparameters) can be defined in the `delira.training.Parameters` class. 
\n", "\n", "The class accepts the parameters `batch_size` and `num_epochs` to define the batchsize and the number of epochs to train, the parameters `optimizer_cls` and `optimizer_params` to create an optimizer for training, the parameter `criterions` to specify the training criterions (whose gradients will be accumulated by default), the parameters `lr_sched_cls` and `lr_sched_params` to define the learning rate scheduling and the parameter `metrics` to specify evaluation metrics.\n", "\n", "Additionally, it is possible to pass an arbitrary number of keyword arguments to the class.\n", "\n", "It is good practice to create a `Parameters` object at the beginning and then use it for creating other objects which are needed for training, since you can use the class's attributes and changes in hyperparameters only have to be done once:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "import torch\n", "from delira.training import Parameters\n", "from delira.data_loading import RandomSampler, SequentialSampler\n", "\n", "params = Parameters(fixed_params={\n", " \"model\": {},\n", " \"training\": {\n", " \"batch_size\": 64, # batchsize to use\n", " \"num_epochs\": 2, # number of epochs to train\n", " \"optimizer_cls\": torch.optim.Adam, # optimization algorithm to use\n", " \"optimizer_params\": {'lr': 1e-3}, # initialization parameters for this algorithm\n", " \"criterions\": {\"CE\": torch.nn.CrossEntropyLoss()}, # the loss function\n", " \"lr_sched_cls\": None, # the learning rate scheduling algorithm to use\n", " \"lr_sched_params\": {}, # the corresponding initialization parameters\n", " \"metrics\": {} # and some evaluation metrics\n", " }\n", "}) \n", "\n", "# recreating the data managers with the batchsize of the params object\n", "manager_train = DataManager(dataset_train, params.nested_get(\"batch_size\"), 1,\n", " transforms=None, sampler_cls=RandomSampler,\n", " n_process_loading=4)\n", "manager_val = DataManager(dataset_val, params.nested_get(\"batch_size\"), 3,\n", " transforms=None, sampler_cls=SequentialSampler,\n", " n_process_loading=4)\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Trainer\n", "\n", "The `delira.training.NetworkTrainer` class provides functions to train a single network by passing attributes from your parameter object, a `save_freq` to specify how often your model should be saved (`save_freq=1` indicates every epoch, `save_freq=2` every second epoch etc.) and `gpu_ids`. If you don't pass any ids at all, your network will be trained on CPU (and probably take a lot of time). If you specify 1 id, the network will be trained on the GPU with the corresponding index and if you pass multiple `gpu_ids` your network will be trained on multiple GPUs in parallel.\n", "\n", "> **Note:** The GPU indices are referring to the devices listed in `CUDA_VISIBLE_DEVICES`. 
E.g. if `CUDA_VISIBLE_DEVICES` lists GPUs 3, 4, 5 then gpu_id 0 will be the index for GPU 3 etc.\n", "\n", "> **Note:** training on multiple GPUs is not recommended for small and simple networks, since for these networks the synchronization overhead is far greater than the parallelization benefit.\n", "\n", "Training your network might look like this:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.training import PyTorchNetworkTrainer\n", "from delira.models.classification import ClassificationNetworkBasePyTorch\n", "\n", "# path where checkpoints should be saved\n", "save_path = \"./results/checkpoints\"\n", "\n", "model = ClassificationNetworkBasePyTorch(in_channels=1, n_outputs=10)\n", "\n", "trainer = PyTorchNetworkTrainer(network=model,\n", " save_path=save_path,\n", " criterions=params.nested_get(\"criterions\"),\n", " optimizer_cls=params.nested_get(\"optimizer_cls\"),\n", " optimizer_params=params.nested_get(\"optimizer_params\"),\n", " metrics=params.nested_get(\"metrics\"),\n", " lr_scheduler_cls=params.nested_get(\"lr_sched_cls\"),\n", " lr_scheduler_params=params.nested_get(\"lr_sched_params\"),\n", " gpu_ids=[0]\n", " )\n", "\n", "#trainer.train(params.nested_get(\"num_epochs\"), manager_train, manager_val)\n" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "### Experiment\n", "The `delira.training.AbstractExperiment` class needs an experiment name, a path to save its results to, a parameter object, a model class and the keyword arguments to create an instance of this class. It provides methods to perform a single training and also a method for running a k-fold cross-validation. In order to create it, you must choose the `PyTorchExperiment`, which is basically just a subclass of the `AbstractExperiment` to provide a general setup for PyTorch modules. Running an experiment could look like this:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "from delira.training import PyTorchExperiment\n", "from delira.training.train_utils import create_optims_default_pytorch\n", "\n", "# Add model parameters to Parameter class\n", "params.fixed.model = {\"in_channels\": 1, \"n_outputs\": 10}\n", "\n", "experiment = PyTorchExperiment(params=params, \n", " model_cls=ClassificationNetworkBasePyTorch,\n", " name=\"TestExperiment\", \n", " save_path=\"./results\",\n", " optim_builder=create_optims_default_pytorch,\n", " gpu_ids=[0])\n", "\n", "experiment.run(manager_train, manager_val)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "An `Experiment` is the most abstract (and recommended) way to define, train and validate your network." ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## Logging" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "Previous class and function definitions used Python's `logging` library. As extensions for this library `delira` provides a package (`delira.logging`) containing handlers to realize different logging methods. 
\n", "\n", "To use these handlers simply add them to your logger like this:\n", "```python\n", "logger.addHandler(logging.StreamHandler())\n", "```\n", "\n", "Nowadays, delira mainly relies on [trixi](https://github.com/MIC-DKFZ/trixi/) for logging and provides only a `MultiStreamHandler` and a `TrixiHandler`, which is a binding to `trixi`'s loggers and integrates them into the python `logging` module\n", "\n", "### `MultiStreamHandler`\n", "The `MultiStreamHandler` accepts an arbitrary number of streams during initialization and writes the message to all of it's streams during logging.\n", "\n", "### Logging with `Visdom` - The `trixi` Loggers\n", "[`Visdom`](https://github.com/facebookresearch/visdom) is a tool designed to visualize your logs. To use this tool you need to open a port on the machine you want to train on via `visdom -port YOUR_PORTNUMBER` Afterwards just add the handler of your choice to the logger. For more detailed information and customization have a look at [this](https://github.com/facebookresearch/visdom) website.\n", "\n", "Logging the scalar tensors containing `1`, `2`, `3`, `4` (at the beginning; will increase to show epochwise logging) with the corresponding keys `\"one\"`, `\"two\"`, `\"three\"`, `\"four\"` and two random images with the keys `\"prediction\"` and `\"groundtruth\"` would look like this:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "pycharm": {} }, "outputs": [], "source": [ "NUM_ITERS = 4\n", "\n", "# import logging handler and logging module\n", "from delira.logging import TrixiHandler\n", "from trixi.logger import PytorchVisdomLogger\n", "import logging\n", "\n", "# configure logging module (and root logger)\n", "logger_kwargs = {\n", " 'name': 'test_env', # name of loggin environment\n", " 'port': 9999 # visdom port to connect to\n", "}\n", "logger_cls = PytorchVisdomLogger\n", "\n", "# configure logging module (and root logger)\n", "logging.basicConfig(level=logging.INFO,\n", " handlers=[TrixiHandler(logger_cls, **logger_kwargs)])\n", "# derive logger from root logger\n", "# (don't do `logger = logging.Logger(\"...\")` since this will create a new\n", "# logger which is unrelated to the root logger\n", "logger = logging.getLogger(\"Test Logger\")\n", "\n", "# create dict containing the scalar numbers as torch.Tensor\n", "scalars = {\"one\": torch.Tensor([1]),\n", " \"two\": torch.Tensor([2]),\n", " \"three\": torch.Tensor([3]),\n", " \"four\": torch.Tensor([4])}\n", "\n", "# create dict containing the images as torch.Tensor\n", "# pytorch awaits tensor dimensionality of \n", "# batchsize x image channels x height x width\n", "images = {\"prediction\": torch.rand(1, 3, 224, 224),\n", " \"groundtruth\": torch.rand(1, 3, 224, 224)}\n", "\n", "# Simulate 4 Epochs\n", "for i in range(4*NUM_ITERS): \n", " logger.info({\"image_grid\": {\"images\": images[\"prediction\"], \"name\": \"predictions\"}})\n", " \n", " for key, val_tensor in scalars.items():\n", " logger.info({\"value\": {\"value\": val_tensor.item(), \"name\": key}})\n", " scalars[key] += 1" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "## More Examples" ] }, { "cell_type": "markdown", "metadata": { "pycharm": {} }, "source": [ "More Examples can be found in \n", "* [the classification example](classification_pytorch.ipynb, \"Classification\")\n", "* [the 2d segmentation example](segmentation_2d_pytorch.ipynb, \"Segmentation 2D\")\n", "* [the 3d segmentation example](segmentation_3d_pytorch.ipynb, \"Segmentation 3D\")\n", "* [the 
generative adversarial example](gan_pytorch.ipynb, \"GAN\")" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: paper/paper.bib ================================================ @online{batchgenerators, author = {MIC-DKFZ}, title = {batchgenerators}, year = 2019, url = {https://github.com/MIC-DKFZ/batchgenerators}, urldate = {2019-05-17} } @inproceedings{tensorflow, title={Tensorflow: A system for large-scale machine learning}, author={Abadi, Mart{\'\i}n and Barham, Paul and Chen, Jianmin and Chen, Zhifeng and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Irving, Geoffrey and Isard, Michael and others}, booktitle={12th {USENIX} Symposium on Operating Systems Design and Implementation {OSDI} 16)}, pages={265--283}, year={2016} } @inproceedings{pytorch, title={Automatic differentiation in PyTorch}, author={Paszke, Adam and Gross, Sam and Chintala, Soumith and Chanan, Gregory and Yang, Edward and DeVito, Zachary and Lin, Zeming and Desmaison, Alban and Antiga, Luca and Lerer, Adam}, booktitle={NIPS 2017 Autodiff Workshop}, year={2017} } @inproceedings{gan, title = {Generative Adversarial Nets}, author = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua}, booktitle = {Advances in Neural Information Processing Systems 27}, editor = {Z. Ghahramani and M. Welling and C. Cortes and N. D. Lawrence and K. Q. Weinberger}, pages = {2672--2680}, year = {2014}, publisher = {Curran Associates, Inc.}, url = {http://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf} } ================================================ FILE: paper/paper.md ================================================ --- title: 'Delira: A High-Level Framework for Deep Learning in Medical Image Analysis' tags: - python - deep learning - medical image analysis - pytorch - tensorflow authors: - name: Christoph Haarburger affiliation: "1" - name: Justus Schock affiliation: "1" - name: Michael Baumgartner affiliation: "1" - name: Oliver Rippel affiliation: "1" - name: Dorit Merhof affiliation: "1" affiliations: - name: Institute of Imaging and Computer Vision, RWTH Aachen University, Germany index: 1 date: 17 May 2019 bibliography: paper.bib --- # Summary Medical image analysis research using deep neural networks often involves the development of problem-specific network architectures and the evaluation of models on several datasets. Contemporary deep learning frameworks such as PyTorch [@pytorch] and Tensorflow [@tensorflow], however, operate on a low level, such that for comparing different models on several datasets, a lot of boilerplate code is necessary. So far, this boilerplate code is often copied and pasted for new projects and experiments. Reference implementations of new methods may be implemented in either PyTorch or Tensorflow, leading to a lot of friction when comparing two methods that are implemented in different low-level frameworks. 
Moreover, data augmentation for 3D medical images such as from computed tomography or magnetic resonance images is not natively supported by many low-level frameworks. As a result, stand-alone data augmentation solutions are often applied [@batchgenerators]. In order to integrate high-level functionalities such as logging, data structures for image datasets, data augmentation, trainer classes and model save and load functionality in a way that is agnostic with respect to the low-level framework, we developed ``Delira`` (Deep Learning in Radiology). ``Delira`` consists of several subpackages and modules that are structured into ``data_loading``, ``io``, ``logging``, ``models``, ``training`` and ``utils``. This modular structure enables the reuse of datasets and data loading pipelines across different models. Moreover, reference models for classification, segmentation and data synthesis problems using generative adversarial networks [@gan] are provided in the ``models`` subpackage. The actual training is carried out using a ``NetworkTrainer`` class that implements the actual training routine given a dataset and model. An ``Experiment`` class runs the training using ``NetworkTrainer``, e.g. in a cross-validation scheme. A quick tutorial showing how the most important data structures interact with each other and HTML documentation is provided at https://delira.readthedocs.io/en/master/classification_pytorch.html. Currently, PyTorch and Tensorflow backends are supported and tested. Adding more backends is easily possible if needed. ``Delira`` is released under the BSD 2-Clause license. The source code can be found at https://github.com/justusschock/delira. # References ================================================ FILE: pytest.ini ================================================ [pytest] testpaths = tests addopts = --cov=delira python_files = *.py ================================================ FILE: requirements/base.txt ================================================ numpy>=1.15.0 scikit-learn>=0.20.0 jupyter>=1.0.0 ipython joblib pylint tqdm visdom>=0.1.8.5 pyyaml batchgenerators>=0.18.2,!=0.19.2,<0.19.4 tensorboardX nested_lookup ================================================ FILE: requirements/chainer.txt ================================================ chainer >= 6.0.0 h5py ================================================ FILE: requirements/tensorflow.txt ================================================ tensorflow-gpu==1.14 ================================================ FILE: requirements/torch.txt ================================================ torchvision>=0.2.1 torch>=1.0.0 ================================================ FILE: scripts/ci/build_docs.sh ================================================ #!/usr/bin/env bash cd ./docs; make html; make html; make html; touch _build/html/.nojekyll; ================================================ FILE: scripts/ci/install_before_docs.sh ================================================ #!/usr/bin/env bash pip install -r docs/requirements.txt; ================================================ FILE: scripts/ci/install_before_style_check.sh ================================================ #!/usr/bin/env bash pip install pycodestyle; pip install autopep8; ================================================ FILE: scripts/ci/install_before_tests.sh ================================================ #!/usr/bin/env bash pip install -U pip wheel; pip install -r requirements/base.txt; if [[ "$BACKEND" == "TFEager" ]]; then pip install -r 
requirements/tensorflow.txt pip uninstall -y tensorflow-gpu; pip install tensorflow==1.14; elif [[ "$BACKEND" == "TFGraph" ]]; then pip install -r requirements/tensorflow.txt pip uninstall -y tensorflow-gpu; pip install tensorflow==1.14; elif [[ "$BACKEND" == "Torch" ]]; then pip install -r requirements/torch.txt elif [[ "$BACKEND" == "TorchScript" ]]; then pip install -r requirements/torch.txt elif [[ "$BACKEND" == "Chainer" ]]; then pip install -r requirements/chainer.txt else pip install slackclient==1.3.1 fi pip install coverage; pip install codecov; ================================================ FILE: scripts/ci/run_style_checks.sh ================================================ #!/usr/bin/env bash # based onhttps://gist.github.com/MichaelCurrie/802ce28c993ff2dd632c # find pep8 errors and ignore E402 module level import not at top of file due to logging num_errors_before=`find . -name \*.py -exec pycodestyle --ignore=E402 {} + | wc -l`; echo $num_errors_before; cd "$TRAVIS_BUILD_DIR"; # try with combination of maintainer email and github token git config user.name "Travis AutoPEP8 Fixes"; git checkout $TRAVIS_BRANCH; # fix pep8 erros in place if possible find . -name \*.py -exec autopep8 --recursive --aggressive --aggressive --in-place --exclude *conf.py {} +; num_errors_after=`find . -name \*.py -exec pycodestyle --ignore=E402 {} + | wc -l`; echo $num_errors_after; if (( $num_errors_after < $num_errors_before )); then git commit -a -m "PEP-8 Auto-Fix"; git config --global push.default simple; # Push only to the current branch. # Make sure to make the output quiet, or else the API token will # leak! This works because the API key can replace your password. git push https://$GITHUB_TOKEN@github.com/delira-dev/delira.git; fi cd "$TRAVIS_BUILD_DIR"; # List remaining errors, which have to be fixed manually find . 
-name \*.py -exec pycodestyle --ignore=E402 {} +; ================================================ FILE: scripts/ci/run_tests.sh ================================================ #!/usr/bin/env bash coverage run -m unittest ================================================ FILE: setup.cfg ================================================ [pycodestyle] exclude = .eggs,*.egg,build,docs/*,.git,versioneer.py,*/conf.py ignore = E721 [versioneer] VCS = git style = pep440 versionfile_source = delira/_version.py versionfile_build = delira/_version.py tag_prefix = v parentdir_prefix = ================================================ FILE: setup.py ================================================ import os from setuptools import find_packages, setup import versioneer def resolve_requirements(file): if not os.path.isfile(file): file = os.path.join(os.path.dirname(__file__), "requirements", file) requirements = [] with open(file) as f: req = f.read().splitlines() for r in req: if r.startswith("-r"): requirements += resolve_requirements( os.path.join(os.path.dirname(file), r.split(" ")[1])) else: requirements.append(r) return requirements def read_file(file): with open(file) as f: content = f.read() return content def unify_requirements(base_requirements: list, *additional_requirement_lists): for reqs in additional_requirement_lists: for req in reqs: if req not in base_requirements: base_requirements.append(req) return base_requirements def parse_all_requirements(backend_requirement_dict: dict): backend_requirements = {"full": []} # parse all requirements for backend_name, requirement_file in backend_requirement_dict.items(): _reqs = resolve_requirements(requirement_file) backend_requirements[backend_name] = _reqs # add all requirements to full if not already part of it backend_requirements["full"] = unify_requirements( backend_requirements["full"], _reqs) # for each backend: check if requirement is already in base requirements for backend_name, reqs in backend_requirements.items(): if backend_name == "base": continue for _req in reqs: if _req in backend_requirements["base"]: reqs.pop(reqs.index(_req)) backend_requirements[backend_name] = reqs return backend_requirements requirement_files = { "base": "base.txt", "sklearn": "base.txt", # no extra requirements necessary "torch": "torch.txt", "torchscript": "torch.txt", "tensorflow": "tensorflow.txt", "tensorflow_eager": "tensorflow.txt", "chainer": "chainer.txt" } requirement_dict = parse_all_requirements(requirement_files) readme = read_file(os.path.join(os.path.dirname(__file__), "README.md")) setup( name='delira', version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), packages=find_packages(), url='https://github.com/delira-dev/delira/', test_suite="unittest", long_description=readme, long_description_content_type='text/markdown', maintainer="Justus Schock", maintainer_email="justus.schock@rwth-aachen.de", license='BSD-2', install_requires=requirement_dict.pop("base"), tests_require=["coverage"], python_requires=">=3.5", extras_require=requirement_dict ) ================================================ FILE: tests/__init__.py ================================================ ================================================ FILE: tests/data_loading/__init__.py ================================================ ================================================ FILE: tests/data_loading/test_augmenters.py ================================================ from delira.data_loading import Augmenter, DataLoader, SequentialSampler, \ AbstractDataset 
import numpy as np from .utils import DummyDataset from ..utils import check_for_no_backend import unittest class TestAugmenters(unittest.TestCase): def setUp(self) -> None: self._dset_len = 500 self._batchsize = 3 if "drop_last" in self._testMethodName: self._drop_last = True else: self._drop_last = False dataset = DummyDataset(self._dset_len) data_loader = DataLoader(dataset) sampler = SequentialSampler.from_dataset(dataset) if "parallel" in self._testMethodName: self.aug = Augmenter(data_loader, self._batchsize, sampler, 2, drop_last=self._drop_last) else: self.aug = Augmenter(data_loader, self._batchsize, sampler, 0, drop_last=self._drop_last) def _aug_test(self): num_batches = self._dset_len // self._batchsize if not self._drop_last: num_batches += int(bool(self._dset_len % self._batchsize)) last_idx = 0 for batch in self.aug: self.assertIsInstance(batch, dict) for v in batch.values(): # check for batchsize for alll batches except last # (which can be smaller) if self._drop_last or last_idx < num_batches - 1: self.assertEqual(len(v), self._batchsize) else: self.assertLess(len(v), self._batchsize) last_idx += 1 self.assertEqual(last_idx, num_batches) # multiple test functions running the same test with different # configurations. Must be done in different functions, because # configurations are switch based on function name @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_parallel(self): self._aug_test() @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_parallel_drop_last(self): self._aug_test() @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_sequential(self): self._aug_test() @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_sequential_drop_last(self): self._aug_test() def _test_sampler_indices(self, parallel: bool): class Dataset(AbstractDataset): def __init__(self): super().__init__(None, None) self.data = [] for i in range(50): self.data.append({"data": i}) def __getitem__(self, item): return self.data[item] def __len__(self): return 50 dataset = Dataset() data_loader = DataLoader(dataset) sampler = SequentialSampler.from_dataset(dataset) if parallel: aug = Augmenter(data_loader, 1, sampler, 2, drop_last=False) else: aug = Augmenter(data_loader, 1, sampler, 0, drop_last=False) for idx, batch in enumerate(aug): self.assertEquals(batch["data"].item(), idx) @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_sampling_order_parallel(self): self._test_sampler_indices(True) @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_sampling_order_sequential(self): self._test_sampler_indices(False) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/data_loading/test_data_loader.py ================================================ import unittest from delira.data_loading import DataLoader, SequentialSampler, BatchSampler from .utils import DummyDataset import numpy as np from ..utils import check_for_no_backend class DataLoaderTest(unittest.TestCase): def _test_data_loader(self, data): loader = DataLoader(data) sampler = SequentialSampler.from_dataset(loader.dataset) batch_sampler = BatchSampler(sampler, 16) sampler_iter = iter(batch_sampler) 
self.assertIsInstance(loader(next(sampler_iter)), dict) for key, val in loader(next(sampler_iter)).items(): self.assertEqual(len(val), 16) self.assertIn("label", loader(next(sampler_iter))) self.assertIn("data", loader(next(sampler_iter))) self.assertEquals(loader.process_id, 0) loader.process_id = 456 self.assertEquals(loader.process_id, 456) with self.assertRaises(AttributeError): loader.process_id = 123 @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_data_loader_dset(self): dset = DummyDataset(600, [0.5, 0.3, 0.2]) self._test_data_loader(dset) @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_data_loader_dict(self): data = {"label": np.random.rand(600), "data": np.random.rand(600, 1, 3, 3)} self._test_data_loader(data) @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_data_loader_iterable(self): data = [{"label": np.random.rand(1), "data": np.random.rand(1, 3, 3)} for i in range(600)] self._test_data_loader(data) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/data_loading/test_data_manager.py ================================================ import unittest import numpy as np from delira.data_loading import DataManager from delira.data_loading.data_manager import Augmenter from ..utils import check_for_no_backend from .utils import DummyDataset class DataManagerTest(unittest.TestCase): @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_datamanager(self): batch_size = 16 np.random.seed(1) dset = DummyDataset(600, [0.5, 0.3, 0.2]) manager = DataManager(dset, batch_size, n_process_augmentation=0, transforms=None) self.assertIsInstance(manager.get_batchgen(), Augmenter) # create batch manually data, labels = [], [] for i in range(batch_size): data.append(dset[i]["data"]) labels.append(dset[i]["label"]) batch_dict = {"data": np.asarray(data), "label": np.asarray(labels)} augmenter = manager.get_batchgen() augmenter_iter = iter(augmenter) for key, val in next(augmenter_iter).items(): self.assertTrue((val == batch_dict[key]).all()) for key, val in next(augmenter_iter).items(): self.assertEqual(len(val), batch_size) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/data_loading/test_dataset.py ================================================ import unittest import numpy as np from delira.data_loading import ConcatDataset, BaseCacheDataset, \ BaseExtendCacheDataset, BaseLazyDataset, LoadSample, LoadSampleLabel from delira.data_loading.load_utils import norm_zero_mean_unit_std from ..utils import check_for_no_backend class DataSubsetConcatTest(unittest.TestCase): @staticmethod def load_dummy_sample(path, label_load_fct): """ Returns dummy data, independent of path or label_load_fct Parameters ---------- path label_load_fct Returns ------- : dict dict with data and label """ return {'data': np.random.rand(1, 256, 256), 'label': np.random.randint(2)} @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_data_subset_concat(self): class DummyCacheDataset(BaseCacheDataset): def __init__(self, num: int, label_load_fct, *args, **kwargs): """ Generates random samples with _make_dataset Parameters ---------- num : int number of random samples args : passed to 
BaseCacheDataset kwargs : passed to BaseCacheDataset """ self.label_load_fct = label_load_fct super().__init__(data_path=num, *args, **kwargs) def _make_dataset(self, path): data = [] for i in range(path): data.append(self._load_fn(i, self.label_load_fct)) return data dset_a = DummyCacheDataset(500, None, load_fn=self.load_dummy_sample, img_extensions=[], gt_extensions=[]) dset_b = DummyCacheDataset(700, None, load_fn=self.load_dummy_sample, img_extensions=[], gt_extensions=[]) # test concatenating concat_dataset = ConcatDataset(dset_a, dset_b) self.assertEqual(len(concat_dataset), len(dset_a) + len(dset_b)) self.assertTrue(concat_dataset[0]) # test slicing: half_len_a = len(dset_a) // 2 half_len_b = len(dset_b) // 2 self.assertEqual(len(dset_a.get_subset(range(half_len_a))), half_len_a) self.assertEqual(len(dset_b.get_subset(range(half_len_b))), half_len_b) sliced_concat_set = concat_dataset.get_subset( range(half_len_a + half_len_b)) self.assertEqual(len(sliced_concat_set), half_len_a + half_len_b) # check if entries are valid self.assertTrue(sliced_concat_set[0]) @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_cache_dataset(self): def load_mul_sample(path): """ Return a list of random samples Parameters ---------- path Returns ------- list list of samples """ return [self.load_dummy_sample(path, None)] * 4 # test normal cache dataset paths = list(range(10)) dataset = BaseCacheDataset(paths, self.load_dummy_sample, label_load_fct=None) assert len(dataset) == 10 try: a = dataset[0] a = dataset[5] a = dataset[9] except BaseException: raise AssertionError('Dataset access failed.') try: j = 0 for i in dataset: assert 'data' in i assert 'label' in i j += 1 assert j == len(dataset) except BaseException: raise AssertionError('Dataset iteration failed.') # test extend cache dataset dataset = BaseExtendCacheDataset(paths, load_mul_sample) assert len(dataset) == 40 try: a = dataset[0] a = dataset[20] a = dataset[39] except BaseException: raise AssertionError('Dataset access failed.') try: j = 0 for i in dataset: assert 'data' in i assert 'label' in i j += 1 assert j == len(dataset) except BaseException: raise AssertionError('Dataset iteration failed.') @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_lazy_dataset(self): # test lazy dataset paths = list(range(10)) dataset = BaseLazyDataset(paths, self.load_dummy_sample, label_load_fct=None) assert len(dataset) == 10 try: a = dataset[0] a = dataset[5] a = dataset[9] except BaseException: raise AssertionError('Dataset access failed.') try: j = 0 for i in dataset: assert 'data' in i assert 'label' in i j += 1 assert j == len(dataset) except BaseException: raise AssertionError('Dataset iteration failed.') @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_load_sample(self): def load_dummy_label(path): return {'label': 42} def load_dummy_data(path): return np.random.rand(1, 256, 256) * np.random.randint(2, 20) + \ np.random.randint(20) # check loading of a single sample sample_fn = LoadSample({'data': ['data', 'data', 'data'], 'seg': ['data'], 'data2': ['data', 'data', 'data']}, load_dummy_data, dtype={'seg': 'uint8'}, normalize=('data2',)) sample = sample_fn('load') assert not np.isclose(np.mean(sample['data']), 0) assert not np.isclose(np.mean(sample['seg']), 0) assert sample['seg'].dtype == 'uint8' assert np.isclose(sample['data2'].max(), 1) assert 
np.isclose(sample['data2'].min(), -1) # check different normalization function sample_fn = LoadSample({'data': ['data', 'data', 'data']}, load_dummy_data, normalize=('data',), norm_fn=norm_zero_mean_unit_std) sample = sample_fn('load') assert np.isclose(np.mean(sample['data']), 0) assert np.isclose(np.std(sample['data']), 1) # check label and loading of single sample sample_fn = LoadSampleLabel( {'data': ['data', 'data', 'data'], 'seg': ['data'], 'data2': ['data', 'data', 'data']}, load_dummy_data, 'label', load_dummy_label, dtype={'seg': 'uint8'}, normalize=('data2',)) sample = sample_fn('load') assert not np.isclose(np.mean(sample['data']), 0) assert not np.isclose(np.mean(sample['seg']), 0) assert sample['seg'].dtype == 'uint8' assert np.isclose(sample['data2'].max(), 1) assert np.isclose(sample['data2'].min(), -1) assert sample['label'] == 42 if __name__ == "__main__": unittest.main() ================================================ FILE: tests/data_loading/test_numba_transforms.py ================================================ import unittest from batchgenerators.transforms import ZoomTransform, PadTransform, Compose import numpy as np from ..utils import check_for_no_backend try: import numba except ImportError: numba = None class NumbaTest(unittest.TestCase): def setUp(self) -> None: from delira.data_loading.numba_transform import NumbaTransform, \ NumbaCompose self._basic_zoom_trafo = ZoomTransform(3) self._numba_zoom_trafo = NumbaTransform(ZoomTransform, zoom_factors=3) self._basic_pad_trafo = PadTransform(new_size=(30, 30)) self._numba_pad_trafo = NumbaTransform(PadTransform, new_size=(30, 30)) self._basic_compose_trafo = Compose([self._basic_pad_trafo, self._basic_zoom_trafo]) self._numba_compose_trafo = NumbaCompose([self._basic_pad_trafo, self._basic_zoom_trafo]) self._input = {"data": np.random.rand(10, 1, 24, 24)} def compare_transform_outputs(self, transform, numba_transform): output_normal = transform(**self._input)["data"] output_numba = numba_transform(**self._input)["data"] # only check for same shapes, since numba might apply slightly # different interpolations self.assertTupleEqual(output_normal.shape, output_numba.shape) @unittest.skipIf(numba is None, "Numba must be imported successfully") @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_zoom(self): self.compare_transform_outputs(self._basic_zoom_trafo, self._numba_zoom_trafo) @unittest.skipIf(numba is None, "Numba must be imported successfully") @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_pad(self): self.compare_transform_outputs(self._basic_pad_trafo, self._numba_pad_trafo) @unittest.skipIf(numba is None, "Numba must be imported successfully") @unittest.skipUnless(check_for_no_backend(), "Test should be only executed if no " "backend was installed") def test_compose(self): self.compare_transform_outputs(self._basic_compose_trafo, self._numba_compose_trafo) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/data_loading/test_sampler.py ================================================ import unittest import numpy as np from delira.data_loading.sampler import RandomSamplerWithReplacement, \ PrevalenceRandomSampler, SequentialSampler, \ RandomSamplerNoReplacement, BatchSampler, AbstractSampler from ..utils import check_for_no_backend from .utils import DummyDataset class SamplerTest(unittest.TestCase): def setUp(self) -> 
None: self.dset = DummyDataset(600, [0.5, 0.3, 0.2]) @unittest.skipUnless(check_for_no_backend(), "Test should only be executed " "if no backend is installed/specified") def test_batch_sampler(self): for batchsize in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]: for truncate in [True, False]: with self.subTest(batchsize=batchsize, truncate=truncate): sampler = BatchSampler( SequentialSampler.from_dataset(self.dset), batchsize, truncate) sampler_iter = iter(sampler) for i in range(len(sampler)): batch = next(sampler_iter) if i < len(sampler) - 1: self.assertEquals(len(batch), batchsize) else: if truncate: self.assertLessEqual(len(batch), batchsize) @unittest.skipUnless(check_for_no_backend(), "Test should only be executed " "if no backend is installed/specified") def test_sequential(self): prev_index = None sampler = SequentialSampler.from_dataset(self.dset) for idx in sampler: if prev_index is not None: self.assertEquals(idx, prev_index + 1) prev_index = idx @unittest.skipUnless(check_for_no_backend(), "Test should only be executed " "if no backend is installed/specified") def test_random_replacement(self): sampler = RandomSamplerWithReplacement.from_dataset(self.dset) samples = [] self.assertEquals(len(sampler), len(self.dset)) for idx in sampler: self.assertIn(idx, np.arange(len(self.dset))) samples.append(idx) # check if all samples are only sampled once (extremly unlikely) self.assertFalse((np.bincount(samples) == 1).all()) @unittest.skipUnless(check_for_no_backend(), "Test should only be executed " "if no backend is installed/specified") def test_random_no_replacement(self): sampler = RandomSamplerNoReplacement.from_dataset(self.dset) samples = [] self.assertEquals(len(sampler), len(self.dset)) for idx in sampler: self.assertIn(idx, np.arange(len(self.dset))) samples.append(idx) # check if all samples are only sampled once self.assertTrue((np.bincount(samples) == 1).all()) @unittest.skipUnless(check_for_no_backend(), "Test should only be executed " "if no backend is installed/specified") def test_prevalence_sampler(self): sampler = PrevalenceRandomSampler.from_dataset(self.dset) sample_classes = [] for idx in sampler: self.assertIn(idx, np.arange(len(self.dset))) sample_classes.append(self.dset[idx]["label"]) num_samples_per_class = np.bincount(sample_classes) self.assertTrue( (num_samples_per_class.min() - num_samples_per_class.max()) <= 1) @unittest.skipUnless(check_for_no_backend(), "Test should only be executed " "if no backend is installed/specified" ) def test_abstract_sampler_iter(self): sampler = AbstractSampler.from_dataset(self.dset) with self.assertRaises(NotImplementedError): iter(sampler) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/data_loading/utils.py ================================================ import math import numpy as np from delira.data_loading import AbstractDataset class DummyDataset(AbstractDataset): def __init__(self, length=600, class_weights=[0.5, 0.3, 0.2]): super().__init__(None, None) assert math.isclose(sum(class_weights), 1) self._data = [np.random.rand(1, 28, 28) for i in range(length)] _labels = [] for idx, weight in enumerate(class_weights): _labels += [idx] * int(length * weight) self._labels = _labels def __getitem__(self, index): return {"data": self._data[index], "label": self._labels[index]} def __len__(self): return len(self._data) ================================================ FILE: tests/io/__init__.py ================================================ 
================================================ FILE: tests/io/test_chainer.py ================================================ import unittest from ..utils import check_for_chainer_backend if check_for_chainer_backend(): import chainer from delira.models import AbstractChainerNetwork # define model outside actual test to make it pickleable class Model(AbstractChainerNetwork): def __init__(self): super().__init__() with self.init_scope(): self.dense = chainer.links.Linear(1, 1) def forward(self, x): return { "pred": chainer.functions.relu( self.dense(x)) } class IoChainerTest(unittest.TestCase): @unittest.skipUnless(check_for_chainer_backend(), "Test should be only executed if chainer backend is " "installed and specified") def test_load_save(self): from delira.io.chainer import load_checkpoint, save_checkpoint net = Model() save_checkpoint("./model_chainer.chain", model=net) self.assertTrue(load_checkpoint("./model_chainer.chain", model=net)) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/io/test_sklearn.py ================================================ import unittest from ..utils import check_for_sklearn_backend class IoSklearnTest(unittest.TestCase): @unittest.skipUnless(check_for_sklearn_backend(), "Test should be only executed if sklearn backend is " "installed and specified") def test_load_save(self): from delira.io.sklearn import load_checkpoint, save_checkpoint from delira.models import SklearnEstimator from sklearn.tree import DecisionTreeRegressor import numpy as np net = SklearnEstimator(DecisionTreeRegressor()) net.fit(X=np.random.rand(2, 32), y=np.random.rand(2)) save_checkpoint("./model_sklearn.pkl", model=net) self.assertTrue(load_checkpoint("./model_sklearn.pkl")) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/io/test_tf.py ================================================ import unittest from ..utils import check_for_tf_eager_backend, check_for_tf_graph_backend class IoTfTest(unittest.TestCase): def setUp(self) -> None: import tensorflow as tf tf.reset_default_graph() if "_eager" in self._testMethodName: tf.enable_eager_execution() else: tf.disable_eager_execution() @unittest.skipUnless(check_for_tf_graph_backend(), "Test should be only executed if tensorflow backend " "is installed and specified") def test_load_save(self): import tensorflow as tf tf.disable_eager_execution() from delira.io.tf import load_checkpoint, save_checkpoint from delira.models import AbstractTfGraphNetwork from delira.training.backends import initialize_uninitialized import numpy as np class DummyNetwork(AbstractTfGraphNetwork): def __init__(self, in_channels, n_outputs): super().__init__(in_channels=in_channels, n_outputs=n_outputs) self.net = self._build_model(in_channels, n_outputs) @staticmethod def _build_model(in_channels, n_outputs): return tf.keras.models.Sequential( layers=[ tf.keras.layers.Dense( 64, input_shape=in_channels, bias_initializer='glorot_uniform'), tf.keras.layers.ReLU(), tf.keras.layers.Dense( n_outputs, bias_initializer='glorot_uniform')]) net = DummyNetwork((32,), 1) initialize_uninitialized(net._sess) vars_1 = net._sess.run(tf.global_variables()) save_checkpoint("./model", model=net) net._sess.run(tf.initializers.global_variables()) vars_2 = net._sess.run(tf.global_variables()) load_checkpoint("./model", model=net) vars_3 = net._sess.run(tf.global_variables()) for var_1, var_2 in zip(vars_1, vars_2): with self.subTest(var_1=var_1, var2=var_2): 
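                # Comparison scheme (comment added for clarity): vars_1 holds
                # the variable values right after initialization and saving,
                # vars_2 the values after re-running the global initializer
                # (so they should differ from vars_1), and vars_3 the values
                # after load_checkpoint restored the checkpoint (so they
                # should match vars_1 again).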
                self.assertTrue(np.all(var_1 != var_2))

        for var_1, var_3 in zip(vars_1, vars_3):
            with self.subTest(var_1=var_1, var_3=var_3):
                self.assertTrue(np.all(var_1 == var_3))

    @unittest.skipUnless(check_for_tf_eager_backend(),
                         "Test should be only executed if tensorflow backend "
                         "is installed and specified")
    def test_load_save_eager(self):
        import tensorflow as tf
        tf.enable_eager_execution()
        from delira.io.tf import load_checkpoint_eager, save_checkpoint_eager
        from delira.models import AbstractTfEagerNetwork
        import numpy as np

        class DummyNetwork(AbstractTfEagerNetwork):
            def __init__(self, in_channels, n_outputs):
                super().__init__(in_channels=in_channels,
                                 n_outputs=n_outputs)

                with tf.init_scope():
                    self.net = self._build_model(in_channels, n_outputs)

            @staticmethod
            def _build_model(in_channels, n_outputs):
                return tf.keras.models.Sequential(
                    layers=[
                        tf.keras.layers.Dense(
                            64, input_shape=in_channels,
                            bias_initializer='glorot_uniform'),
                        tf.keras.layers.ReLU(),
                        tf.keras.layers.Dense(
                            n_outputs, bias_initializer='glorot_uniform')])

            def call(self, inputs):
                return self.net(inputs)

        net = DummyNetwork((32,), 1)
        input_tensor = tf.constant(np.random.rand(1, 32).astype(np.float32))

        result_pre_save = net(input_tensor)
        save_checkpoint_eager("./model_eager", model=net)

        loaded_state = load_checkpoint_eager("./model_eager", model=net)
        loaded_net = loaded_state["model"]

        result_post_save = loaded_net(input_tensor)

        self.assertTrue(np.array_equal(result_post_save, result_pre_save))

    def tearDown(self) -> None:
        import gc
        import sys
        try:
            del sys.modules["tf"]
        except KeyError:
            pass

        try:
            del tf
        except (UnboundLocalError, NameError):
            pass

        try:
            del sys.modules["tensorflow"]
        except KeyError:
            pass

        try:
            del tensorflow
        except (UnboundLocalError, NameError):
            pass

        gc.collect()


if __name__ == '__main__':
    unittest.main()


================================================
FILE: tests/io/test_torch.py
================================================
import unittest

from ..utils import check_for_torch_backend, check_for_torchscript_backend


class IoTorchTest(unittest.TestCase):

    @unittest.skipUnless(check_for_torch_backend(),
                         "Test should be only executed if torch backend is "
                         "installed and specified")
    def test_load_save(self):
        from delira.io.torch import load_checkpoint_torch, \
            save_checkpoint_torch
        from delira.models import AbstractPyTorchNetwork
        import torch

        class DummyNetwork(AbstractPyTorchNetwork):
            def __init__(self, in_channels, n_outputs):
                super().__init__(in_channels=in_channels,
                                 n_outputs=n_outputs)
                self.net = self._build_model(in_channels, n_outputs)

            def forward(self, x):
                # fixed: the submodule is stored as ``self.net`` above,
                # not ``self.module``
                return self.net(x)

            @staticmethod
            def _build_model(in_channels, n_outputs):
                return torch.nn.Sequential(
                    torch.nn.Linear(in_channels, 64),
                    torch.nn.ReLU(),
                    torch.nn.Linear(64, n_outputs)
                )

        net = DummyNetwork(32, 1)
        save_checkpoint_torch("./model_torch.pt", model=net)
        self.assertTrue(load_checkpoint_torch("./model_torch.pt"))

    @unittest.skipUnless(check_for_torchscript_backend(),
                         "Test should be only executed if torch backend is "
                         "installed and specified")
    def test_torchscript_save(self):
        from delira.io.torch import load_checkpoint_torchscript, \
            save_checkpoint_torchscript
        from delira.models import AbstractTorchScriptNetwork
        import torch

        class DummyNetwork(AbstractTorchScriptNetwork):
            def __init__(self):
                super().__init__()
                self.dense = torch.nn.Linear(3, 1)

            @torch.jit.script_method
            def forward(self, x):
                return self.dense(x)

        net = DummyNetwork()

        save_checkpoint_torchscript("./model_jit.ptj", model=net)
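        # Smoke test only (comment added for clarity): the assertion on the
        # next line merely checks that load_checkpoint_torchscript returns
        # something truthy for the file written above; it does not compare
        # the restored weights against ``net``.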
self.assertTrue(load_checkpoint_torchscript("./model_jit.ptj")) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/logging/__init__.py ================================================ ================================================ FILE: tests/logging/test_logging_frequency.py ================================================ import unittest from delira.logging import BaseBackend, SingleThreadedLogger import logging class DummyBackend(BaseBackend): def _text(self, logging_no: int, tag: str, global_step=None): logging.info("INFO: Logging Item Number %d" % logging_no) # implement dummy funtions to be able to instantiate backend def _image(self, *args, **kwargs): pass def _images(self, *args, **kwargs): pass def _image_with_boxes(self, *args, **kwargs): pass def _scalar(self, *args, **kwargs): pass def _scalars(self, *args, **kwargs): pass def _histogram(self, *args, **kwargs): pass def _figure(self, *args, **kwargs): pass def _audio(self, *args, **kwargs): pass def _video(self, *args, **kwargs): pass def _graph_pytorch(self, *args, **kwargs): pass def _graph_tf(self, *args, **kwargs): pass def _graph_onnx(self, *args, **kwargs): pass def _embedding(self, *args, **kwargs): pass def _pr_curve(self, *args, **kwargs): pass class LoggingFrequencyTestCase(unittest.TestCase): def _logging_freq_test(self, frequencies, num_runs: int, check_freq=None): logger = SingleThreadedLogger(DummyBackend(), logging_frequencies=frequencies, reduce_types="last") if check_freq is None and isinstance(frequencies, int): check_freq = frequencies assert check_freq is not None target_messages = 0 with self.assertLogs() as cm: for idx in range(num_runs): logger.log({"text": {"logging_no": idx, "tag": "dummy"}}) target_messages += int((idx + 1) % check_freq == 0) self.assertIsNotNone(cm.output) self.assertEqual(target_messages, len(cm.output)) def test_logging_freq(self): for frequencies, check_freq in zip([1, 5, 10, {"text": 15}], [None, None, None, 15]): with self.subTest(frequencies=frequencies): self._logging_freq_test(frequencies, 50, check_freq) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/logging/test_logging_outside_trainer.py ================================================ import unittest from delira.logging import log from delira.training import BaseNetworkTrainer from delira.models import AbstractNetwork import os from tests.utils import check_for_tf_graph_backend try: import tensorflow as tf except ImportError: tf = None class LoggingOutsideTrainerTestCase(unittest.TestCase): @unittest.skipUnless(check_for_tf_graph_backend(), "TF Backend not installed") def test_logging_freq(self): save_path = os.path.abspath("./logs") config = { "num_epochs": 2, "losses": {}, "optimizer_cls": None, "optimizer_params": {"learning_rate": 1e-3}, "metrics": {}, "lr_scheduler_cls": None, "lr_scheduler_params": {} } trainer = BaseNetworkTrainer( AbstractNetwork(), save_path, **config, gpu_ids=[], save_freq=1, optim_fn=None, key_mapping={}, logging_type="tensorboardx", logging_kwargs={ 'logdir': save_path }) trainer._setup( AbstractNetwork(), lr_scheduler_cls=None, lr_scheduler_params={}, gpu_ids=[], key_mapping={}, convert_batch_to_npy_fn=None, prepare_batch_fn=None, callbacks=[]) tag = 'dummy' log({"scalar": {"scalar_value": 1234, "tag": tag}}) file = [os.path.join(save_path, x) for x in os.listdir(save_path) if os.path.isfile(os.path.join(save_path, x))][0] ret_val = False if tf is not None: for e in 
tf.train.summary_iterator(file): for v in e.summary.value: if v.tag == tag: ret_val = True break if ret_val: break self.assertTrue(ret_val) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/logging/test_single_threaded_logging.py ================================================ from delira.logging import Logger, TensorboardBackend, make_logger from tests.utils import check_for_torch_backend, check_for_tf_graph_backend import unittest try: import tensorflow as tf except ImportError: tf = None try: import torch except ImportError: torch = None try: import onnx except ImportError: onnx = None import numpy as np import os import gc class TestTensorboardLogging(unittest.TestCase): def setUp(self) -> None: self._npy_imgs = np.random.rand(2, 3, 24, 24) self._boxes_npy = np.array([[5, 5, 10, 10], [4, 8, 5, 16]]) self._scalars = [{"1": 4, "2": 14, "3": 24}, {"1": 5, "2": 15, "3": 25}, {"1": 6, "2": 16, "3": 26}] self._hist_vals = np.random.randint(0, 10, size=(100,)) from scipy.signal import chirp self._audio_sample_npy = chirp(np.linspace(0, 100), 500, 2, 100) self._text_string = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrs" \ "tuvwxyz0123456789" if tf is not None: tf.reset_default_graph() input = np.zeros(shape=(1, 28, 28, 1)) layers = tf.keras.layers self._model_tf = tf.keras.Sequential( [layers.Conv2D( 32, 5, padding='same', data_format="channels_last", activation=tf.nn.relu), layers.Conv2D( 64, 5, padding='same', data_format="channels_last", activation=tf.nn.relu), ]) self._model_tf.build(input_shape=input.shape) else: self._model_tf = None if torch is not None: self._model_torch = torch.nn.Sequential( torch.nn.Conv2d(3, 8, 3, padding=1), torch.nn.ReLU(), torch.nn.Conv2d(8, 1, 3, padding=1), torch.nn.LeakyReLU(), torch.nn.Conv2d(1, 23, 3), ) else: self._model_torch = None self._embedding_npy = np.random.rand(500, 3) self._labels_npy = np.random.randint(0, 10, 100) self._predictions_npy = np.random.randint(0, 10, 100) self._logger = self._setup_logger() def _setup_logger(self): return make_logger(TensorboardBackend( {"logdir": os.path.join(".", "runs", self._testMethodName)} )) def _check_for_tag(self, tag, logdir=None): if logdir is None: try: logdir = self._logger._backend._writer.logdir except AttributeError: logdir = self._logger._backend._writer.log_dir file = [os.path.join(logdir, x) for x in os.listdir(logdir) if os.path.isfile(os.path.join(logdir, x))][0] if tf is not None: ret_val = False for e in tf.train.summary_iterator(file): for v in e.summary.value: if v.tag == tag: ret_val = True break if ret_val: break self.assertTrue(ret_val) @staticmethod def _destroy_logger(logger: Logger): logger.close() del logger gc.collect() def test_image_npy(self): self._logger.log({"image": {"tag": "image_npy", "img_tensor": self._npy_imgs[0]}}) self._check_for_tag("image_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_image_torch(self): self._logger.log({"image": {"tag": "image_torch", "img_tensor": torch.from_numpy(self._npy_imgs[0])}}) self._check_for_tag("image_torch") def test_img_npy(self): self._logger.log({"img": {"tag": "img_npy", "img_tensor": self._npy_imgs[0]}}) self._check_for_tag("img_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_img_torch(self): self._logger.log({"img": {"tag": "img_torch", "img_tensor": torch.from_numpy(self._npy_imgs[0])}}) self._check_for_tag("img_torch") def test_picture_npy(self): self._logger.log({"picture": {"tag": 
"picture_npy", "img_tensor": self._npy_imgs[0]}}) self._check_for_tag("picture_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_picture_torch(self): self._logger.log({ "picture": { "tag": "picture_torch", "img_tensor": torch.from_numpy(self._npy_imgs[0])}}) self._check_for_tag("picture_torch") def test_images_npy(self): self._logger.log({"images": {"tag": "images_npy", "img_tensor": self._npy_imgs}}) self._check_for_tag("images_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_images_torch(self): self._logger.log({"images": {"tag": "images_torch", "img_tensor": torch.from_numpy(self._npy_imgs)}}) self._check_for_tag("images_torch") def test_imgs_npy(self): self._logger.log({"imgs": {"tag": "imgs_npy", "img_tensor": self._npy_imgs}}) self._check_for_tag("imgs_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_imgs_torch(self): self._logger.log({"imgs": {"tag": "imgs_torch", "img_tensor": torch.from_numpy(self._npy_imgs)}}) self._check_for_tag("imgs_torch") def test_pictures_npy(self): self._logger.log({"pictures": {"tag": "pictures_npy", "img_tensor": self._npy_imgs}}) self._check_for_tag("pictures_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_pictures_torch(self): self._logger.log({"pictures": {"tag": "pictures_torch", "img_tensor": torch.from_numpy(self._npy_imgs)}}) self._check_for_tag("pictures_torch") def test_image_with_boxes_npy(self): self._logger.log({"image_with_boxes": { "tag": "image_with_boxes_npy", "img_tensor": self._npy_imgs[0], "box_tensor": self._boxes_npy }}) self._check_for_tag("image_with_boxes_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_image_with_boxes_torch(self): self._logger.log({"image_with_boxes": { "tag": "image_with_boxes_torch", "img_tensor": torch.from_numpy(self._npy_imgs[0]), "box_tensor": torch.from_numpy(self._boxes_npy) }}) self._check_for_tag("image_with_boxes_torch") def test_bounding_boxes_npy(self): self._logger.log({"bounding_boxes": { "tag": "bounding_boxes_npy", "img_tensor": self._npy_imgs[0], "box_tensor": self._boxes_npy }}) self._check_for_tag("bounding_boxes_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_bounding_boxes_torch(self): self._logger.log({"bounding_boxes": { "tag": "bounding_boxes_torch", "img_tensor": torch.from_numpy(self._npy_imgs[0]), "box_tensor": torch.from_numpy(self._boxes_npy) }}) self._check_for_tag("bounding_boxes_torch") def test_bboxes_npy(self): self._logger.log({"bboxes": { "tag": "bboxes_npy", "img_tensor": self._npy_imgs[0], "box_tensor": self._boxes_npy }}) self._check_for_tag("bboxes_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_bboxes_torch(self): self._logger.log({"bboxes": { "tag": "bboxes_torch", "img_tensor": torch.from_numpy(self._npy_imgs[0]), "box_tensor": torch.from_numpy(self._boxes_npy) }}) self._check_for_tag("bboxes_torch") def test_scalar(self): for _scalar in self._scalars: self._logger.log({ "scalar": { "tag": "scalar", "scalar_value": _scalar["1"] } }) self._check_for_tag("scalar") def test_scalar_npy(self): for _scalar in self._scalars: self._logger.log({ "scalar": { "tag": "scalar_npy", "scalar_value": np.array(_scalar["1"]) } }) self._check_for_tag("scalar_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_scalar_torch(self): for 
_scalar in self._scalars: self._logger.log({ "scalar": { "tag": "scalar_torch", "scalar_value": torch.tensor(_scalar["1"]) } }) def test_value(self): for _scalar in self._scalars: self._logger.log({ "value": { "tag": "value", "scalar_value": _scalar["1"] } }) self._check_for_tag("value") def test_value_npy(self): for _scalar in self._scalars: self._logger.log({ "value": { "tag": "value_npy", "scalar_value": np.array(_scalar["1"]) } }) self._check_for_tag("value_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_value_torch(self): for _scalar in self._scalars: self._logger.log({ "value": { "tag": "value_torch", "scalar_value": torch.tensor(_scalar["1"]) } }) self._check_for_tag("value_torch") def test_scalars(self): for _scalar in self._scalars: self._logger.log({ "scalars": { "main_tag": "scalars", "tag_scalar_dict": _scalar, "sep": "/" } }) for k in self._scalars[0].keys(): self._check_for_tag("scalars/" + k) def test_scalars_npy(self): for _scalar in self._scalars: self._logger.log({ "scalars": { "main_tag": "scalars_npy", "tag_scalar_dict": {k: np.array(v) for k, v in _scalar.items()}, "sep": "/" } }) for k in self._scalars[0].keys(): self._check_for_tag("scalars_npy/" + k) @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_scalars_torch(self): for _scalar in self._scalars: self._logger.log({ "scalars": { "main_tag": "scalars_torch", "tag_scalar_dict": {k: torch.tensor(v) for k, v in _scalar.items()}, "sep": "/" } }) for k in self._scalars[0].keys(): self._check_for_tag("scalars_torch/" + k) def test_values(self): for _scalar in self._scalars: self._logger.log({ "values": { "main_tag": "values", "tag_scalar_dict": _scalar, "sep": "/" } }) for k in self._scalars[0].keys(): self._check_for_tag("values/" + k) def test_values_npy(self): for _scalar in self._scalars: self._logger.log({ "values": { "main_tag": "values_npy", "tag_scalar_dict": {k: np.array(v) for k, v in _scalar.items()}, "sep": "/" } }) for k in self._scalars[0].keys(): self._check_for_tag("values_npy/" + k) @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_values_torch(self): for _scalar in self._scalars: self._logger.log({ "values": { "main_tag": "values_torch", "tag_scalar_dict": {k: torch.tensor(v) for k, v in _scalar.items()}, "sep": "/" } }) for k in self._scalars[0].keys(): self._check_for_tag("values_torch/" + k) def test_histogram_npy(self): self._logger.log({ "histogram": { "tag": "histogram_npy", "values": self._hist_vals } }) self._check_for_tag("histogram_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_histogram_torch(self): self._logger.log({ "histogram": { "tag": "histogram_torch", "values": torch.from_numpy(self._hist_vals) } }) self._check_for_tag("histogram_torch") def test_hist_npy(self): self._logger.log({ "hist": { "tag": "hist_npy", "values": self._hist_vals } }) self._check_for_tag("hist_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_hist_torch(self): self._logger.log({ "hist": { "tag": "hist_torch", "values": torch.from_numpy(self._hist_vals) } }) self._check_for_tag("hist_torch") def test_figure(self): from matplotlib.pyplot import figure, imshow, close _fig = figure() imshow(self._npy_imgs[0][0]) self._logger.log({ "figure": { "tag": "figure", "figure": _fig } }) close() self._check_for_tag("figure") def test_fig(self): from matplotlib.pyplot import figure, imshow, close _fig = figure() 
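        # Note (comment added for clarity): "fig" is exercised in addition to
        # "figure" above because the logger appears to accept several alias
        # keys for the same backend handler (image/img/picture, scalar/value,
        # histogram/hist, figure/fig, audio/sound, ...); this test only checks
        # that the alias reaches the backend and produces the expected tag.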
imshow(self._npy_imgs[0][0]) self._logger.log({ "fig": { "tag": "fig", "figure": _fig } }) close() self._check_for_tag("fig") def test_audio_npy(self): self._logger.log({"audio": { "tag": "audio_npy", "snd_tensor": self._audio_sample_npy }}) self._check_for_tag("audio_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_audio_torch(self): self._logger.log({"audio": { "tag": "audio_torch", "snd_tensor": torch.from_numpy(self._audio_sample_npy) }}) self._check_for_tag("audio_torch") def test_sound_npy(self): self._logger.log({"sound": { "tag": "sound_npy", "snd_tensor": self._audio_sample_npy }}) self._check_for_tag("sound_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_sound_torch(self): self._logger.log({"sound": { "tag": "sound_torch", "snd_tensor": torch.from_numpy(self._audio_sample_npy) }}) self._check_for_tag("sound_torch") def test_video_npy(self): # add channel and batch dimension for format BTCHW vid = self._npy_imgs.reshape((1, *self._npy_imgs.shape)) self._logger.log({"video": { "tag": "video_npy", "vid_tensor": vid, "fps": 1 }}) self._check_for_tag("video_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_video_torch(self): # add channel and batch dimension for format BTCHW vid = self._npy_imgs.reshape((1, *self._npy_imgs.shape)) self._logger.log({"video": { "tag": "video_torch", "vid_tensor": torch.from_numpy(vid), "fps": 1 }}) self._check_for_tag("video_torch") def test_text(self): self._logger.log({"text": { "tag": "text", "text_string": self._text_string }}) self._check_for_tag("text/text_summary") @unittest.skipUnless(check_for_tf_graph_backend(), "TF Backend not installed") def test_graph_tf(self): run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() with tf.Session() as sess: outputs = self._model_tf( np.zeros( shape=( 1, 28, 28, 1), dtype=np.float32)) sess.run(tf.initializers.global_variables()) sess.run(outputs, options=run_options, run_metadata=run_metadata) self._logger.log({"graph_tf": { "graph": self._model_tf._graph.as_graph_def(add_shapes=True), "run_metadata": run_metadata }}) @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_graph_torch(self): input_tensor = self._npy_imgs[0] input_tensor = input_tensor.reshape(1, *input_tensor.shape) self._logger.log({ "graph_pytorch": { "model": self._model_torch, "input_to_model": torch.from_numpy(input_tensor).float() } }) @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") @unittest.skipIf(onnx is None, reason="ONNX not installed") def test_graph_onnx(self): import os input_tensor = self._npy_imgs[0] input_tensor = input_tensor.reshape(1, *input_tensor.shape) torch.onnx.export(self._model_torch, torch.from_numpy(input_tensor).float(), os.path.abspath("model.onnx")) self._logger.log({ "graph_onnx": {"prototxt": os.path.abspath("model.onnx")} }) def test_embedding_npy(self): self._logger.log({"embedding": { "mat": self._embedding_npy }}) @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_embedding_torch(self): self._logger.log({"embedding": { "mat": torch.from_numpy(self._embedding_npy) }}) def test_pr_curve_npy(self): self._logger.log({"pr_curve": { "tag": "pr_curve_npy", "labels": self._labels_npy, "predictions": self._predictions_npy }}) self._check_for_tag("pr_curve_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") 
def test_pr_curve_torch(self): self._logger.log({"pr_curve": { "tag": "pr_curve_torch", "labels": torch.from_numpy(self._labels_npy), "predictions": torch.from_numpy(self._predictions_npy) }}) self._check_for_tag("pr_curve_torch") def test_pr_npy(self): self._logger.log({"pr": { "tag": "pr_npy", "labels": self._labels_npy, "predictions": self._predictions_npy }}) self._check_for_tag("pr_npy") @unittest.skipUnless(check_for_torch_backend(), "Torch Backend not installed") def test_pr_torch(self): self._logger.log({"pr": { "tag": "pr_torch", "labels": torch.from_numpy(self._labels_npy), "predictions": torch.from_numpy(self._predictions_npy) }}) self._check_for_tag("pr_torch") def tearDown(self) -> None: self._destroy_logger(self._logger) self._logger = None if __name__ == '__main__': from multiprocessing import freeze_support freeze_support() unittest.main() ================================================ FILE: tests/models/__init__.py ================================================ ================================================ FILE: tests/models/data_parallel/__init__.py ================================================ ================================================ FILE: tests/models/data_parallel/test_chainer.py ================================================ import unittest from tests.utils import check_for_chainer_backend class TestDataParallelChainer(unittest.TestCase): def setUp(self) -> None: if check_for_chainer_backend(): import chainer import chainer.link import chainer.links import chainer.functions import chainer.optimizers from delira.models.backends.chainer.data_parallel import \ DataParallelChainerOptimizer, \ DataParallelChainerNetwork from delira.models.backends.chainer.abstract_network import \ AbstractChainerNetwork # creating a really simple model to test dataparallel behavior class SimpleModel(AbstractChainerNetwork): def __init__(self): super(SimpleModel, self).__init__() with self.init_scope(): self.dense_1 = chainer.links.Linear(3, 32) self.dense_2 = chainer.links.Linear(32, 2) def forward(self, x): return self.dense_2( chainer.functions.relu( self.dense_1(x))) self.model = DataParallelChainerNetwork(SimpleModel(), devices=["@numpy", "@numpy"]) self.optimizer = DataParallelChainerOptimizer.from_optimizer_class( chainer.optimizers.Adam ) self.optimizer.setup(self.model) @unittest.skipUnless(check_for_chainer_backend(), "Test should be only executed if chainer backend is " "installed and specified") def test_update(self): import numpy as np import chainer input_tensor = np.random.rand(10, 3).astype(np.float32) label_tensor = np.random.rand(10, 2).astype(np.float) model_copy = self.model.copy() preds = self.model(input_tensor) loss = chainer.functions.sum(preds - label_tensor) self.model.cleargrads() loss.backward() self.optimizer.update() # check if param was updated for orig_param, updated_param in zip(model_copy.params(), self.model.params()): self.assertFalse(np.array_equal(orig_param, updated_param)) # check if all grads were cleared self.model.cleargrads() for module in self.model.modules: for updated_param in module.params(): self.assertIsNone(updated_param.grad_var) # test with keyword arguments @unittest.skipUnless(check_for_chainer_backend(), "Test should be only executed if chainer backend is " "installed and specified") def test_keyword_arguments_different_batchsize(self): import numpy as np import chainer # test batchsize smaller than, equal to and greater than number devices for batchsize in [1, 2, 3]: with self.subTest(batchsize=batchsize): 
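                # Batch sizes 1, 2 and 3 cover fewer samples than devices,
                # exactly one sample per device and more samples than the two
                # "@numpy" devices configured in setUp; the wrapper is
                # expected to scatter the keyword arguments along the batch
                # dimension and gather the predictions back onto a single
                # device (comment added for clarity, behaviour inferred from
                # the assertions below).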
input_kwargs = { "x": np.random.rand(batchsize, 3).astype(np.float32) } pred = self.model(**input_kwargs) self.assertTupleEqual(pred.shape, (batchsize, 2)) self.assertEqual(chainer.get_device(pred.device), chainer.get_device("@numpy")) # test with positional arguments @unittest.skipUnless(check_for_chainer_backend(), "Test should be only executed if chainer backend is " "installed and specified") def test_positional_arguments(self): import numpy as np import chainer # test batchsize smaller than, equal to and greater than number devices for batchsize in [1, 2, 3]: with self.subTest(batchsize=batchsize): input_args = [ np.random.rand(batchsize, 3).astype(np.float32) ] pred = self.model(*input_args) self.assertTupleEqual(pred.shape, (batchsize, 2)) self.assertEqual(chainer.get_device(pred.device), chainer.get_device("@numpy")) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/models/data_parallel/test_torch.py ================================================ import unittest from copy import deepcopy import numpy as np from tests.utils import check_for_torch_backend class TestDataParallelTorch(unittest.TestCase): def setUp(self) -> None: if check_for_torch_backend(): from delira.models.backends.torch import AbstractPyTorchNetwork, \ DataParallelPyTorchNetwork import torch class SimpleModel(AbstractPyTorchNetwork): def __init__(self): super().__init__() self.dense_1 = torch.nn.Linear(3, 32) self.dense_2 = torch.nn.Linear(32, 2) self.relu = torch.nn.ReLU() def forward(self, x): return {"pred": self.dense_2(self.relu(self.dense_1(x)))} model = SimpleModel() self.optimizer = torch.optim.Adam(model.parameters()) if torch.cuda.is_available() and torch.cuda.device_count() > 1: self.model = DataParallelPyTorchNetwork(model, [0, 1]) else: self.model = model @unittest.skipUnless(check_for_torch_backend(), "Test should be only executed if torch backend is " "installed and specified") def test_update(self): import torch input_tensor = torch.rand(10, 3) label_tensor = torch.rand(10, 2) model_copy = deepcopy(self.model) preds = self.model(input_tensor) loss = (preds["pred"] - label_tensor).sum() self.optimizer.zero_grad() loss.backward() self.optimizer.step() for orig_param, updated_param in zip(model_copy.parameters(), self.model.parameters()): self.assertFalse( np.array_equal( orig_param.detach().cpu().numpy(), updated_param.detach().cpu().numpy())) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/models/test_abstract_models.py ================================================ import unittest import numpy as np from ..utils import check_for_chainer_backend, check_for_torch_backend, \ check_for_tf_graph_backend, check_for_tf_eager_backend, \ check_for_torchscript_backend, check_for_sklearn_backend class TestAbstractModels(unittest.TestCase): @staticmethod def _setup_torch(*args): import torch from delira.models.backends.torch import AbstractPyTorchNetwork class Model(AbstractPyTorchNetwork): def __init__(self): super().__init__() self.dense = torch.nn.Linear(1, 1) self.relu = torch.nn.ReLU() def forward(self, x): return {"pred": self.relu(self.dense(x))} return Model() @staticmethod def _setup_torchscript(*args): import torch from delira.models.backends.torchscript import \ AbstractTorchScriptNetwork class Model(AbstractTorchScriptNetwork): def __init__(self): super().__init__() self.dense = torch.nn.Linear(1, 1) self.relu = torch.nn.ReLU() @torch.jit.script_method def forward(self, x): return 
{"pred": self.relu(self.dense(x))} return Model() @staticmethod def _setup_tfeager(*args): import tensorflow as tf tf.enable_eager_execution() tf.reset_default_graph() from delira.models.backends.tf_eager import AbstractTfEagerNetwork class Model(AbstractTfEagerNetwork): def __init__(self): super().__init__() self.dense = tf.keras.layers.Dense(1, activation="relu") def call(self, x: tf.Tensor): return {"pred": self.dense(x)} return Model() @staticmethod def _setup_tfgraph(*args): import tensorflow as tf tf.disable_eager_execution() tf.reset_default_graph() from delira.models import AbstractTfGraphNetwork from delira.training.backends.tf_graph.utils import \ initialize_uninitialized class Model(AbstractTfGraphNetwork): def __init__(self): super().__init__() self.dense = tf.keras.layers.Dense(1, activation="relu") data = tf.placeholder(shape=[None, 1], dtype=tf.float32) labels = tf.placeholder_with_default( tf.zeros([tf.shape(data)[0], 1]), shape=[None, 1]) preds_train = self.dense(data) preds_eval = self.dense(data) self.inputs["data"] = data self.inputs["labels"] = labels self.outputs_train["pred"] = preds_train self.outputs_eval["pred"] = preds_eval model = Model() initialize_uninitialized(model._sess) return model @staticmethod def _setup_chainer(*args): import chainer from delira.models import AbstractChainerNetwork class Model(AbstractChainerNetwork): def __init__(self): super().__init__() with self.init_scope(): self.dense = chainer.links.Linear(1, 1) def forward(self, x): return { "pred": chainer.functions.relu( self.dense(x)) } return Model() @staticmethod def _setup_sklearn(*args): from delira.models import SklearnEstimator from sklearn.neural_network import MLPRegressor class Model(SklearnEstimator): def __init__(self): # prefit to enable prediction mode afterwards module = MLPRegressor() module.fit(*args) super().__init__(module) @staticmethod def prepare_batch(batch: dict, input_device, output_device): return batch return Model() def run_model_arg(self, device=None): prep_data = self._model.prepare_batch(self._data, input_device=device, output_device=device) pred = self._model(prep_data["data"]) self.assertIsInstance(pred, dict) def run_model_kwarg(self, device=None, keyword="data"): prep_data = self._model.prepare_batch(self._data, input_device=device, output_device=device) pred = self._model(**{keyword: prep_data["data"]}) self.assertIsInstance(pred, dict) def setUp(self) -> None: self._data = {"data": np.random.rand(100, 1), "label": np.random.rand(100, 1)} if "sklearn" in self._testMethodName.lower(): self._model = self._setup_sklearn(self._data["data"], self._data["label"]) elif "chainer" in self._testMethodName.lower(): self._model = self._setup_chainer() elif "pytorch" in self._testMethodName.lower(): self._model = self._setup_torch() elif "torchscript" in self._testMethodName.lower(): self._model = self._setup_torchscript() elif "tf_graph" in self._testMethodName.lower(): self._model = self._setup_tfgraph() elif "tf_eager" in self._testMethodName.lower(): self._model = self._setup_tfeager() @unittest.skipUnless(check_for_sklearn_backend(), "Test should be only executed if sklearn backend is " "installed and specified") def test_sklearn(self): self.run_model_arg() @unittest.skipUnless(check_for_chainer_backend(), "Test should be only executed if chainer backend is " "installed and specified") def test_chainer(self): import chainer self.run_model_arg(chainer.backend.CpuDevice()) @unittest.skipUnless(check_for_torch_backend(), "Test should be only executed if torch backend 
is " "installed and specified") def test_pytorch(self): self.run_model_arg("cpu") @unittest.skipUnless(check_for_torchscript_backend(), "Test should be only executed if torch backend is " "installed and specified") def test_torchscript(self): self.run_model_arg("cpu") @unittest.skipUnless(check_for_tf_eager_backend(), "Test should be only executed if tf backend is " "installed and specified") def test_tf_eager(self): self.run_model_arg("/cpu:0") @unittest.skipUnless(check_for_tf_graph_backend(), "Test should be only executed if tf backend is " "installed and specified") def test_tf_graph(self): self.run_model_kwarg() def tearDown(self) -> None: import sys import gc try: del sys.modules["tf"] except KeyError: pass try: del tf except (UnboundLocalError, NameError): pass try: del sys.modules["tensorflow"] except KeyError: pass try: del tensorflow except (UnboundLocalError, NameError): pass gc.collect() if __name__ == '__main__': unittest.main() ================================================ FILE: tests/training/__init__.py ================================================ ================================================ FILE: tests/training/backends/__init__.py ================================================ ================================================ FILE: tests/training/backends/test_chainer.py ================================================ import unittest from delira.utils import DeliraConfig from sklearn.metrics import mean_absolute_error from .utils import create_experiment_test_template_for_backend from tests.utils import check_for_chainer_backend if check_for_chainer_backend(): from delira.models import AbstractChainerNetwork import chainer # define this outside, because it has to be pickleable, which it won't be, # wehn defined inside a function class DummyNetworkChainer(AbstractChainerNetwork): def __init__(self): super().__init__() with self.init_scope(): self.dense_1 = chainer.links.Linear(32, 64) self.dense_2 = chainer.links.Linear(64, 1) def forward(self, x): return { "pred": self.dense_2(chainer.functions.relu( self.dense_1(x))) } class TestChainerBackend( create_experiment_test_template_for_backend("CHAINER") ): def setUp(self) -> None: if check_for_chainer_backend(): from delira.training import ChainerExperiment import chainer config = DeliraConfig() config.fixed_params = { "model": {}, "training": { "losses": { "L1": chainer.functions.mean_absolute_error}, "optimizer_cls": chainer.optimizers.Adam, "optimizer_params": {}, "num_epochs": 2, "metrics": {"mae": mean_absolute_error}, "lr_sched_cls": None, "lr_sched_params": {}} } model_cls = DummyNetworkChainer experiment_cls = ChainerExperiment else: config = None model_cls = None experiment_cls = None len_train = 50 len_test = 50 self._test_cases = [ { "config": config, "network_cls": model_cls, "len_train": len_train, "len_test": len_test, "key_mapping": {"x": "data"} } ] self._experiment_cls = experiment_cls super().setUp() if __name__ == "__main__": unittest.main() ================================================ FILE: tests/training/backends/test_sklearn.py ================================================ import unittest import numpy as np from tests.utils import check_for_sklearn_backend from delira.utils import DeliraConfig from sklearn.metrics import mean_absolute_error from .utils import create_experiment_test_template_for_backend, DummyDataset class TestSklearnBackend( create_experiment_test_template_for_backend("SKLEARN") ): def setUp(self) -> None: if check_for_sklearn_backend(): from delira.training import 
SklearnExperiment from sklearn.tree import DecisionTreeClassifier from sklearn.neural_network import MLPClassifier config = DeliraConfig() config.fixed_params = { "model": {}, "training": { "losses": { "L1": mean_absolute_error}, "optimizer_cls": None, "optimizer_params": {}, "num_epochs": 2, "metrics": {"mae": mean_absolute_error}, "lr_sched_cls": None, "lr_sched_params": {}} } # run tests for estimator with and without partial_fit model_cls = [ DecisionTreeClassifier, MLPClassifier ] experiment_cls = SklearnExperiment else: config = None model_cls = [] experiment_cls = None len_train = 50 len_test = 50 self._test_cases = [ { "config": config, "network_cls": _cls, "len_train": len_train, "len_test": len_test, "key_mapping": {"X": "X"}, "metric_keys": {"L1": ("pred", "y"), "mae": ("pred", "y")} } for _cls in model_cls ] self._experiment_cls = experiment_cls super().setUp() @unittest.skipUnless(check_for_sklearn_backend(), "Test should only be executed if SKLEARN backend is " "installed and specified") def test_experiment_test(self): from delira.data_loading import DataManager # iterate over test cases for case in self._test_cases: with self.subTest(case=case): # pop arguments (to use remaining case as kwargs later) _ = case.pop("len_train") config = case.pop("config") metric_keys = case.pop("metric_keys") network_cls = case.pop("network_cls") len_test = case.pop("len_test") exp = self._experiment_cls(config, network_cls, **case) # create data dset_test = DummyDataset(len_test) dmgr_test = DataManager(dset_test, 16, 1, None) model = network_cls() # must fit on 2 samples to initialize coefficients model.fit(np.random.rand(2, 32), np.array([[0], [1]])) exp.test(model, dmgr_test, config.nested_get("metrics", {}), metric_keys) if __name__ == "__main__": unittest.main() ================================================ FILE: tests/training/backends/test_tf_eager.py ================================================ import unittest import gc from tests.utils import check_for_tf_eager_backend from delira.utils import DeliraConfig from sklearn.metrics import mean_absolute_error from .utils import create_experiment_test_template_for_backend if check_for_tf_eager_backend(): from delira.models import AbstractTfEagerNetwork import tensorflow as tf class DummyNetworkTfEager(AbstractTfEagerNetwork): def __init__(self): super().__init__() self.model = tf.keras.models.Sequential( layers=[ tf.keras.layers.Dense(64, input_shape=( 32,), bias_initializer='glorot_uniform'), tf.keras.layers.ReLU(), tf.keras.layers.Dense( 1, bias_initializer='glorot_uniform')] ) def call(self, x: tf.Tensor): return {"pred": self.model(x)} class TestTfEagerBackend( create_experiment_test_template_for_backend("TFEAGER") ): def setUp(self) -> None: if check_for_tf_eager_backend(): import tensorflow as tf tf.enable_eager_execution() from delira.training import TfEagerExperiment config = DeliraConfig() config.fixed_params = { "model": {}, "training": { "losses": { "L1": tf.losses.absolute_difference}, "optimizer_cls": tf.train.AdamOptimizer, "optimizer_params": {"learning_rate": 1e-3}, "num_epochs": 2, "metrics": {"mae": mean_absolute_error}, "lr_sched_cls": None, "lr_sched_params": {}} } model_cls = DummyNetworkTfEager experiment_cls = TfEagerExperiment else: config = None model_cls = None experiment_cls = None len_train = 100 len_test = 50 self._test_cases = [ { "config": config, "network_cls": model_cls, "len_train": len_train, "len_test": len_test, "key_mapping": {"x": "data"}, } ] self._experiment_cls = experiment_cls 
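        # _test_cases and _experiment_cls must be assigned before delegating
        # to super().setUp(), because the template TestCase created by
        # create_experiment_test_template_for_backend asserts that both
        # attributes exist (comment added for clarity).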
super().setUp() def tearDown(self): import sys try: del sys.modules["tf"] except KeyError: pass try: del tf except (UnboundLocalError, NameError): pass try: del sys.modules["tensorflow"] except KeyError: pass try: del tensorflow except (UnboundLocalError, NameError): pass gc.collect() if __name__ == "__main__": unittest.main() ================================================ FILE: tests/training/backends/test_tf_graph.py ================================================ import unittest import gc from tests.utils import check_for_tf_graph_backend from delira.utils import DeliraConfig from sklearn.metrics import mean_absolute_error from .utils import create_experiment_test_template_for_backend if check_for_tf_graph_backend(): from delira.models import AbstractTfGraphNetwork import tensorflow as tf class DummyNetworkTfGraph(AbstractTfGraphNetwork): def __init__(self): super().__init__() self.model = tf.keras.models.Sequential( layers=[ tf.keras.layers.Dense(64, input_shape=( 32,), bias_initializer='glorot_uniform'), tf.keras.layers.ReLU(), tf.keras.layers.Dense( 1, bias_initializer='glorot_uniform')] ) data = tf.placeholder(shape=[None, 32], dtype=tf.float32) labels = tf.placeholder_with_default( tf.zeros([tf.shape(data)[0], 1]), shape=[None, 1]) preds_train = self.model(data) preds_eval = self.model(data) self.inputs["data"] = data self.inputs["label"] = labels self.outputs_train["pred"] = preds_train self.outputs_eval["pred"] = preds_eval class TestTfGraphBackend( create_experiment_test_template_for_backend("TFGRAPH") ): def setUp(self) -> None: if check_for_tf_graph_backend(): import tensorflow as tf tf.disable_eager_execution() from delira.training import TfGraphExperiment config = DeliraConfig() config.fixed_params = { "model": {}, "training": { "losses": { "CE": tf.losses.softmax_cross_entropy}, "optimizer_cls": tf.train.AdamOptimizer, "optimizer_params": {"learning_rate": 1e-3}, "num_epochs": 2, "metrics": {"mae": mean_absolute_error}, "lr_sched_cls": None, "lr_sched_params": {}} } model_cls = DummyNetworkTfGraph experiment_cls = TfGraphExperiment else: config = None model_cls = None experiment_cls = None len_train = 100 len_test = 50 self._test_cases = [ { "config": config, "network_cls": model_cls, "len_train": len_train, "len_test": len_test, "key_mapping": {"data": "data"}, } ] self._experiment_cls = experiment_cls super().setUp() def tearDown(self): import sys try: del sys.modules["tf"] except KeyError: pass try: del tf except (UnboundLocalError, NameError): pass try: del sys.modules["tensorflow"] except KeyError: pass try: del tensorflow except (UnboundLocalError, NameError): pass gc.collect() if __name__ == "__main__": unittest.main() ================================================ FILE: tests/training/backends/test_torch.py ================================================ import unittest from tests.utils import check_for_torch_backend from delira.utils import DeliraConfig from sklearn.metrics import mean_absolute_error from .utils import create_experiment_test_template_for_backend if check_for_torch_backend(): from delira.models import AbstractPyTorchNetwork import torch class DummyNetworkTorch(AbstractPyTorchNetwork): def __init__(self): super().__init__() self.module = torch.nn.Sequential( torch.nn.Linear(32, 64), torch.nn.ReLU(), torch.nn.Linear(64, 1) ) def forward(self, x): return { "pred": self.module(x) } class TestTorchBackend( create_experiment_test_template_for_backend("TORCH") ): def setUp(self) -> None: if check_for_torch_backend(): import torch from delira.training 
import PyTorchExperiment config = DeliraConfig() config.fixed_params = { "model": {}, "training": { "losses": { "L1": torch.nn.BCEWithLogitsLoss()}, "optimizer_cls": torch.optim.Adam, "optimizer_params": {}, "num_epochs": 2, "metrics": {"mae": mean_absolute_error}, "lr_sched_cls": None, "lr_sched_params": {}} } model_cls = DummyNetworkTorch experiment_cls = PyTorchExperiment else: config = None model_cls = None experiment_cls = None len_train = 100 len_test = 50 self._test_cases = [ { "config": config, "network_cls": model_cls, "len_train": len_train, "len_test": len_test, "key_mapping": {"x": "data"}, } ] self._experiment_cls = experiment_cls super().setUp() if __name__ == "__main__": unittest.main() ================================================ FILE: tests/training/backends/test_torchscript.py ================================================ import unittest from tests.utils import check_for_torchscript_backend from delira.utils import DeliraConfig from sklearn.metrics import mean_absolute_error from .utils import create_experiment_test_template_for_backend if check_for_torchscript_backend(): from delira.models import AbstractTorchScriptNetwork import torch class DummyNetworkTorchScript(AbstractTorchScriptNetwork): __constants__ = ["module"] def __init__(self): super().__init__() self.module = torch.nn.Sequential( torch.nn.Linear(32, 64), torch.nn.ReLU(), torch.nn.Linear(64, 1) ) @torch.jit.script_method def forward(self, x): return { "pred": self.module(x) } class TestTorchScriptBackend( create_experiment_test_template_for_backend("TORCHSCRIPT") ): def setUp(self) -> None: if check_for_torchscript_backend(): import torch from delira.training import TorchScriptExperiment config = DeliraConfig() config.fixed_params = { "model": {}, "training": { "losses": { "L1": torch.nn.BCEWithLogitsLoss()}, "optimizer_cls": torch.optim.Adam, "optimizer_params": {}, "num_epochs": 2, "metrics": {"mae": mean_absolute_error}, "lr_sched_cls": None, "lr_sched_params": {}} } model_cls = DummyNetworkTorchScript experiment_cls = TorchScriptExperiment else: config = None model_cls = None experiment_cls = None len_train = 100 len_test = 50 self._test_cases = [ { "config": config, "network_cls": model_cls, "len_train": len_train, "len_test": len_test, "key_mapping": {"x": "data"}, } ] self._experiment_cls = experiment_cls super().setUp() if __name__ == "__main__": unittest.main() ================================================ FILE: tests/training/backends/utils.py ================================================ import numpy as np from delira.data_loading import AbstractDataset, DataManager from delira.training import BaseExperiment from tests.utils import check_for_chainer_backend, \ check_for_tf_eager_backend, check_for_tf_graph_backend, \ check_for_sklearn_backend, check_for_torch_backend, \ check_for_torchscript_backend import unittest import logging from delira.training.callbacks import AbstractCallback callback_logger = logging.getLogger("CallbackLogger") _SKIP_CONDITIONS = { "CHAINER": check_for_chainer_backend, "TFEAGER": check_for_tf_eager_backend, "TFGRAPH": check_for_tf_graph_backend, "TORCH": check_for_torch_backend, "TORCHSCRIPT": check_for_torchscript_backend, "SKLEARN": check_for_sklearn_backend } class DummyDataset(AbstractDataset): def __init__(self, length): super().__init__(None, None) self.length = length def __getitem__(self, index): return {"data": np.random.rand(32), "label": np.random.randint(0, 1, 1)} def __len__(self): return self.length def get_sample_from_index(self, index): return 
self.__getitem__(index) class LoggingCallback(): def at_epoch_begin(self, trainer, curr_epoch, **kwargs): callback_logger.info("AtEpochBegin_epoch{}".format(curr_epoch)) return {} def at_epoch_end(self, trainer, curr_epoch, **kwargs): callback_logger.info("AtEpochEnd_epoch{}".format(curr_epoch)) return {} def at_training_begin(self, trainer, **kwargs): callback_logger.info("AtTrainingBegin_fold{}".format(trainer.fold)) return {} def at_training_end(self, trainer, **kwargs): callback_logger.info("AtTrainingEnd_fold{}".format(trainer.fold)) return {} def at_iter_begin(self, trainer, iter_num, **kwargs): callback_logger.info("AtIterBegin_iter{}".format(iter_num)) return {} def at_iter_end(self, trainer, iter_num, **kwargs): callback_logger.info("AtIterEnd_iter{}".format(iter_num)) return {} def add_logging_callback(dict_like): callbacks = list(dict_like.pop("callbacks", [])) callbacks.append(LoggingCallback()) dict_like["callbacks"] = callbacks return dict_like def run_experiment(experiment_cls, config, network_cls, len_train, len_test, **kwargs): assert issubclass(experiment_cls, BaseExperiment) exp = experiment_cls(config, network_cls, **kwargs) dset_train = DummyDataset(len_train) dset_test = DummyDataset(len_test) dmgr_train = DataManager(dset_train, 16, 4, None) dmgr_test = DataManager(dset_test, 16, 1, None) return exp.run(dmgr_train, dmgr_test) def test_experiment(experiment_cls, config, network_cls, len_test, **kwargs): assert issubclass(experiment_cls, BaseExperiment) exp = experiment_cls(config, network_cls, **kwargs) dset_test = DummyDataset(len_test) dmgr_test = DataManager(dset_test, 16, 1, None) model = network_cls() return exp.test(model, dmgr_test, config.nested_get("metrics", {}), kwargs.get("metric_keys", None)) def kfold_experiment(experiment_cls, config, network_cls, len_data, shuffle=True, split_type="random", num_splits=2, val_split=None, **kwargs): assert issubclass(experiment_cls, BaseExperiment) metric_keys = kwargs.pop("metric_keys", None) exp = experiment_cls(config, network_cls, **kwargs) dset = DummyDataset(len_data) dmgr = DataManager(dset, 16, 1, None) return exp.kfold(data=dmgr, metrics=config.nested_get("metrics"), shuffle=shuffle, split_type=split_type, num_splits=num_splits, val_split=val_split, metric_keys=metric_keys) def create_experiment_test_template_for_backend(backend: str): backend_skip = unittest.skipUnless(_SKIP_CONDITIONS[backend](), "Test should be only executed if " "backend %s is installed and specified" % backend) class TestCase(unittest.TestCase): def setUp(self) -> None: # check if the proviced test case hast the following attributes set assert hasattr(self, "_experiment_cls") assert hasattr(self, "_test_cases") self.logging_msg_run = [ 'INFO:CallbackLogger:AtEpochBegin_epoch1', 'INFO:CallbackLogger:AtEpochEnd_epoch1', 'INFO:CallbackLogger:AtIterBegin_iter0', 'INFO:CallbackLogger:AtIterEnd_iter0', 'INFO:CallbackLogger:AtTrainingBegin_fold0', 'INFO:CallbackLogger:AtTrainingEnd_fold0', ] self.logging_msg_test = [ 'INFO:CallbackLogger:AtIterBegin_iter0', 'INFO:CallbackLogger:AtIterEnd_iter0', ] self.logging_msg_kfold = [ 'INFO:CallbackLogger:AtEpochBegin_epoch1', 'INFO:CallbackLogger:AtEpochEnd_epoch1', 'INFO:CallbackLogger:AtIterBegin_iter0', 'INFO:CallbackLogger:AtIterEnd_iter0', 'INFO:CallbackLogger:AtTrainingBegin_fold0', 'INFO:CallbackLogger:AtTrainingEnd_fold0', 'INFO:CallbackLogger:AtTrainingBegin_fold1', 'INFO:CallbackLogger:AtTrainingEnd_fold1', ] @backend_skip def test_experiment_run(self): # prototype to run an experiment once for 
each testcase for case in self._test_cases: with self.subTest(case=case): case = add_logging_callback(case) with self.assertLogs(callback_logger, "INFO") as cm: run_experiment(self._experiment_cls, **case) for msg in self.logging_msg_run: self.assertIn(msg, cm.output) @backend_skip def test_experiment_test(self): # prototype to test an experiment once with each testcase for case in self._test_cases: with self.subTest(case=case): _ = case.pop("len_train") case = add_logging_callback(case) with self.assertLogs(callback_logger, "INFO") as cm: test_experiment(self._experiment_cls, **case) for msg in self.logging_msg_test: self.assertIn(msg, cm.output) @backend_skip def test_experiment_kfold(self): # runs multiple kfolds with each testcase # ( 1 for each combination of split_type and val_split) for case in self._test_cases: with self.subTest(case=case): # combine test and train data to len_data len_data = case.pop("len_test") + case.pop("len_train") case["len_data"] = len_data case = add_logging_callback(case) for split_type in ["random", "stratified", "error"]: with self.subTest(split_type=split_type): if split_type == "error": # must raise ValueError with self.assertRaises(ValueError): kfold_experiment( self._experiment_cls, **case, split_type=split_type, num_splits=2) continue else: for val_split in [0.2, None]: with self.subTest(val_split=val_split): with self.assertLogs( callback_logger, "INFO") as cm: kfold_experiment( self._experiment_cls, **case, val_split=val_split, split_type=split_type, num_splits=2, ) for msg in self.logging_msg_kfold: self.assertIn(msg, cm.output) return TestCase ================================================ FILE: tests/training/test_losses_torch.py ================================================ import unittest from ..utils import check_for_torch_backend class FocalLossTestPyTorch(unittest.TestCase): @unittest.skipUnless(check_for_torch_backend(), reason="No torch backend installed") def test_focalloss(self): """ Test some predefines focal loss values """ from delira.training.losses import BCEFocalLossLogitPyTorch, \ BCEFocalLossPyTorch import torch.nn as nn import torch import torch.nn.functional as F # examples ####################################################################### # binary values p = torch.Tensor([[0, 0.2, 0.5, 1.0], [0, 0.2, 0.5, 1.0]]) t = torch.Tensor([[0, 0, 0, 0], [1, 1, 1, 1]]) p_l = torch.Tensor([[-2, -1, 0, 2], [-2, -1, 0, 1]]) ####################################################################### # params gamma = 2 alpha = 0.25 eps = 1e-8 ####################################################################### # compute targets # target for focal loss p_t = p * t + (1 - p) * (1 - t) alpha_t = torch.Tensor([alpha]).expand_as(t) * t + \ (1 - t) * (1 - torch.Tensor([alpha]).expand_as(t)) w = alpha_t * (1 - p_t).pow(torch.Tensor([gamma])) fc_value = F.binary_cross_entropy(p, t, w, reduction='none') # target for focal loss with logit p_tmp = torch.sigmoid(p_l) p_t = p_tmp * t + (1 - p_tmp) * (1 - t) alpha_t = torch.Tensor([alpha]).expand_as(t) * t + \ (1 - t) * (1 - torch.Tensor([alpha]).expand_as(t)) w = alpha_t * (1 - p_t).pow(torch.Tensor([gamma])) fc_value_logit = \ F.binary_cross_entropy_with_logits(p_l, t, w, reduction='none') ####################################################################### # test against BCE and CE =>focal loss with gamma=0, alpha=None # test against binary_cross_entropy bce = nn.BCELoss(reduction='none') focal = BCEFocalLossPyTorch(alpha=None, gamma=0, reduction='none') bce_loss = bce(p, t) focal_loss = 
focal(p, t) self.assertTrue((torch.abs(bce_loss - focal_loss) < eps).all()) # test against binary_cross_entropy with logit bce = nn.BCEWithLogitsLoss() focal = BCEFocalLossLogitPyTorch(alpha=None, gamma=0) bce_loss = bce(p_l, t) focal_loss = focal(p_l, t) self.assertTrue((torch.abs(bce_loss - focal_loss) < eps).all()) ####################################################################### # test focal loss with pre computed values # test focal loss binary (values manually pre computed) focal = BCEFocalLossPyTorch(gamma=gamma, alpha=alpha, reduction='none') focal_loss = focal(p, t) self.assertTrue((torch.abs(fc_value - focal_loss) < eps).all()) # test focal loss binary with logit (values manually pre computed) # Note that now p_l is used as prediction focal = BCEFocalLossLogitPyTorch( gamma=gamma, alpha=alpha, reduction='none') focal_loss = focal(p_l, t) self.assertTrue((torch.abs(fc_value_logit - focal_loss) < eps).all()) ####################################################################### # test if backward function works p.requires_grad = True focal = BCEFocalLossPyTorch(gamma=gamma, alpha=alpha) focal_loss = focal(p, t) try: focal_loss.backward() except BaseException: self.assertTrue(False, "Backward function failed for focal loss") p_l.requires_grad = True focal = BCEFocalLossLogitPyTorch(gamma=gamma, alpha=alpha) focal_loss = focal(p_l, t) try: focal_loss.backward() except BaseException: self.assertTrue( False, "Backward function failed for focal loss with logits") if __name__ == "__main__": unittest.main() ================================================ FILE: tests/training/test_metrics.py ================================================ import numpy as np from sklearn.metrics import accuracy_score import unittest from delira.training.metrics import SklearnClassificationMetric, \ SklearnAccuracyScore, AurocMetric from ..utils import check_for_no_backend class TestMetrics(unittest.TestCase): @unittest.skipUnless( check_for_no_backend(), "Test should only be executed " "if no backend is specified") def test_sklearn_classification_metric(self): """ Test metric wrapper for sklearn metrics """ target = np.array([1, 1, 1, 1, 1]) pred = np.array([0, 1, 0, 1, 0]) dummy_fn = accuracy_score metric_wrapped = SklearnClassificationMetric(dummy_fn, pred_logits=False, gt_logits=False) wrapped_score = metric_wrapped(target, pred) self.assertLess(np.abs(wrapped_score - 0.4), 1e-8) metric_ac = SklearnAccuracyScore(gt_logits=False, pred_logits=False) score = metric_ac(target, pred) self.assertLess(np.abs(score - 0.4), 1e-8) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed " "if no backend is specified") def test_auroc_metric(self): """ Test auroc metric """ pred = np.array([1, 1, 1, 1]) target = np.array([1, 0, 1, 0]) metric_auc = AurocMetric() score_auc = metric_auc(target, pred) self.assertEqual(score_auc, 0.5) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/utils/__init__.py ================================================ from delira import get_backends import os def check_for_environment_variable(variable: str, value: str): if variable not in os.environ or os.environ[variable] == value: return True return False def check_for_backend(backend_name, environment_variable): backend_installed = backend_name in get_backends() backend_specified = check_for_environment_variable("BACKEND", environment_variable) return backend_installed and backend_specified def check_for_torch_backend(): return check_for_backend("TORCH", 
"Torch") def check_for_torchscript_backend(): return check_for_backend("TORCH", "TorchScript") def check_for_tf_eager_backend(): return check_for_backend("TF", "TFEager") def check_for_tf_graph_backend(): return check_for_backend("TF", "TFGraph") def check_for_chainer_backend(): return check_for_backend("CHAINER", "Chainer") def check_for_sklearn_backend(): return check_for_backend("SKLEARN", "Sklearn") def check_for_no_backend(): # sklearn backend is always installed, so this check is mainly a check if # installation was successfull and checks for environment variable return check_for_backend("SKLEARN", "None") ================================================ FILE: tests/utils/dict_reductions.py ================================================ import unittest import numpy as np from delira.utils.dict_reductions import possible_reductions, \ flatten_dict, unflatten_dict, reduce_dict, get_reduction class TestDictReductions(unittest.TestCase): def setUp(self) -> None: self._reduce_sequence = [2, 3, 4, 5, 6] self._test_dict = { "a": self._reduce_sequence, "b": { "c": self._reduce_sequence }, "d": { "e": { "f": self._reduce_sequence } } } self._flattened_test_dict = { "a": self._reduce_sequence, "b.c": self._reduce_sequence, "d.e.f": self._reduce_sequence } self._reduction_results = {"max": max(self._reduce_sequence), "min": min(self._reduce_sequence), "mean": np.mean(self._reduce_sequence), "median": np.median(self._reduce_sequence), "first": self._reduce_sequence[0], "last": self._reduce_sequence[-1]} self._reduce_dicts = [] for i in self._reduce_sequence: self._reduce_dicts.append( { "a": i, "b": { "c": i }, "d": { "e": { "f": i } } } ) def test_dict_flatten(self): result_dict = flatten_dict(self._test_dict, parent_key='', sep=".") self.assertDictEqual(result_dict, self._flattened_test_dict) def test_dict_unflatten(self): result_dict = unflatten_dict(self._flattened_test_dict, sep=".") self.assertDictEqual(result_dict, self._test_dict) def test_dict_flatten_unflatten(self): result_dict = unflatten_dict(flatten_dict(self._test_dict, parent_key='', sep="."), sep=".") self.assertDictEqual(result_dict, self._test_dict) def test_reduction_fuctions(self): for key in possible_reductions(): with self.subTest(reduce_type=key): result = get_reduction(key)(self._reduce_sequence) # convert array to scalar if necessary if isinstance(result, np.ndarray): result = result.item() self.assertEquals(result, self._reduction_results[key]) def test_reduce_dict(self): for key in possible_reductions(): with self.subTest(reduce_type=key): result_dict = reduce_dict(self._reduce_dicts, get_reduction(key)) target_dict = { "a": self._reduction_results[key], "b": { "c": self._reduction_results[key] }, "d": { "e": { "f": self._reduction_results[key] } } } self.assertDictEqual(result_dict, target_dict) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/utils/test_codecs.py ================================================ import unittest import numpy as np from functools import partial from delira.utils.codecs import Encoder, Decoder from . 
import check_for_no_backend class CodecsTest(unittest.TestCase): @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_encoder(self): test_dict = {} test_dict['number'] = 1 test_dict['string'] = "test_string" test_dict['list'] = [0, 1, 2, "skjd"] test_dict['dict'] = {"key0": 0, "key1": 1, "key2": 2} test_dict['tuple'] = (1, 2, 3) test_dict['none'] = None test_dict['nparray'] = np.array([0, 1, 2]) test_dict['function'] = partial test_dict['class'] = np.ndarray encoded_test_dict = Encoder().encode(test_dict) self.assertTrue(encoded_test_dict['number'] == 1) self.assertTrue(encoded_test_dict['string'] == "test_string") self.assertListEqual(encoded_test_dict['list'], [0, 1, 2, "skjd"]) self.assertDictEqual(encoded_test_dict['dict'], { "key0": 0, "key1": 1, "key2": 2}) self.assertDictEqual(encoded_test_dict['tuple'], { "__convert__": { "repr": [1, 2, 3], "type": { "__type__": {"module": "builtins", "name": "tuple"}} }}) self.assertIsNone(encoded_test_dict["none"]) self.assertDictEqual(encoded_test_dict["nparray"], {"__array__": [0, 1, 2]}) self.assertDictEqual(encoded_test_dict["function"], { "__type__": {"module": "functools", "name": "partial"}}) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_decoder(self): test_dict = {} test_dict['number'] = 1 test_dict['string'] = "test_string" test_dict['list'] = [0, 1, 2, "skjd"] test_dict['dict'] = {"key0": 0, "key1": 1, "key2": 2} test_dict['tuple'] = {"__convert__": { "repr": [1, 2, 3], "type": {"__type__": {"module": "builtins", "name": "tuple"}} }} test_dict['none'] = None test_dict['nparray'] = {"__array__": [0, 1, 2]} test_dict['function'] = {"__function__": { "module": "numpy", "name": "amin"}} test_dict['class'] = {"__type__": { "module": "numpy", "name": "ndarray"}} test_dict["classargs"] = {"__classargs__": {"module": "numpy", "name": "ndarray", "args": [[1, 2, 3]] } } test_dict["funcargs"] = {"__functionargs__": {"module": "numpy", "name": "min", "kwargs": {"axis": (1, 2)}} } decoded_dict = Decoder().decode(test_dict) self.assertTrue(decoded_dict['number'] == 1) self.assertTrue(decoded_dict['string'] == "test_string") self.assertListEqual(decoded_dict['list'], [0, 1, 2, "skjd"]) self.assertDictEqual(decoded_dict['dict'], { "key0": 0, "key1": 1, "key2": 2}) self.assertTupleEqual(decoded_dict['tuple'], (1, 2, 3)) self.assertIsNone(decoded_dict["none"]) self.assertTrue((decoded_dict["nparray"] == np.array([0, 1, 2])).all()) self.assertTrue( decoded_dict["function"].__module__ == np.min.__module__) self.assertTrue( decoded_dict["function"].__name__ == np.min.__name__) self.assertTrue( decoded_dict["class"].__module__ == np.ndarray.__module__) self.assertTrue( decoded_dict["class"].__name__ == np.ndarray.__name__) self.assertTrue(test_dict["classargs"].shape == (1, 2, 3)) self.assertTrue(test_dict["funcargs"].args[0] == []) self.assertTrue(test_dict["funcargs"].args[1]["axis"] == (1, 2)) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/utils/test_config.py ================================================ import unittest import os import sys import copy import argparse from unittest.mock import patch from delira._version import get_versions from delira.utils.config import Config, LookupConfig, DeliraConfig from delira.logging import Logger, TensorboardBackend, make_logger, \ register_logger import warnings from . 
import check_for_no_backend class ConfigTest(unittest.TestCase): def setUp(self): self.config_cls = Config self.example_dict = { "shallowStr": "a", "shallowNum": 1, "deep": {"deepStr": "b", "deepNum": 2}, "nestedListOrig": [{"dictList": [1, 2, 3]}], } self.update_dict = { "deep": {"deepStr": "c"}, "shallowNew": 3, "deepNew": {"newNum": 4}, "nestedList": [{"dictList": [1, 2, 3]}], "nestedList2": [{"dictList": [1, 2, 3]}], } self._logger = self._setup_logger() register_logger(self._logger, __file__) def _setup_logger(self): return make_logger(TensorboardBackend( {"logdir": os.path.join(".", "runs", self._testMethodName)} )) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_config_access(self): # initialization from dict cf = self.config_cls(self.example_dict) self.assertEqual(cf["shallowStr"], self.example_dict["shallowStr"]) self.assertEqual(cf["shallowNum"], self.example_dict["shallowNum"]) # check if parameters were written correctly self.assertEqual(cf["deep"]["deepStr"], self.example_dict["deep"]["deepStr"]) self.assertEqual(cf["deep"]["deepNum"], self.example_dict["deep"]["deepNum"]) # check deep acces with operators self.assertEqual(cf["deep.deepStr"], self.example_dict["deep"]["deepStr"]) self.assertEqual(cf.deep.deepNum, self.example_dict["deep"]["deepNum"]) # empty initialization cf = self.config_cls() # set shallow attributes cf.shallowString = "string" cf.shallowNum = 1 cf.deep = {} cf.deep.string = "deepString" cf.deep.num = 2 cf["shallowString2"] = "string2" cf["shallowNum2"] = 1 cf["deep.string2"] = "deepString2" cf["deep.num2"] = 2 # check if parameters were written correctly self.assertEqual(cf["shallowString"], "string") self.assertEqual(cf["shallowNum"], 1) self.assertEqual(cf["deep.string"], "deepString") self.assertEqual(cf["deep.num"], 2) self.assertEqual(cf["shallowString2"], "string2") self.assertEqual(cf["shallowNum2"], 1) self.assertEqual(cf["deep.string2"], "deepString2") self.assertEqual(cf["deep.num2"], 2) # check contains operator self.assertTrue("shallowString" in cf) self.assertTrue("shallowString2" in cf) self.assertTrue("deep.string" in cf) self.assertTrue("deep.string2" in cf) warning_msg = ("The key 5 is not a string, but a . 
" "This may lead to unwanted behavior!") with self.assertWarns(RuntimeWarning, msg=warning_msg): cf[5] = 10 @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_config_access_with_non_existing_keys(self): cf = self.config_cls(self.example_dict) with self.assertRaises(KeyError): cf["unknown_key"] with self.assertRaises(KeyError): cf["shallowStr.unknown_key"] @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_update(self): cf = self.config_cls.create_from_dict(self.example_dict) with self.assertRaises(ValueError): cf.update(self.update_dict) # update with overwrite cf.update(self.update_dict, overwrite=True) self.assertEqual(cf["deep.deepStr"], self.update_dict["deep"]["deepStr"]) # add new values self.assertEqual(cf["shallowNew"], self.update_dict["shallowNew"]) self.assertEqual(cf["deepNew.newNum"], self.update_dict["deepNew"]["newNum"]) # check for shallow copy cf["nestedList"][0]["dictList"][0] = 10 self.assertEqual(self.update_dict["nestedList"][0]["dictList"][0], cf["nestedList"][0]["dictList"][0]) # check for deepcopy cf.update(self.update_dict, overwrite=True, deepcopy=True) cf["nestedList2"][0]["dictList"][0] = 10 self.assertNotEqual(self.update_dict["nestedList2"][0]["dictList"][0], cf["nestedList2"][0]["dictList"][0]) # check for no error when only updating nested keys cf = self.config_cls.create_from_dict(self.example_dict) update_dict = copy.deepcopy(self.update_dict) update_dict["deep"].pop("deepStr") update_dict["deep"]["deepStr2"] = "deepStr2" cf.update(update_dict) self.assertEqual(cf["deep.deepStr2"], update_dict["deep"]["deepStr2"]) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_dump_and_load(self): cf = self.config_cls.create_from_dict(self.example_dict) path = os.path.join(".", "test_config.yaml") # check dump cf.dump(path) # check load cf_loaded = self.config_cls() cf_loaded.load(path) self.assertDictEqual(cf, cf_loaded) cf_loaded_file = self.config_cls.create_from_file(path) self.assertDictEqual(cf, cf_loaded_file) # check dump cf_string = cf.dumps() # check load cf_loaded = self.config_cls() cf_loaded.loads(cf_string) self.assertDictEqual(cf, cf_loaded) cf_loaded_str = self.config_cls.create_from_str(cf_string) self.assertDictEqual(cf, cf_loaded_str) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_copy(self): cf = self.config_cls.create_from_dict(self.example_dict) # check for shallow copy cf_shallow = copy.copy(cf) cf_shallow["nestedListOrig"][0]["dictList"][0] = 10 self.assertEqual(cf["nestedListOrig"][0]["dictList"][0], cf_shallow["nestedListOrig"][0]["dictList"][0]) # check for deepcopy cf_deep = copy.deepcopy(cf) cf_deep["nestedListOrig"][0]["dictList"][0] = 20 self.assertNotEqual(cf["nestedListOrig"][0]["dictList"][0], cf_deep["nestedListOrig"][0]["dictList"][0]) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_create_from_argparse(self): parser = argparse.ArgumentParser() parser.add_argument('-p1') parser.add_argument('--param2') cf1 = self.config_cls.create_from_argparse( parser, args=['-p1', 'parameter1', '--param2', 'parameter2']) self.assertEqual(cf1['p1'], 'parameter1') self.assertEqual(cf1['param2'], 'parameter2') args = parser.parse_args( ['-p1', 'parameter1', '--param2', 'parameter2']) self.assertEqual(cf1['p1'], 'parameter1') 
self.assertEqual(cf1['param2'], 'parameter2') @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_internal_type(self): cf = self.config_cls.create_from_dict(self.example_dict) self.assertTrue(isinstance(cf["deep"], self.config_cls)) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_create_argparser(self): cf = self.config_cls.create_from_dict(self.example_dict) testargs = [ '--shallowNum', '10', '--deep.deepStr', 'check', '--testlist', 'ele1', 'ele2', '--setflag'] parser = cf.create_argparser() known, unknown = parser.parse_known_args(testargs) self.assertEqual(vars(known)['shallowNum'], 10) self.assertEqual(vars(known)['deep.deepStr'], 'check') self.assertEqual(unknown, ['--testlist', 'ele1', 'ele2', '--setflag']) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_update_from_argparse(self): cf = self.config_cls.create_from_dict(self.example_dict) testargs = ['--shallowNum', '10', '--deep.deepStr', 'check', '--testlist', 'ele1', 'ele2', '--setflag'] # placeholder pyfile because argparser omits first argument from sys # argv with patch.object(sys, 'argv', ['pyfile.py'] + testargs): cf.update_from_argparse(add_unknown_items=True) self.assertEqual(cf['shallowNum'], int(testargs[1])) self.assertEqual(cf['deep']['deepStr'], testargs[3]) self.assertEqual(cf['testlist'], testargs[5:7]) self.assertEqual(cf['setflag'], True) with warnings.catch_warnings(record=True) as w: with patch.object(sys, 'argv', ['pyfile.py', '--unknown', 'arg']): cf.update_from_argparse(add_unknown_items=False) self.assertEqual(len(w), 1) class LookupConfigTest(ConfigTest): def setUp(self): super().setUp() self.config_cls = LookupConfig @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_nested_lookpup(self): cf = self.config_cls.create_from_dict(self.example_dict) self.assertEqual(cf["deep.deepStr"], cf.nested_get("deep.deepStr")) self.assertEqual(cf["deep.deepNum"], cf.nested_get("deepNum")) with self.assertRaises(KeyError): cf.nested_get("nonExistingKey") cf["deepStr"] = "duplicate" with self.assertRaises(KeyError): cf.nested_get("deepStr") self.assertIsNone(cf.nested_get("nonExistingKey", None)) self.assertIsNone(cf.nested_get("nonExistingKey", default=None)) cf["nested_duplicate.deep"] = "duplicate" with self.assertRaises(KeyError): cf.nested_get("deep") multiple_val = cf.nested_get("deep", allow_multiple=True) expected_result = [{"deepStr": "b", "deepNum": 2}, "duplicate"] for val in multiple_val: self.assertIn(val, expected_result) expected_result.pop(expected_result.index(val)) self.assertEquals(len(expected_result), 0) class DeliraConfigTest(LookupConfigTest): def setUp(self): super().setUp() self.config_cls = DeliraConfig @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_property_params(self): for mode in ["fixed", "variable"]: cf = self.config_cls.create_from_dict({}) setattr(cf, "{}_params".format(mode), {"model": {"num_classes": 3}, "training": {"epochs": 2}}) # manual checking of values self.assertEqual(cf["{}_model.num_classes".format(mode)], 3) self.assertEqual(cf["{}_training.epochs".format(mode)], 2) # check getter params = getattr(cf, "{}_params".format(mode)) self.assertEqual(params["model.num_classes"], 3) self.assertEqual(params["training.epochs"], 2) for mode in ["training", "model"]: cf = 
self.config_cls.create_from_dict(self.example_dict) setattr(cf, "{}_params".format(mode), {"fixed": {"num_classes": 3}, "variable": {"epochs": 2}}) # manual checking of values self.assertEqual(cf["fixed_{}.num_classes".format(mode)], 3) self.assertEqual(cf["variable_{}.epochs".format(mode)], 2) # check getter params = getattr(cf, "{}_params".format(mode)) self.assertEqual(params["fixed.num_classes"], 3) self.assertEqual(params["variable.epochs"], 2) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_logging_as_string(self): cf = self.config_cls() cf.update({"augment": True}) cf.update({"fixed_model": "fm", "fixed_training": "ft", "variable_model": "vm", "variable_training": "vt"}, overwrite=True) cf_str = cf.log_as_string() cf_str_full = cf.log_as_string(full_config=True) self.assertEqual(cf_str, ("__convert__:\n" " repr:\n" " _timestamp: {}\n" " fixed_model: fm\n" " fixed_training: ft\n" " variable_model: vm\n" " variable_training: vt\n" " type:\n" " __type__:\n" " module: delira.utils.config\n" " name: LookupConfig\n".format( cf["_timestamp"]))) self.assertEqual(cf_str_full, ("__convert__:\n" " repr:\n" " _timestamp: {}\n" " _version: {}\n" " augment: true\n" " fixed_model: fm\n" " fixed_training: ft\n" " variable_model: vm\n" " variable_training: vt\n" " type:\n" " __type__:\n" " module: delira.utils.config\n" " name: DeliraConfig\n".format( cf["_timestamp"], cf["_version"]))) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed if no backend is specified") def test_internal_type(self): cf = self.config_cls.create_from_dict(self.example_dict) self.assertTrue(isinstance(cf["deep"], LookupConfig)) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/utils/test_messenger.py ================================================ from delira.training import BaseExperiment, BaseNetworkTrainer, Predictor from delira.utils import DeliraConfig from delira.models import AbstractNetwork from delira.data_loading import DataManager from delira.training.utils import convert_to_numpy_identity from delira.utils.messenger import BaseMessenger, SlackMessenger from ..training.backends.utils import DummyDataset from . 
import check_for_no_backend import unittest import logging import copy logger = logging.getLogger("UnitTestMessenger") class DummyNetwork(AbstractNetwork): """ Emulate Network """ def __init__(self, **kwargs): super().__init__() def __call__(self, *args, **kwargs): return {} @staticmethod def closure(model, data_dict: dict, optimizers: dict, losses=None, metrics=None, fold=0, **kwargs): return {}, {}, {} @staticmethod def prepare_batch(batch: dict, input_device, output_device): return {} class DummyTrainer(BaseNetworkTrainer): """ Emulate Trainer states """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.module = DummyNetwork() callbacks = kwargs.pop("callbacks", []) self._setup(network=self.module, lr_scheduler_cls=None, lr_scheduler_params={}, gpu_ids=[], key_mapping={}, convert_batch_to_npy_fn=convert_to_numpy_identity, prepare_batch_fn=self.module.prepare_batch, callbacks=callbacks) def train(self, *args, num_epochs=2, **kwargs): self._at_training_begin() for epoch in range(self.start_epoch, num_epochs + 1): self._at_epoch_begin(None, epoch, num_epochs) is_best = True if epoch % 2 == 1 else False self._at_epoch_end({}, None, epoch, is_best) self._at_training_end() return DummyNetwork() def test(self, *args, **kwargs): return [{}], [{}] def save_state(self, file_name, *args, **kwargs): pass class DummyPredictor(Predictor): """ Emulate predictor """ def predict(self, *args, **kwargs): return {} def predict_data_mgr(self, *args, **kwargs): yield {}, {} return class DummyExperiment(BaseExperiment): def __init__(self): dummy_config = DeliraConfig() dummy_config.fixed_params = { "model": {}, "training": { "losses": {}, "optimizer_cls": None, "optimizer_params": {}, "num_epochs": 2, "lr_sched_cls": None, "lr_sched_params": {}} } super().__init__(dummy_config, DummyNetwork, key_mapping={}, name="TestExperiment", trainer_cls=DummyTrainer, predictor_cls=DummyPredictor) def run(self, *args, raise_error=False, **kwargs): if raise_error: raise RuntimeError() else: return super().run(*args, **kwargs) def resume(self, *args, raise_error=False, **kwargs): if raise_error: raise RuntimeError() else: return super().resume(*args, **kwargs) def test(self, *args, raise_error=False, **kwargs): if raise_error: raise RuntimeError() else: return super().test(*args, **kwargs) def kfold(self, *args, raise_error=False, **kwargs): if raise_error: raise RuntimeError() else: return super().kfold(*args, **kwargs) class LoggingBaseMessenger(BaseMessenger): def __init__( self, experiment, notify_epochs=None, **kwargs): """ Test messenger for BaseMessenger """ super().__init__(experiment, notify_epochs=notify_epochs, **kwargs) def emit_message(self, msg): logger.info(msg) class TestBaseMessenger(unittest.TestCase): def setUp(self) -> None: self.msg_run_successful = [ "INFO:UnitTestMessenger:TestExperiment : Training started.", "INFO:UnitTestMessenger:Epoch 1 trained.", "INFO:UnitTestMessenger:Epoch 2 trained.", "INFO:UnitTestMessenger:TestExperiment : Training completed.", ] self.msg_run_failed = [ "INFO:UnitTestMessenger:TestExperiment : Training started.", "INFO:UnitTestMessenger:TestExperiment : Training failed. \n", ] # self.msg_resume_successful = [] # self.msg_resume_failed = [] self.msg_test_successful = [ "INFO:UnitTestMessenger:TestExperiment : Test started.", "INFO:UnitTestMessenger:TestExperiment : Test completed.", ] self.msg_test_failed = [ "INFO:UnitTestMessenger:TestExperiment : Test started.", "INFO:UnitTestMessenger:TestExperiment : Test failed. 
\n", ] self.msg_kfold_successful = [ "INFO:UnitTestMessenger:TestExperiment : Kfold started.", "INFO:UnitTestMessenger:Fold 0 started.", "INFO:UnitTestMessenger:Epoch 1 trained.", "INFO:UnitTestMessenger:Epoch 2 trained.", "INFO:UnitTestMessenger:Fold 0 completed.", "INFO:UnitTestMessenger:Fold 1 started.", "INFO:UnitTestMessenger:Epoch 1 trained.", "INFO:UnitTestMessenger:Epoch 2 trained.", "INFO:UnitTestMessenger:Fold 1 completed.", "INFO:UnitTestMessenger:TestExperiment : Kfold completed.", ] self.msg_kfold_failed = [ "INFO:UnitTestMessenger:TestExperiment : Kfold started.", "INFO:UnitTestMessenger:TestExperiment : Kfold failed. \n", ] self.msg_create_experiment = [] self.messenger_cls = LoggingBaseMessenger self.messenger_kwargs = {"notify_epochs": 1} self.run_kwargs = {"gpu_ids": [], "logging_type": "tensorboardX", "logging_kwargs": {}, "fold": 3} def create_experiment(self, expected_msg=None): with self.assertLogs(logger, level='INFO') as cm: dummy_exp = DummyExperiment() dummy_exp = self.messenger_cls(dummy_exp, **self.messenger_kwargs) if expected_msg is None or not expected_msg: logger.info("NoExpectedMessage") if expected_msg is None or not expected_msg: self.assertEqual(cm.output, ["INFO:UnitTestMessenger:NoExpectedMessage"]) else: self.assertEqual(cm.output, expected_msg) def run_experiment(self, raise_error=False, expected_msg=None): dummy_exp = DummyExperiment() dummy_exp = self.messenger_cls(dummy_exp, **self.messenger_kwargs) dset_train = DummyDataset(10) dset_test = DummyDataset(10) dmgr_train = DataManager(dset_train, 2, 1, None) dmgr_test = DataManager(dset_test, 2, 1, None) with self.assertLogs(logger, level='INFO') as cm: if raise_error: with self.assertRaises(RuntimeError): dummy_exp.run(dmgr_train, dmgr_test, raise_error=True, **self.run_kwargs) else: dummy_exp.run(dmgr_train, dmgr_test, raise_error=False, **self.run_kwargs,) if expected_msg is None or not expected_msg: logger.info("NoExpectedMessage") if expected_msg is None or not expected_msg: self.assertEqual(cm.output, ["INFO:UnitTestMessenger:NoExpectedMessage"]) else: self.assertEqual(cm.output, expected_msg) def t_experiment(self, raise_error=False, expected_msg=None): dummy_exp = DummyExperiment() dummy_exp = self.messenger_cls(dummy_exp, **self.messenger_kwargs) dset_test = DummyDataset(10) dmgr_test = DataManager(dset_test, 2, 1, None) model = DummyNetwork() with self.assertLogs(logger, level='INFO') as cm: if raise_error: with self.assertRaises(RuntimeError): dummy_exp.test(model, dmgr_test, {}, raise_error=True) else: dummy_exp.test(model, dmgr_test, {}, raise_error=False) if expected_msg is None or not expected_msg: logger.info("NoExpectedMessage") if expected_msg is None or not expected_msg: self.assertEqual(cm.output, ["INFO:UnitTestMessenger:NoExpectedMessage"]) else: self.assertEqual(cm.output, expected_msg) def kfold_experiment(self, raise_error=False, expected_msg=None): kfold_kwargs = copy.deepcopy(self.run_kwargs) kfold_kwargs.pop("fold") dummy_exp = DummyExperiment() dummy_exp = self.messenger_cls(dummy_exp, **self.messenger_kwargs) dset = DummyDataset(10) dmgr = DataManager(dset, 2, 1, None) with self.assertLogs(logger, level='INFO') as cm: if raise_error: with self.assertRaises(RuntimeError): dummy_exp.kfold(data=dmgr, metrics={}, num_splits=2, raise_error=True, **kfold_kwargs) else: dummy_exp.kfold(data=dmgr, metrics={}, num_splits=2, raise_error=False, **kfold_kwargs) if expected_msg is None: logger.info("NoExpectedMessage") if expected_msg is None: self.assertEqual(cm.output, 
["INFO:UnitTestMessenger:NoExpectedMessage"]) else: self.assertEqual(cm.output, expected_msg) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed " "if no backend is installed") def test_create_experiment(self): self.create_experiment(self.msg_create_experiment) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed " "if no backend is installed") def test_run_successful(self): self.run_experiment(raise_error=False, expected_msg=self.msg_run_successful) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed " "if no backend is installed") def test_run_failed(self): self.run_experiment(raise_error=True, expected_msg=self.msg_run_failed) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed " "if no backend is installed") def test_test_successful(self): self.t_experiment(raise_error=False, expected_msg=self.msg_test_successful) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed " "if no backend is installed") def test_test_failed(self): self.t_experiment(raise_error=True, expected_msg=self.msg_test_failed) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed " "if no backend is installed") def test_kfold_successful(self): self.kfold_experiment(raise_error=False, expected_msg=self.msg_kfold_successful) @unittest.skipUnless( check_for_no_backend(), "Test should only be executed " "if no backend is installed") def test_kfold_failed(self): self.kfold_experiment(raise_error=True, expected_msg=self.msg_kfold_failed) class LoggingSlackMessenger(SlackMessenger): def emit_message(self, msg): logger.info(msg) return {} class TestSlackMessenger(TestBaseMessenger): def setUp(self) -> None: super().setUp() self.msg_create_experiment = [ "INFO:UnitTestMessenger:Created new experiment: TestExperiment", ] self.messenger_cls = LoggingSlackMessenger self.messenger_kwargs = {"notify_epochs": 1, "token": "dummyToken", "channel": "dummyChannel"} if __name__ == '__main__': unittest.main() ================================================ FILE: versioneer.py ================================================ # Version: 0.18 """The Versioneer - like a rocketeer, but for versions. The Versioneer ============== * like a rocketeer, but for versions! * https://github.com/warner/python-versioneer * Brian Warner * License: Public Domain * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy * [![Latest Version] (https://pypip.in/version/versioneer/badge.svg?style=flat) ](https://pypi.python.org/pypi/versioneer/) * [![Build Status] (https://travis-ci.org/warner/python-versioneer.png?branch=master) ](https://travis-ci.org/warner/python-versioneer) This is a tool for managing a recorded version number in distutils-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. Making a new release should be as easy as recording a new tag in your version-control system, and maybe making new tarballs. 
## Quick Install * `pip install versioneer` to somewhere to your $PATH * add a `[versioneer]` section to your setup.cfg (see below) * run `versioneer install` in your source tree, commit the results ## Version Identifiers Source trees come from a variety of places: * a version-control system checkout (mostly used by developers) * a nightly tarball, produced by build automation * a snapshot tarball, produced by a web-based VCS browser, like github's "tarball from tag" feature * a release tarball, produced by "setup.py sdist", distributed through PyPI Within each source tree, the version identifier (either a string or a number, this tool is format-agnostic) can come from a variety of places: * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows about recent "tags" and an absolute revision-id * the name of the directory into which the tarball was unpacked * an expanded VCS keyword ($Id$, etc) * a `_version.py` created by some earlier build step For released software, the version identifier is closely related to a VCS tag. Some projects use tag names that include more than just the version string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool needs to strip the tag prefix to extract the version identifier. For unreleased software (between tags), the version identifier should provide enough information to help developers recreate the same tree, while also giving them an idea of roughly how old the tree is (after version 1.2, before version 1.3). Many VCS systems can report a description that captures this, for example `git describe --tags --dirty --always` reports things like "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has uncommitted changes. The version identifier is used for multiple purposes: * to allow the module to self-identify its version: `myproject.__version__` * to choose a name and prefix for a 'setup.py sdist' tarball ## Theory of Operation Versioneer works by adding a special `_version.py` file into your source tree, where your `__init__.py` can import it. This `_version.py` knows how to dynamically ask the VCS tool for version information at import time. `_version.py` also contains `$Revision$` markers, and the installation process marks `_version.py` to have this marker rewritten with a tag name during the `git archive` command. As a result, generated tarballs will contain enough information to get the proper version. To allow `setup.py` to compute a version too, a `versioneer.py` is added to the top level of your source tree, next to `setup.py` and the `setup.cfg` that configures it. This overrides several distutils/setuptools commands to compute the version when invoked, and changes `setup.py build` and `setup.py sdist` to replace `_version.py` with a small static file that contains just the generated version data. ## Installation See [INSTALL.md](./INSTALL.md) for detailed installation instructions. ## Version-String Flavors Code which uses Versioneer can learn about its version string at runtime by importing `_version` from your main `__init__.py` file and running the `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can import the top-level `versioneer.py` and run `get_versions()`. Both functions return a dictionary with different flavors of version information: * `['version']`: A condensed version string, rendered using the selected style. This is the most commonly used value for the project's version string. 
The default "pep440" style yields strings like `0.11`, `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section below for alternative styles. * `['full-revisionid']`: detailed revision identifier. For Git, this is the full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the commit date in ISO 8601 format. This will be None if the date is not available. * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that this is only accurate if run in a VCS checkout, otherwise it is likely to be False or None * `['error']`: if the version string could not be computed, this will be set to a string describing the problem, otherwise it will be None. It may be useful to throw an exception in setup.py if this is set, to avoid e.g. creating tarballs with a version string of "unknown". Some variants are more useful than others. Including `full-revisionid` in a bug report should allow developers to reconstruct the exact code being tested (or indicate the presence of local changes that should be shared with the developers). `version` is suitable for display in an "about" box or a CLI `--version` output: it can be easily compared against release notes and lists of bugs fixed in various releases. The installer adds the following text to your `__init__.py` to place a basic version in `YOURPROJECT.__version__`: from ._version import get_versions __version__ = get_versions()['version'] del get_versions ## Styles The setup.cfg `style=` configuration controls how the VCS information is rendered into a version string. The default style, "pep440", produces a PEP440-compliant string, equal to the un-prefixed tag name for actual releases, and containing an additional "local version" section with more detail for in-between builds. For Git, this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11" tag. For released software (exactly equal to a known tag), the identifier will only contain the stripped tag, e.g. "0.11". Other styles are available. See [details.md](details.md) in the Versioneer source tree for descriptions. ## Debugging Versioneer tries to avoid fatal errors: if something goes wrong, it will tend to return a version of "0+unknown". To investigate the problem, run `setup.py version`, which will run the version-lookup code in a verbose mode, and will display the full contents of `get_versions()` (including the `error` string, which may help identify what went wrong). ## Known Limitations Some situations are known to cause problems for Versioneer. This details the most significant ones. More can be found on Github [issues page](https://github.com/warner/python-versioneer/issues). ### Subprojects Versioneer has limited support for source trees in which `setup.py` is not in the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are two common reasons why `setup.py` might not be in the root: * Source trees which contain multiple subprojects, such as [Buildbot](https://github.com/buildbot/buildbot), which contains both "master" and "slave" subprojects, each with their own `setup.py`, `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI distributions (and upload multiple independently-installable tarballs). 
* Source trees whose main purpose is to contain a C library, but which also provide bindings to Python (and perhaps other langauges) in subdirectories. Versioneer will look for `.git` in parent directories, and most operations should get the right version string. However `pip` and `setuptools` have bugs and implementation details which frequently cause `pip install .` from a subproject directory to fail to find a correct version string (so it usually defaults to `0+unknown`). `pip install --editable .` should work correctly. `setup.py install` might work too. Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in some later version. [Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking this issue. The discussion in [PR #61](https://github.com/warner/python-versioneer/pull/61) describes the issue from the Versioneer side in more detail. [pip PR#3176](https://github.com/pypa/pip/pull/3176) and [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve pip to let Versioneer work correctly. Versioneer-0.16 and earlier only looked for a `.git` directory next to the `setup.cfg`, so subprojects were completely unsupported with those releases. ### Editable installs with setuptools <= 18.5 `setup.py develop` and `pip install --editable .` allow you to install a project into a virtualenv once, then continue editing the source code (and test) without re-installing after every change. "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a convenient way to specify executable scripts that should be installed along with the python package. These both work as expected when using modern setuptools. When using setuptools-18.5 or earlier, however, certain operations will cause `pkg_resources.DistributionNotFound` errors when running the entrypoint script, which must be resolved by re-installing the package. This happens when the install happens with one version, then the egg_info data is regenerated while a different version is checked out. Many setup.py commands cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into a different virtualenv), so this can be surprising. [Bug #83](https://github.com/warner/python-versioneer/issues/83) describes this one, but upgrading to a newer version of setuptools should probably resolve it. ### Unicode version strings While Versioneer works (and is continually tested) with both Python 2 and Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. Newer releases probably generate unicode version strings on py2. It's not clear that this is wrong, but it may be surprising for applications when then write these strings to a network connection or include them in bytes-oriented APIs like cryptographic checksums. [Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates this question. ## Updating Versioneer To upgrade your project to a new release of Versioneer, do the following: * install the new Versioneer (`pip install -U versioneer` or equivalent) * edit `setup.cfg`, if necessary, to include any new configuration settings indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. * re-run `versioneer install` in your source tree, to replace `SRC/_version.py` * commit any changed files ## Future Directions This tool is designed to make it easily extended to other version-control systems: all VCS-specific components are in separate directories like src/git/ . 
The top-level `versioneer.py` script is assembled from these components by running make-versioneer.py . In the future, make-versioneer.py will take a VCS name as an argument, and will construct a version of `versioneer.py` that is specific to the given VCS. It might also take the configuration arguments that are currently provided manually during installation by editing setup.py . Alternatively, it might go the other direction and include code from all supported VCS systems, reducing the number of intermediate scripts. ## License To make Versioneer easier to embed, all its code is dedicated to the public domain. The `_version.py` that it creates is also in the public domain. Specifically, both are released under the Creative Commons "Public Domain Dedication" license (CC0-1.0), as described in https://creativecommons.org/publicdomain/zero/1.0/ . """ from __future__ import print_function try: import configparser except ImportError: import ConfigParser as configparser import errno import json import os import re import subprocess import sys class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_root(): """Get the project root directory. We require that all commands are run from the project root, i.e. the directory that contains setup.py, setup.cfg, and versioneer.py . """ root = os.path.realpath(os.path.abspath(os.getcwd())) setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): # allow 'python path/to/setup.py COMMAND' root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): err = ("Versioneer was unable to run the project root directory. " "Versioneer requires setup.py to be executed from " "its immediate directory (like 'python setup.py COMMAND'), " "or in a way that lets it use sys.argv[0] to find the root " "(like 'python path/to/setup.py COMMAND').") raise VersioneerBadRootError(err) try: # Certain runtime workflows (setup.py install/develop in a setuptools # tree) execute all dependencies in a single python process, so # "versioneer" may be imported multiple times, and python's shared # module-import table will cache the first one. So we can't use # os.path.dirname(__file__), as that will find whichever # versioneer.py was first imported, even in later projects. me = os.path.realpath(os.path.abspath(__file__)) me_dir = os.path.normcase(os.path.splitext(me)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) if me_dir != vsr_dir: print("Warning: build in %s is using versioneer.py from %s" % (os.path.dirname(me), versioneer_py)) except NameError: pass return root def get_config_from_root(root): """Read the project setup.cfg file to determine Versioneer config.""" # This might raise EnvironmentError (if setup.cfg is missing), or # configparser.NoSectionError (if it lacks a [versioneer] section), or # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . 
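# As a rough illustration, a [versioneer] section that this parser accepts
# could look like the block below; the concrete values are placeholders for
# documentation purposes and are not necessarily what this repository uses
# in its own setup.cfg:
#
#     [versioneer]
#     VCS = git
#     style = pep440
#     versionfile_source = delira/_version.py
#     versionfile_build = delira/_version.py
#     tag_prefix = v
#     parentdir_prefix = delira-
#
# Only "VCS" is mandatory; every other key falls back to None (or "" for
# style) via the get() helper defined below.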
setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.SafeConfigParser() with open(setup_cfg, "r") as f: parser.readfp(f) VCS = parser.get("versioneer", "VCS") # mandatory def get(parser, name): if parser.has_option("versioneer", name): return parser.get("versioneer", name) return None cfg = VersioneerConfig() cfg.VCS = VCS cfg.style = get(parser, "style") or "" cfg.versionfile_source = get(parser, "versionfile_source") cfg.versionfile_build = get(parser, "versionfile_build") cfg.tag_prefix = get(parser, "tag_prefix") if cfg.tag_prefix in ("''", '""'): cfg.tag_prefix = "" cfg.parentdir_prefix = get(parser, "parentdir_prefix") cfg.verbose = get(parser, "verbose") return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" # these dictionaries contain VCS-specific tools LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) return None, p.returncode return stdout, p.returncode LONG_VERSION_PY['git'] = ''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.18 (https://github.com/warner/python-versioneer) """Git implementation of _version.py.""" import errno import os import re import subprocess import sys def get_keywords(): """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). 
git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_config(): """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "%(STYLE)s" cfg.tag_prefix = "%(TAG_PREFIX)s" cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %%s" %% dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %%s" %% (commands,)) return None, None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) print("stdout was %%s" %% stdout) return None, p.returncode return stdout, p.returncode def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %%s but none started with prefix %%s" %% (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %%d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%%s', no digits" %% ",".join(refs - tags)) if verbose: print("likely tags: %%s" %% ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %%s" %% r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. 
""" GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %%s not under git control" %% root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%%s*" %% tag_prefix], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%%s'" %% describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%%s' doesn't start with prefix '%%s'" print(fmt %% (full_tag, tag_prefix)) pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" %% (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces): """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): """TAG[.post.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 
0.post.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post.dev%%d" %% pieces["distance"] else: # exception #1 rendered = "0.post.dev%%d" %% pieces["distance"] return rendered def render_pep440_post(pieces): """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%%s" %% pieces["short"] else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%%s" %% pieces["short"] return rendered def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Eexceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%%s'" %% style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} def get_versions(): """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. 
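    # A brief summary of the fallback chain implemented below (the code is the
    # authoritative behaviour): first try the keywords expanded by git-archive,
    # then ask git itself via "git describe", then look at the name of the
    # parent directory, and finally report "0+unknown" if every method fails.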
cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for i in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} ''' @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". 
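        # Editor's illustrative example (not upstream Versioneer): an expanded
        # refnames keyword such as " (HEAD -> master, tag: v0.3.2, origin/master)"
        # produces refs {"HEAD -> master", "tag: v0.3.2", "origin/master"}; the
        # "tag: " handling above keeps {"v0.3.2"}, and only when no "tag: "
        # entries exist does the digit heuristic below have to guess.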
tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? 
pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def do_vcs_install(manifest_in, versionfile_source, ipy): """Git-specific installation logic for Versioneer. For Git, this means creating/changing .gitattributes to mark _version.py for export-subst keyword substitution. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] files = [manifest_in, versionfile_source] if ipy: files.append(ipy) try: me = __file__ if me.endswith(".pyc") or me.endswith(".pyo"): me = os.path.splitext(me)[0] + ".py" versioneer_file = os.path.relpath(me) except NameError: versioneer_file = "versioneer.py" files.append(versioneer_file) present = False try: f = open(".gitattributes", "r") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() except EnvironmentError: pass if not present: f = open(".gitattributes", "a+") f.write("%s export-subst\n" % versionfile_source) f.close() files.append(".gitattributes") run_command(GITS, ["add", "--"] + files) def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") SHORT_VERSION_PY = """ # This file was generated by 'versioneer.py' (0.18) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. 
import json

version_json = '''
%s
'''  # END VERSION_JSON


def get_versions():
    return json.loads(version_json)
"""


def versions_from_file(filename):
    """Try to determine the version from _version.py if present."""
    try:
        with open(filename) as f:
            contents = f.read()
    except EnvironmentError:
        raise NotThisMethod("unable to read _version.py")
    mo = re.search(r"version_json = '''\n(.*)'''  # END VERSION_JSON",
                   contents, re.M | re.S)
    if not mo:
        mo = re.search(r"version_json = '''\r\n(.*)'''  # END VERSION_JSON",
                       contents, re.M | re.S)
    if not mo:
        raise NotThisMethod("no version_json in _version.py")
    return json.loads(mo.group(1))


def write_to_version_file(filename, versions):
    """Write the given version number to the given _version.py file."""
    os.unlink(filename)
    contents = json.dumps(versions, sort_keys=True,
                          indent=1, separators=(",", ": "))
    with open(filename, "w") as f:
        f.write(SHORT_VERSION_PY % contents)

    print("set %s to '%s'" % (filename, versions["version"]))


def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a ."""
    if "+" in pieces.get("closest-tag", ""):
        return "."
    return "+"


def render_pep440(pieces):
    """Build up version string, with post-release "local version identifier".

    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += plus_or_dot(pieces)
            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # exception #1
        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
                                          pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered


def render_pep440_pre(pieces):
    """TAG[.post.devDISTANCE] -- No -dirty.

    Exceptions:
    1: no tags. 0.post.devDISTANCE
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += ".post.dev%d" % pieces["distance"]
    else:
        # exception #1
        rendered = "0.post.dev%d" % pieces["distance"]
    return rendered


def render_pep440_post(pieces):
    """TAG[.postDISTANCE[.dev0]+gHEX] .

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += "g%s" % pieces["short"]
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += "+g%s" % pieces["short"]
    return rendered


def render_pep440_old(pieces):
    """TAG[.postDISTANCE[.dev0]] .

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
    return rendered


def render_git_describe(pieces):
    """TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags.
    HEX[-dirty] (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render_git_describe_long(pieces):
    """TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always --long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render(pieces, style):
    """Render the given version pieces into the requested style."""
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    if not style or style == "default":
        style = "pep440"  # the default

    if style == "pep440":
        rendered = render_pep440(pieces)
    elif style == "pep440-pre":
        rendered = render_pep440_pre(pieces)
    elif style == "pep440-post":
        rendered = render_pep440_post(pieces)
    elif style == "pep440-old":
        rendered = render_pep440_old(pieces)
    elif style == "git-describe":
        rendered = render_git_describe(pieces)
    elif style == "git-describe-long":
        rendered = render_git_describe_long(pieces)
    else:
        raise ValueError("unknown style '%s'" % style)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}


class VersioneerBadRootError(Exception):
    """The project root directory is unknown or missing key files."""


def get_versions(verbose=False):
    """Get the project version from whatever source is available.

    Returns dict with two keys: 'version' and 'full'.
    """
    if "versioneer" in sys.modules:
        # see the discussion in cmdclass.py:get_cmdclass()
        del sys.modules["versioneer"]

    root = get_root()
    cfg = get_config_from_root(root)

    assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
    handlers = HANDLERS.get(cfg.VCS)
    assert handlers, "unrecognized VCS '%s'" % cfg.VCS
    verbose = verbose or cfg.verbose
    assert cfg.versionfile_source is not None, \
        "please set versioneer.versionfile_source"
    assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"

    versionfile_abs = os.path.join(root, cfg.versionfile_source)

    # extract version from first of: _version.py, VCS command (e.g. 'git
    # describe'), parentdir. This is meant to work for developers using a
    # source checkout, for users of a tarball created by 'setup.py sdist',
    # and for users of a tarball/zipball created by 'git archive' or github's
    # download-from-tag feature or the equivalent in other VCSes.
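    # Editor's sketch of that order (assumption-laden, not upstream docs): with
    # a hypothetical tag_prefix "v" and a clean checkout four commits past tag
    # v0.3.2, the keyword and _version.py lookups below normally raise
    # NotThisMethod, the VCS step then yields pieces rendering to roughly
    # "0.3.2+4.g<short-hash>" under the default pep440 style, and the
    # parentdir_prefix step is only reached for plain unpacked tarballs.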
get_keywords_f = handlers.get("get_keywords") from_keywords_f = handlers.get("keywords") if get_keywords_f and from_keywords_f: try: keywords = get_keywords_f(versionfile_abs) ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) if verbose: print("got version from expanded keyword %s" % ver) return ver except NotThisMethod: pass try: ver = versions_from_file(versionfile_abs) if verbose: print("got version from file %s %s" % (versionfile_abs, ver)) return ver except NotThisMethod: pass from_vcs_f = handlers.get("pieces_from_vcs") if from_vcs_f: try: pieces = from_vcs_f(cfg.tag_prefix, root, verbose) ver = render(pieces, cfg.style) if verbose: print("got version from VCS %s" % ver) return ver except NotThisMethod: pass try: if cfg.parentdir_prefix: ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) if verbose: print("got version from parentdir %s" % ver) return ver except NotThisMethod: pass if verbose: print("unable to compute version") return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} def get_version(): """Get the short version string for this project.""" return get_versions()["version"] def get_cmdclass(): """Get the custom setuptools/distutils subclasses used by Versioneer.""" if "versioneer" in sys.modules: del sys.modules["versioneer"] # this fixes the "python setup.py develop" case (also 'install' and # 'easy_install .'), in which subdependencies of the main project are # built (using setup.py bdist_egg) in the same python process. Assume # a main project A and a dependency B, which use different versions # of Versioneer. A's setup.py imports A's Versioneer, leaving it in # sys.modules by the time B's setup.py is executed, causing B to run # with the wrong versioneer. Setuptools wraps the sub-dep builds in a # sandbox that restores sys.modules to it's pre-build state, so the # parent is protected against the child's "import versioneer". By # removing ourselves from sys.modules here, before the child build # happens, we protect the child from the parent's versioneer too. # Also see https://github.com/warner/python-versioneer/issues/52 cmds = {} # we add "version" to both distutils and setuptools from distutils.core import Command class cmd_version(Command): description = "report generated version string" user_options = [] boolean_options = [] def initialize_options(self): pass def finalize_options(self): pass def run(self): vers = get_versions(verbose=True) print("Version: %s" % vers["version"]) print(" full-revisionid: %s" % vers.get("full-revisionid")) print(" dirty: %s" % vers.get("dirty")) print(" date: %s" % vers.get("date")) if vers["error"]: print(" error: %s" % vers["error"]) cmds["version"] = cmd_version # we override "build_py" in both distutils and setuptools # # most invocation pathways end up running build_py: # distutils/build -> build_py # distutils/install -> distutils/build ->.. # setuptools/bdist_wheel -> distutils/install ->.. # setuptools/bdist_egg -> distutils/install_lib -> build_py # setuptools/install -> bdist_egg ->.. # setuptools/develop -> ? # pip install: # copies source tree to a tempdir before running egg_info/etc # if .git isn't copied too, 'git describe' will fail # then does setup.py bdist_wheel, or sometimes setup.py install # setup.py egg_info -> ? 
# we override different "build_py" commands for both environments if "setuptools" in sys.modules: from setuptools.command.build_py import build_py as _build_py else: from distutils.command.build_py import build_py as _build_py class cmd_build_py(_build_py): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_py.run(self) # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) cmds["build_py"] = cmd_build_py if "cx_Freeze" in sys.modules: # cx_freeze enabled? from cx_Freeze.dist import build_exe as _build_exe # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. # setup(console=[{ # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION # "product_version": versioneer.get_version(), # ... class cmd_build_exe(_build_exe): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) _build_exe.run(self) os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) cmds["build_exe"] = cmd_build_exe del cmds["build_py"] if 'py2exe' in sys.modules: # py2exe enabled? try: from py2exe.distutils_buildexe import py2exe as _py2exe # py3 except ImportError: from py2exe.build_exe import py2exe as _py2exe # py2 class cmd_py2exe(_py2exe): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) _py2exe.run(self) os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) cmds["py2exe"] = cmd_py2exe # we override different "sdist" commands for both environments if "setuptools" in sys.modules: from setuptools.command.sdist import sdist as _sdist else: from distutils.command.sdist import sdist as _sdist class cmd_sdist(_sdist): def run(self): versions = get_versions() self._versioneer_generated_versions = versions # unless we update this, the command will keep using the old # version self.distribution.metadata.version = versions["version"] return _sdist.run(self) def make_release_tree(self, base_dir, files): root = get_root() cfg = get_config_from_root(root) _sdist.make_release_tree(self, base_dir, files) # now locate _version.py in the new base_dir directory # (remembering that it may be a hardlink) and replace it with an # updated value target_versionfile = os.path.join(base_dir, cfg.versionfile_source) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, self._versioneer_generated_versions) cmds["sdist"] = cmd_sdist return cmds CONFIG_ERROR = """ setup.cfg is missing the necessary Versioneer configuration. 
You need a section like:

 [versioneer]
 VCS = git
 style = pep440
 versionfile_source = src/myproject/_version.py
 versionfile_build = myproject/_version.py
 tag_prefix =
 parentdir_prefix = myproject-

You will also need to edit your setup.py to use the results:

 import versioneer
 setup(version=versioneer.get_version(),
       cmdclass=versioneer.get_cmdclass(), ...)

Please read the docstring in ./versioneer.py for configuration instructions,
edit setup.cfg, and re-run the installer or 'python versioneer.py setup'.
"""

SAMPLE_CONFIG = """
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.

[versioneer]
#VCS = git
#style = pep440
#versionfile_source =
#versionfile_build =
#tag_prefix =
#parentdir_prefix =

"""

INIT_PY_SNIPPET = """
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
"""


def do_setup():
    """Main VCS-independent setup function for installing Versioneer."""
    root = get_root()
    try:
        cfg = get_config_from_root(root)
    except (EnvironmentError, configparser.NoSectionError,
            configparser.NoOptionError) as e:
        if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
            print("Adding sample versioneer config to setup.cfg",
                  file=sys.stderr)
            with open(os.path.join(root, "setup.cfg"), "a") as f:
                f.write(SAMPLE_CONFIG)
        print(CONFIG_ERROR, file=sys.stderr)
        return 1

    print(" creating %s" % cfg.versionfile_source)
    with open(cfg.versionfile_source, "w") as f:
        LONG = LONG_VERSION_PY[cfg.VCS]
        f.write(LONG % {"DOLLAR": "$",
                        "STYLE": cfg.style,
                        "TAG_PREFIX": cfg.tag_prefix,
                        "PARENTDIR_PREFIX": cfg.parentdir_prefix,
                        "VERSIONFILE_SOURCE": cfg.versionfile_source,
                        })

    ipy = os.path.join(os.path.dirname(cfg.versionfile_source),
                       "__init__.py")
    if os.path.exists(ipy):
        try:
            with open(ipy, "r") as f:
                old = f.read()
        except EnvironmentError:
            old = ""
        if INIT_PY_SNIPPET not in old:
            print(" appending to %s" % ipy)
            with open(ipy, "a") as f:
                f.write(INIT_PY_SNIPPET)
        else:
            print(" %s unmodified" % ipy)
    else:
        print(" %s doesn't exist, ok" % ipy)
        ipy = None

    # Make sure both the top-level "versioneer.py" and versionfile_source
    # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so
    # they'll be copied into source distributions. Pip won't be able to
    # install the package without this.
    manifest_in = os.path.join(root, "MANIFEST.in")
    simple_includes = set()
    try:
        with open(manifest_in, "r") as f:
            for line in f:
                if line.startswith("include "):
                    for include in line.split()[1:]:
                        simple_includes.add(include)
    except EnvironmentError:
        pass
    # That doesn't cover everything MANIFEST.in can do
    # (http://docs.python.org/2/distutils/sourcedist.html#commands), so
    # it might give some false negatives. Appending redundant 'include'
    # lines is safe, though.
    if "versioneer.py" not in simple_includes:
        print(" appending 'versioneer.py' to MANIFEST.in")
        with open(manifest_in, "a") as f:
            f.write("include versioneer.py\n")
    else:
        print(" 'versioneer.py' already in MANIFEST.in")
    if cfg.versionfile_source not in simple_includes:
        print(" appending versionfile_source ('%s') to MANIFEST.in" %
              cfg.versionfile_source)
        with open(manifest_in, "a") as f:
            f.write("include %s\n" % cfg.versionfile_source)
    else:
        print(" versionfile_source already in MANIFEST.in")

    # Make VCS-specific changes. For git, this means creating/changing
    # .gitattributes to mark _version.py for export-subst keyword
    # substitution.
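    # Editor's illustrative note (not upstream Versioneer): for a
    # versionfile_source of, say, "delira/_version.py", the call below adds a
    # line of the form
    #   delira/_version.py export-subst
    # to .gitattributes if it is missing, so 'git archive' tarballs have their
    # $Format$ keywords expanded.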
    do_vcs_install(manifest_in, cfg.versionfile_source, ipy)
    return 0


def scan_setup_py():
    """Validate the contents of setup.py against Versioneer's expectations."""
    found = set()
    setters = False
    errors = 0
    with open("setup.py", "r") as f:
        for line in f.readlines():
            if "import versioneer" in line:
                found.add("import")
            if "versioneer.get_cmdclass()" in line:
                found.add("cmdclass")
            if "versioneer.get_version()" in line:
                found.add("get_version")
            if "versioneer.VCS" in line:
                setters = True
            if "versioneer.versionfile_source" in line:
                setters = True
    if len(found) != 3:
        print("")
        print("Your setup.py appears to be missing some important items")
        print("(but I might be wrong). Please make sure it has something")
        print("roughly like the following:")
        print("")
        print(" import versioneer")
        print(" setup( version=versioneer.get_version(),")
        print("        cmdclass=versioneer.get_cmdclass(), ...)")
        print("")
        errors += 1
    if setters:
        print("You should remove lines like 'versioneer.VCS = ' and")
        print("'versioneer.versionfile_source = ' . This configuration")
        print("now lives in setup.cfg, and should be removed from setup.py")
        print("")
        errors += 1
    return errors


if __name__ == "__main__":
    cmd = sys.argv[1]
    if cmd == "setup":
        errors = do_setup()
        errors += scan_setup_py()
        if errors:
            sys.exit(1)
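

# ---------------------------------------------------------------------------
# Editor's illustrative sketch -- NOT part of upstream Versioneer. It shows,
# under assumed values (tag "0.3.2", distance 14, short hash "abc1234"), what
# a few of the render_* styles above produce, and how a project's setup.py
# typically consumes this module. The helper is never called by Versioneer.
def _render_style_examples():
    # Hypothetical "pieces" dict as git_pieces_from_vcs() would build it; the
    # hashes and date are made up for illustration.
    pieces = {"closest-tag": "0.3.2",
              "distance": 14,
              "short": "abc1234",
              "long": "abc1234",  # normally the full 40-character revision id
              "dirty": True,
              "error": None,
              "date": "2019-07-01T12:00:00+0200"}
    # Expected results for this input:
    #   pep440        -> 0.3.2+14.gabc1234.dirty
    #   pep440-post   -> 0.3.2.post14.dev0+gabc1234
    #   git-describe  -> 0.3.2-14-gabc1234-dirty
    for style in ("pep440", "pep440-post", "git-describe"):
        print("%-13s %s" % (style, render(pieces, style)["version"]))
    # In a project's setup.py the usual integration is simply:
    #   import versioneer
    #   setup(version=versioneer.get_version(),
    #         cmdclass=versioneer.get_cmdclass(), ...)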