Repository: pySTEPS/pysteps Branch: master Commit: 1d472a50354c Files: 264 Total size: 1.9 MB Directory structure: gitextract_7wafhhns/ ├── .github/ │ └── workflows/ │ ├── check_black.yml │ ├── python-publish.yml │ └── test_pysteps.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CITATION.bib ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── PKG-INFO ├── README.rst ├── ci/ │ ├── ci_test_env.yml │ ├── fetch_pysteps_data.py │ └── test_plugin_support.py ├── doc/ │ ├── .gitignore │ ├── Makefile │ ├── _static/ │ │ └── pysteps.css │ ├── _templates/ │ │ └── layout.html │ ├── make.bat │ ├── rebuild_docs.sh │ ├── requirements.txt │ └── source/ │ ├── conf.py │ ├── developer_guide/ │ │ ├── build_the_docs.rst │ │ ├── contributors_guidelines.rst │ │ ├── importer_plugins.rst │ │ ├── pypi.rst │ │ ├── test_pysteps.rst │ │ └── update_conda_forge.rst │ ├── index.rst │ ├── pysteps_reference/ │ │ ├── blending.rst │ │ ├── cascade.rst │ │ ├── datasets.rst │ │ ├── decorators.rst │ │ ├── downscaling.rst │ │ ├── extrapolation.rst │ │ ├── feature.rst │ │ ├── index.rst │ │ ├── io.rst │ │ ├── motion.rst │ │ ├── noise.rst │ │ ├── nowcasts.rst │ │ ├── postprocessing.rst │ │ ├── pysteps.rst │ │ ├── timeseries.rst │ │ ├── tracking.rst │ │ ├── utils.rst │ │ ├── verification.rst │ │ └── visualization.rst │ ├── references.bib │ ├── user_guide/ │ │ ├── example_data.rst │ │ ├── install_pysteps.rst │ │ ├── machine_learning_pysteps.rst │ │ ├── pystepsrc_example.rst │ │ └── set_pystepsrc.rst │ └── zz_bibliography.rst ├── environment.yml ├── environment_dev.yml ├── examples/ │ ├── LK_buffer_mask.py │ ├── README.txt │ ├── advection_correction.py │ ├── anvil_nowcast.py │ ├── data_transformations.py │ ├── ens_kalman_filter_blended_forecast.py │ ├── linda_nowcasts.py │ ├── my_first_nowcast.ipynb │ ├── optical_flow_methods_convergence.py │ ├── plot_cascade_decomposition.py │ ├── plot_custom_precipitation_range.py │ ├── plot_ensemble_verification.py │ ├── plot_extrapolation_nowcast.py 
│ ├── plot_linear_blending.py │ ├── plot_noise_generators.py │ ├── plot_optical_flow.py │ ├── plot_steps_nowcast.py │ ├── probability_forecast.py │ ├── rainfarm_downscale.py │ ├── steps_blended_forecast.py │ └── thunderstorm_detection_and_tracking.py ├── pyproject.toml ├── pysteps/ │ ├── __init__.py │ ├── blending/ │ │ ├── __init__.py │ │ ├── clim.py │ │ ├── ens_kalman_filter_methods.py │ │ ├── interface.py │ │ ├── linear_blending.py │ │ ├── pca_ens_kalman_filter.py │ │ ├── skill_scores.py │ │ ├── steps.py │ │ └── utils.py │ ├── cascade/ │ │ ├── __init__.py │ │ ├── bandpass_filters.py │ │ ├── decomposition.py │ │ └── interface.py │ ├── datasets.py │ ├── decorators.py │ ├── downscaling/ │ │ ├── __init__.py │ │ ├── interface.py │ │ └── rainfarm.py │ ├── exceptions.py │ ├── extrapolation/ │ │ ├── __init__.py │ │ ├── interface.py │ │ └── semilagrangian.py │ ├── feature/ │ │ ├── __init__.py │ │ ├── blob.py │ │ ├── interface.py │ │ ├── shitomasi.py │ │ └── tstorm.py │ ├── io/ │ │ ├── __init__.py │ │ ├── archive.py │ │ ├── exporters.py │ │ ├── importers.py │ │ ├── interface.py │ │ ├── mch_lut_8bit_Metranet_AZC_V104.txt │ │ ├── mch_lut_8bit_Metranet_v103.txt │ │ ├── nowcast_importers.py │ │ └── readers.py │ ├── motion/ │ │ ├── __init__.py │ │ ├── _proesmans.pyx │ │ ├── _vet.pyx │ │ ├── constant.py │ │ ├── darts.py │ │ ├── farneback.py │ │ ├── interface.py │ │ ├── lucaskanade.py │ │ ├── proesmans.py │ │ └── vet.py │ ├── noise/ │ │ ├── __init__.py │ │ ├── fftgenerators.py │ │ ├── interface.py │ │ ├── motion.py │ │ └── utils.py │ ├── nowcasts/ │ │ ├── __init__.py │ │ ├── anvil.py │ │ ├── extrapolation.py │ │ ├── interface.py │ │ ├── lagrangian_probability.py │ │ ├── linda.py │ │ ├── sprog.py │ │ ├── sseps.py │ │ ├── steps.py │ │ └── utils.py │ ├── postprocessing/ │ │ ├── __init__.py │ │ ├── diagnostics.py │ │ ├── ensemblestats.py │ │ ├── interface.py │ │ └── probmatching.py │ ├── pystepsrc │ ├── pystepsrc_schema.json │ ├── scripts/ │ │ ├── __init__.py │ │ ├── 
fit_vel_pert_params.py │ │ └── run_vel_pert_analysis.py │ ├── tests/ │ │ ├── __init__.py │ │ ├── helpers.py │ │ ├── test_archive.py │ │ ├── test_blending_clim.py │ │ ├── test_blending_linear_blending.py │ │ ├── test_blending_pca_ens_kalman_filter.py │ │ ├── test_blending_skill_scores.py │ │ ├── test_blending_steps.py │ │ ├── test_blending_utils.py │ │ ├── test_cascade.py │ │ ├── test_datasets.py │ │ ├── test_decorators.py │ │ ├── test_downscaling_rainfarm.py │ │ ├── test_ensscores.py │ │ ├── test_exporters.py │ │ ├── test_extrapolation_semilagrangian.py │ │ ├── test_feature.py │ │ ├── test_feature_tstorm.py │ │ ├── test_importer_decorator.py │ │ ├── test_interfaces.py │ │ ├── test_io_archive.py │ │ ├── test_io_bom_rf3.py │ │ ├── test_io_dwd_hdf5.py │ │ ├── test_io_fmi_geotiff.py │ │ ├── test_io_fmi_pgm.py │ │ ├── test_io_knmi_hdf5.py │ │ ├── test_io_mch_gif.py │ │ ├── test_io_mrms_grib.py │ │ ├── test_io_nowcast_importers.py │ │ ├── test_io_opera_hdf5.py │ │ ├── test_io_readers.py │ │ ├── test_io_saf_crri.py │ │ ├── test_motion.py │ │ ├── test_motion_farneback.py │ │ ├── test_motion_lk.py │ │ ├── test_noise_fftgenerators.py │ │ ├── test_noise_motion.py │ │ ├── test_nowcasts_anvil.py │ │ ├── test_nowcasts_lagrangian_probability.py │ │ ├── test_nowcasts_linda.py │ │ ├── test_nowcasts_sprog.py │ │ ├── test_nowcasts_sseps.py │ │ ├── test_nowcasts_steps.py │ │ ├── test_nowcasts_utils.py │ │ ├── test_paramsrc.py │ │ ├── test_plt_animate.py │ │ ├── test_plt_cartopy.py │ │ ├── test_plt_motionfields.py │ │ ├── test_plt_precipfields.py │ │ ├── test_plugins_support.py │ │ ├── test_postprocessing_ensemblestats.py │ │ ├── test_postprocessing_probmatching.py │ │ ├── test_timeseries_autoregression.py │ │ ├── test_tracking_tdating.py │ │ ├── test_utils_arrays.py │ │ ├── test_utils_cleansing.py │ │ ├── test_utils_conversion.py │ │ ├── test_utils_dimension.py │ │ ├── test_utils_interpolate.py │ │ ├── test_utils_pca.py │ │ ├── test_utils_reprojection.py │ │ ├── test_utils_spectral.py 
│ │ ├── test_utils_transformation.py │ │ ├── test_verification_detcatscores.py │ │ ├── test_verification_detcontscores.py │ │ ├── test_verification_probscores.py │ │ ├── test_verification_salscores.py │ │ └── test_verification_spatialscores.py │ ├── timeseries/ │ │ ├── __init__.py │ │ ├── autoregression.py │ │ └── correlation.py │ ├── tracking/ │ │ ├── __init__.py │ │ ├── interface.py │ │ ├── lucaskanade.py │ │ └── tdating.py │ ├── utils/ │ │ ├── __init__.py │ │ ├── arrays.py │ │ ├── check_norain.py │ │ ├── cleansing.py │ │ ├── conversion.py │ │ ├── dimension.py │ │ ├── fft.py │ │ ├── images.py │ │ ├── interface.py │ │ ├── interpolate.py │ │ ├── pca.py │ │ ├── reprojection.py │ │ ├── spectral.py │ │ ├── tapering.py │ │ └── transformation.py │ ├── verification/ │ │ ├── __init__.py │ │ ├── detcatscores.py │ │ ├── detcontscores.py │ │ ├── ensscores.py │ │ ├── interface.py │ │ ├── lifetime.py │ │ ├── plots.py │ │ ├── probscores.py │ │ ├── salscores.py │ │ └── spatialscores.py │ └── visualization/ │ ├── __init__.py │ ├── animations.py │ ├── basemaps.py │ ├── motionfields.py │ ├── precipfields.py │ ├── spectral.py │ ├── thunderstorms.py │ └── utils.py ├── requirements.txt ├── requirements_dev.txt ├── setup.py └── tox.ini ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/check_black.yml ================================================ # This workflow will test the code base using the LATEST version of black # IMPORTANT: Black is under development. Hence, minor formatting changes between # different versions are expected. # If this test fails, install the latest version of black and then run black. # Preferably, run black only on the files that you have modified. # This will facilitate the revision of the proposed changes. 
name: Check Black on: # Triggers the workflow on push or pull request events but only for the master branch push: branches: [ master ] pull_request: branches: [ master ] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python 3.11 uses: actions/setup-python@v5 with: python-version: "3.11" - name: Install dependencies run: | python -m pip install --upgrade pip pip install black - name: Black version run: black --version - name: Black check working-directory: ${{github.workspace}} run: black --check . ================================================ FILE: .github/workflows/python-publish.yml ================================================ # This workflows will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: Upload Python Package on: release: types: [published] jobs: deploy: runs-on: ubuntu-latest permissions: id-token: write steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools wheel numpy cython - name: Build run: | python setup.py sdist - name: Publish uses: pypa/gh-action-pypi-publish@release/v1 ================================================ FILE: .github/workflows/test_pysteps.yml ================================================ name: Test pysteps on: # Triggers the workflow on push or pull request events to the master branch push: branches: - master - pysteps-v2 pull_request: branches: - master - pysteps-v2 jobs: unit_tests: name: Unit Tests (${{ matrix.python-version }}, ${{ matrix.os }}) runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ "ubuntu-latest", "macos-latest", "windows-latest" ] python-version: ["3.11", "3.13"] max-parallel: 6 defaults: run: shell: bash -l {0} steps: 
- uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} # need headless opencv on Linux, see https://github.com/conda-forge/opencv-feedstock/issues/401 - name: Install mamba and create environment for Linux if: matrix.os == 'ubuntu-latest' uses: mamba-org/setup-micromamba@v1 with: # https://github.com/mamba-org/setup-micromamba/issues/225 micromamba-version: 1.5.10-0 environment-file: ci/ci_test_env.yml environment-name: test_environment generate-run-shell: false create-args: >- python=${{ matrix.python-version }} libopencv=*=headless* - name: Install mamba and create environment (not Linux) if: matrix.os != 'ubuntu-latest' uses: mamba-org/setup-micromamba@v1 with: # https://github.com/mamba-org/setup-micromamba/issues/225 micromamba-version: 1.5.10-0 environment-file: ci/ci_test_env.yml environment-name: test_environment generate-run-shell: false create-args: python=${{ matrix.python-version }} - name: Install pygrib (not win) if: matrix.os != 'windows-latest' run: mamba install --quiet pygrib - name: Install pysteps for MacOS if: matrix.os == 'macos-latest' working-directory: ${{github.workspace}} env: CC: gcc-13 CXX: g++-13 CXX1X: g++-13 HOMEBREW_NO_INSTALL_CLEANUP: 1 run: | brew update-reset brew update gcc-13 --version || brew install gcc@13 pip install . - name: Install pysteps if: matrix.os != 'macos-latest' working-directory: ${{github.workspace}} run: pip install . 
- name: Download pysteps data env: PYSTEPS_DATA_PATH: ${{github.workspace}}/pysteps_data working-directory: ${{github.workspace}}/ci run: python fetch_pysteps_data.py - name: Check imports working-directory: ${{github.workspace}}/pysteps_data run: | python --version python -c "import pysteps; print(pysteps.__file__)" python -c "from pysteps import motion" python -c "from pysteps.motion import vet" python -c "from pysteps.motion import proesmans" - name: Run tests and coverage report working-directory: ${{github.workspace}}/pysteps_data env: PYSTEPSRC: ${{github.workspace}}/pysteps_data/pystepsrc run: pytest --pyargs pysteps --cov=pysteps --cov-report=xml --cov-report=term -ra - name: Upload coverage to Codecov (Linux only) if: matrix.os == 'ubuntu-latest' uses: codecov/codecov-action@v4 env: OS: ${{ matrix.os }} PYTHON: ${{ matrix.python-version }} with: token: ${{ secrets.CODECOV_TOKEN }} files: ${{github.workspace}}/pysteps_data/coverage.xml flags: unit_tests env_vars: OS,PYTHON fail_ci_if_error: true verbose: true ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so *.c # Distribution / packaging .Python .tox build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Sphinx documentation docs/_build/ doc/_build/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Pycharm .idea # Spyder project settings .spyderproject .spyproject # VSCode .vscode # Rope project settings .ropeproject # mypy .mypy_cache/ # Mac OS Stuff .DS_Store # Running local tests /tmp /pysteps/tests/tmp/ ================================================ FILE: .pre-commit-config.yaml ================================================ repos: - repo: https://github.com/psf/black rev: 26.1.0 hooks: - id: black language_version: python3 ================================================ FILE: .readthedocs.yml ================================================ # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 # the build.os and build.tools section is mandatory build: os: "ubuntu-22.04" tools: python: "3.11" sphinx: configuration: doc/source/conf.py formats: - htmlzip python: install: - requirements: requirements.txt - requirements: doc/requirements.txt - method: pip path: . ================================================ FILE: CITATION.bib ================================================ @Article{gmd-12-4185-2019, AUTHOR = {Pulkkinen, S. and Nerini, D. and P\'erez Hortal, A. A. and Velasco-Forero, C. and Seed, A. and Germann, U. and Foresti, L.}, TITLE = {Pysteps: an open-source Python library for probabilistic precipitation nowcasting (v1.0)}, JOURNAL = {Geoscientific Model Development}, VOLUME = {12}, YEAR = {2019}, NUMBER = {10}, PAGES = {4185--4219}, URL = {https://gmd.copernicus.org/articles/12/4185/2019/}, DOI = {10.5194/gmd-12-4185-2019} } @article{qj.4461, AUTHOR = {Imhoff, Ruben O. 
and De Cruz, Lesley and Dewettinck, Wout and Brauer, Claudia C. and Uijlenhoet, Remko and van Heeringen, Klaas-Jan and Velasco-Forero, Carlos and Nerini, Daniele and Van Ginderachter, Michiel and Weerts, Albrecht H.}, TITLE = {Scale-dependent blending of ensemble rainfall nowcasts and NWP in the open-source pysteps library}, JOURNAL = {Quarterly Journal of the Royal Meteorological Society}, VOLUME = {n/a}, NUMBER = {n/a}, YEAR = {2023}, PAGES ={1--30}, DOI = {https://doi.org/10.1002/qj.4461}, URL = {https://rmets.onlinelibrary.wiley.com/doi/abs/10.1002/qj.4461}, } ================================================ FILE: CONTRIBUTING.rst ================================================ Contributing to pysteps ======================= Welcome! Pysteps is a community-driven initiative for developing and maintaining an easy to use, modular, free and open-source Python framework for short-term ensemble prediction systems. There are many ways to contribute to pysteps: * contributing bug reports and feature requests * contributing documentation * code contributions, new features, or bug fixes * contribute with usage examples Workflow for code contributions ------------------------------- We welcome all kinds of contributions, like documentation updates, bug fixes, or new features. The workflow for the contributions uses the usual `GitHub pull-request flow `_. If you have ideas for new contributions to the project, feel free to get in touch with the pysteps community on our `pysteps slack `__. To get access to it, you need to ask for an invitation or you can use the automatic invitation page `here `__. Our slack channel is a great place for preliminary discussions about new features or functionalities. Another place where you can report bugs and suggest new enhancements is the `project's issue tracker `_. 
First Time Contributors ----------------------- If you are interested in helping to improve pysteps, the best way to get started is by looking for "Good First Issue" in the `issue tracker `_. In a nutshell, the main steps to follow for contributing to pysteps are: * Setting up the development environment * Fork the repository * Install pre-commit hooks * Create a new branch for each contribution * Read the Code Style guide * Work on your changes * Test your changes * Push to your fork repository and create a new PR in GitHub. Setting up the Development environment -------------------------------------- The recommended way to set up the developer environment is to use Anaconda (commonly referred to as Conda). Conda quickly installs, runs, and updates packages and their dependencies. It also allows you to create, save, load, and switch between different environments on your local computer. Before continuing, Mac OSX users also need to install a more recent compiler. See instructions `here `__. The developer environment can be created from the file `environment_dev.yml `_ in the project's root directory by running the command:: conda env create -f environment_dev.yml This will create the **pysteps_dev** environment that can be activated using:: conda activate pysteps_dev Once the environment is activated, the latest version of pysteps can be installed in development mode, in such a way that the project appears to be installed, but yet is still editable from the source tree:: pip install -e . To test if the installation went fine, you can try importing pysteps from the python interpreter by running:: python -c "import pysteps" Fork the repository ~~~~~~~~~~~~~~~~~~~ Once you have set up the development environment, the next step is creating your local copy of the repository, where you will commit your modifications. The steps to follow are: #. Set up Git on your computer. #. Create a GitHub account (if you don't have one). #. Fork the repository in your GitHub. #. 
Clone a local copy of your fork. For example:: git clone https://github.com//pysteps.git Done!, now you have a local copy of pysteps git repository. If you are new to GitHub, below you can find a list of helpful tutorials: - http://rogerdudler.github.io/git-guide/index.html - https://www.atlassian.com/git/tutorials Install pre-commit hooks ~~~~~~~~~~~~~~~~~~~~~~~~ After setting up your development environment, install the git pre-commit hook by executing the following command in the repository's root:: pre-commit install The pre-commit hooks are scripts executed automatically in every commit to identify simple issues with the code. When an issue is identified (the pre-commit script exits with non-zero status), the hook aborts the commit and prints the error. Currently, pysteps only tests that the code to be committed complies with black's format style. In case that the commit is aborted, you only need to run black in the entire source code. This can be done by running :code:`black .` or :code:`pre-commit run --all-files`. The latter is recommended since it indicates if the commit contained any formatting errors (that are automatically corrected). Black's configuration is stored in the `pyproject.toml` file to ensure that the same configuration is used in every development environment. This configuration is automatically loaded when black is run from any directory in the pysteps project. IMPORTANT: Periodically update the black version used in the pre-commit hook by running:: pre-commit autoupdate For more information about git hooks and the pre-commit package, see: - https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks - https://pre-commit.com/ Create a new branch ~~~~~~~~~~~~~~~~~~~ As a collaborator, all the new contributions you want should be made in a new branch under your forked repository. Working on the master branch is reserved for Core Contributors only. Core Contributors are developers that actively work and maintain the repository. 
They are the only ones who accept pull requests and push commits directly to the pysteps repository. For more information on how to create and work with branches, see `"Branches in a Nutshell" `__ in the Git documentation Code Style ---------- We strongly suggest following the `PEP8 coding standards `__. Since PEP8 is a set of recommendations, these are the most important good coding practices for the pysteps project: * Always use four spaces for indentation (don’t use tabs). * Max line-length: 88 characters (note that we don't use the PEP8's 79 value). Enforced by `black`. * Always indent wrapped code for readability. Enforced by `black`. * Avoid extraneous whitespace. Enforced by `black`. * Don’t use whitespace to line up assignment operators (=, :). Enforced by `black`. * Avoid writing multiple statements in the same line. * Naming conventions should follow the recommendations from the `Google's python style guide `__, summarized as follows: .. raw:: html
Type Public Internal
Packages lower_with_under
Modules lower_with_under _lower_with_under
Classes CapWords _CapWords
Exceptions CapWords
Functions lower_with_under() _lower_with_under()
Global/Class Constants CAPS_WITH_UNDER _CAPS_WITH_UNDER
Global/Class Variables lower_with_under _lower_with_under
Instance Variables lower_with_under _lower_with_under (protected)
Method Names lower_with_under() _lower_with_under() (protected)
Function/Method Parameters lower_with_under
Local Variables lower_with_under
(source: `Section 3.16.4, Google's python style guide `__) - If you need to ignore part of the variables returned by a function, use "_" (single underscore) or __ (double underscore):: precip, __, metadata = import_bom_rf3('example_file.bom') precip2, _, metadata2 = import_bom_rf3('example_file2.bom') - Zen of Python (`PEP 20 `__), the guiding principles for Python’s design:: >>> import this The Zen of Python, by Tim Peters Beautiful is better than ugly. Explicit is better than implicit. Simple is better than complex. Complex is better than complicated. Flat is better than nested. Sparse is better than dense. Readability counts. Special cases aren't special enough to break the rules. Although practicality beats purity. Errors should never pass silently. Unless explicitly silenced. In the face of ambiguity, refuse the temptation to guess. There should be one-- and preferably only one --obvious way to do it. Although that way may not be obvious at first unless you're Dutch. Now is better than never. Although never is often better than *right* now. If the implementation is hard to explain, it's a bad idea. If the implementation is easy to explain, it may be a good idea. Namespaces are one honking great idea -- let's do more of those! For more suggestions on good coding practices for python, check these guidelines: - `The Hitchhiker's Guide to Python `__ - `Google's python style guide `__ - `PEP8 `__ **Using Black auto-formatter** To ensure a minimal style consistency, we use `black `__ to auto-format to the source code. The black configuration used in the pysteps project is defined in the pyproject.toml, and it is automatically detected by black. Black can be installed using any of the following:: conda install black #For the latest version: conda install -c conda-forge black pip install black Check the `official documentation `__ for more information. **Docstrings** Every module, function, or class must have a docstring that describe its purpose and how to use it. 
The docstrings follow the conventions described in the `PEP 257 `__ and the `Numpy's docstrings format `__. Here is a summary of the most important rules: - Always use triple quotes for docstrings, even if it fits a single line. - For one-line docstring, end the phrase with a period. - Use imperative mood for all docstrings ("""Return some value.""") rather than descriptive mood ("""Returns some value."""). Here is an example of a docstring:: def adjust_lag2_corrcoef1(gamma_1, gamma_2): """ A simple adjustment of lag-2 temporal autocorrelation coefficient to ensure that the resulting AR(2) process is stationary when the parameters are estimated from the Yule-Walker equations. Parameters ---------- gamma_1 : float Lag-1 temporal autocorrelation coefficient. gamma_2 : float Lag-2 temporal autocorrelation coefficient. Returns ------- out : float The adjusted lag-2 correlation coefficient. """ Contributions guidelines ------------------------ The collaborator guidelines used in pysteps were largely inspired by those of the `MyPy project `__. Collaborators guidelines ~~~~~~~~~~~~~~~~~~~~~~~~ As a collaborator, all your new contributions should be made in a new branch under your forked repository. Working on the master branch is reserved for Core Contributors to submit small changes only. Core Contributors are developers that actively work and maintain the repository. They are the only ones who accept pull requests and push commits directly to the **pysteps** repository. **IMPORTANT** However, for contributions that require a significant amount of work, we strongly suggest opening a new issue with the **enhancement** or **discussion** tag to encourage discussions. The discussions will help clarify the best way to approach the suggested changes or raise potential concerns. For code contributions, collaborators can use the usual `GitHub pull-request flow `__. Once your proposed changes are ready, you need to create a pull request (PR) from your fork in your GitHub account. 
Afterward, core contributors will review your proposed changes, provide feedback in the PR discussion, and maybe, request changes to the code. Once the PR is ready, a Core Developer will merge the changes into the main branch. **Important:** It is strongly suggested that each PR only addresses a single objective (e.g., fix a bug, improve documentation, etc.). This will help to reduce the time needed to process the PR. For changes outside the PR's objectives, we highly recommend opening a new PR. Testing your changes ~~~~~~~~~~~~~~~~~~~~ Before committing changes or creating pull requests, check that all the tests in the pysteps suite pass. See the `Testing pysteps `__ for detailed instructions to run the tests. Although it is not strictly needed, we suggest creating minimal tests for new contributions to ensure that they achieve the desired behavior. Pysteps uses the pytest framework that is easy to use and also supports complex functional testing for applications and libraries. Check the `pytests official documentation `_ for more information. The tests should be placed under the `pysteps.tests `_ module. The file should follow the **test_*.py** naming convention and have a descriptive name. A quick way to get familiar with the pytest syntax and the testing procedures is checking the python scripts present in the pysteps test module. Core developer guidelines ~~~~~~~~~~~~~~~~~~~~~~~~~ Working directly on the master branch is discouraged and is reserved only for small changes and updates that do not compromise the stability of the code. The *master* branch is a production branch that is ready to be deployed (cloned, installed, and ready to use). In consequence, this master branch is meant to be stable. The pysteps repository uses the GitHub Actions service to run tests every time you commit to GitHub. In that way, your modifications along with the entire library are tested. 
Pushing untested or work-in-progress changes to the master branch can potentially introduce bugs or break the stability of the package. Since the tests triggered by a commit to the master branch take around 20 minutes, any errors introduced there will be noticed after the stability of the master branch was compromised. In addition, other developers start working on a new feature from master from a potentially broken state. Instead, it is recommended to work on each new feature in its own branch, which can be pushed to the central repository for backup/collaboration. When you’re done with the feature's development work, you can merge the feature branch into the master or submit a Pull Request. This approach has two main advantages: - Every commit on the feature branch is tested via GitHub Actions. If the tests fail, they do not affect the **master** branch. - Once the changes are finished and the tests passed, the commit history can be squashed into a single commit and then merged into the master branch. Squashing the commits helps to keep a clean commit history in the main branch. Processing pull requests ~~~~~~~~~~~~~~~~~~~~~~~~ .. _`Squash and merge`: https://github.com/blog/2141-squash-your-commits To process the pull request, we follow similar rules to those used in the `mypy developer guidelines `_: * Always wait for tests to pass before merging PRs. * Always use "`Squash and merge`_" to merge PRs. * Make sure that the subject of the commit message summarizes the objective of the PR and does not finish with a dot. * Write a new commit message before merging that provides a detailed description of the changes introduced by the PR. Try to keep the maximum line length under 80 characters, splitting lines if necessary. **IMPORTANT:** Make sure that the commit message doesn't contain the branch's commit history! Also, if the PR fixes an issue, mention this explicitly. * Use the imperative mood in the subject line (e.g. "Fix typo in README"). 
After the PR is merged, the merged branch can be safely deleted. Preparing a new release ~~~~~~~~~~~~~~~~~~~~~~~ Core developers should follow the steps to prepare a new release (version): 1. Before creating the actual release in GitHub, be sure that every item in the following checklist was followed: * In the file setup.py, update the **version="X.X.X"** keyword in the setup function. * Update the version in PKG-INFO file. * If new dependencies were added to pysteps since the last release, add them to the **environment.yml, requirements.txt**, and **requirements_dev.txt** files. #. Create a new release in GitHub following `these guidelines `_. Include a detailed changelog in the release. #. Generate the source distribution for the new pysteps version and upload it to the `Python Package Index `_ (PyPI). See `Packaging the pysteps project `__ for a detailed description of this process. #. Update the conda-forge pysteps-feedstock following these guidelines: `Updating the conda-forge pysteps-feedstock `__ Credits ------- This document was based on the contributor guides of two Python open-source projects: * Py-Art_: Copyright (c) 2013, UChicago Argonne, LLC. `License `_. * mypy_: Copyright (c) 2015-2016 Jukka Lehtosalo and contributors. `MIT License `_. * Official github documentation (https://help.github.com) .. _Py-Art: https://github.com/ARM-DOE/pyart .. _mypy: https://github.com/python/mypy ================================================ FILE: LICENSE ================================================ BSD 3-Clause License Copyright (c) 2019, PySteps developers All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
================================================ FILE: MANIFEST.in ================================================ include LICENSE *.txt *.rst include pysteps/pystepsrc include pysteps/pystepsrc_schema.json include pysteps/io/mch_lut_8bit_Metranet_AZC_V104.txt include pysteps/io/mch_lut_8bit_Metranet_v103.txt recursive-include pysteps *.pyx include pyproject.toml ================================================ FILE: PKG-INFO ================================================ Metadata-Version: 1.2 Name: pysteps Version: 1.20.0 Summary: Python framework for short-term ensemble prediction systems Home-page: http://pypi.python.org/pypi/pysteps/ License: LICENSE Description: ======= pySteps ======= The pysteps initiative is a community that develops and maintains an easy to use, modular, free and open-source python framework for short-term ensemble prediction systems. The focus is on probabilistic nowcasting of radar precipitation fields, but pysteps is designed to allow a wider range of uses. Platform: UNKNOWN ================================================ FILE: README.rst ================================================ pysteps - Python framework for short-term ensemble prediction systems ===================================================================== .. start-badges .. list-table:: :stub-columns: 1 :widths: 10 90 * - docs - |stable| |colab| |gallery| * - status - |test| |docs| |codecov| |codacy| |black| * - package - |github| |conda| |pypi| |zenodo| * - community - |contributors| |downloads| |license| .. |docs| image:: https://readthedocs.org/projects/pysteps/badge/?version=latest :alt: Documentation Status :target: https://pysteps.readthedocs.io/ .. |test| image:: https://github.com/pySTEPS/pysteps/workflows/Test%20pysteps/badge.svg :alt: Test pysteps :target: https://github.com/pySTEPS/pysteps/actions?query=workflow%3A"Test+Pysteps" .. 
|black| image:: https://github.com/pySTEPS/pysteps/workflows/Check%20Black/badge.svg :alt: Check Black :target: https://github.com/pySTEPS/pysteps/actions?query=workflow%3A"Check+Black" .. |codecov| image:: https://codecov.io/gh/pySTEPS/pysteps/branch/master/graph/badge.svg :alt: Coverage :target: https://codecov.io/gh/pySTEPS/pysteps .. |github| image:: https://img.shields.io/github/release/pySTEPS/pysteps.svg :target: https://github.com/pySTEPS/pysteps/releases/latest :alt: Latest github release .. |conda| image:: https://anaconda.org/conda-forge/pysteps/badges/version.svg :target: https://anaconda.org/conda-forge/pysteps :alt: Anaconda Cloud .. |pypi| image:: https://badge.fury.io/py/pysteps.svg :target: https://pypi.org/project/pysteps/ :alt: Latest PyPI version .. |license| image:: https://img.shields.io/badge/License-BSD%203--Clause-blue.svg :alt: License :target: https://opensource.org/licenses/BSD-3-Clause .. |contributors| image:: https://img.shields.io/github/contributors/pySTEPS/pysteps :alt: GitHub contributors :target: https://github.com/pySTEPS/pysteps/graphs/contributors .. |downloads| image:: https://img.shields.io/conda/dn/conda-forge/pysteps :alt: Conda downloads :target: https://anaconda.org/conda-forge/pysteps .. |colab| image:: https://colab.research.google.com/assets/colab-badge.svg :alt: My first nowcast :target: https://colab.research.google.com/github/pySTEPS/pysteps/blob/master/examples/my_first_nowcast.ipynb .. |gallery| image:: https://img.shields.io/badge/example-gallery-blue.svg :alt: pysteps example gallery :target: https://pysteps.readthedocs.io/en/stable/auto_examples/index.html .. |stable| image:: https://img.shields.io/badge/docs-stable-blue.svg :alt: pysteps documentation :target: https://pysteps.readthedocs.io/en/stable/ .. 
|codacy| image:: https://api.codacy.com/project/badge/Grade/6cff9e046c5341a4afebc0347362f8de :alt: Codacy Badge :target: https://app.codacy.com/gh/pySTEPS/pysteps?utm_source=github.com&utm_medium=referral&utm_content=pySTEPS/pysteps&utm_campaign=Badge_Grade .. |zenodo| image:: https://zenodo.org/badge/140263418.svg :alt: DOI :target: https://zenodo.org/badge/latestdoi/140263418 .. end-badges What is pysteps? ================ Pysteps is an open-source and community-driven Python library for probabilistic precipitation nowcasting, i.e. short-term ensemble prediction systems. The aim of pysteps is to serve two different needs. The first is to provide a modular and well-documented framework for researchers interested in developing new methods for nowcasting and stochastic space-time simulation of precipitation. The second aim is to offer a highly configurable and easily accessible platform for practitioners ranging from weather forecasters to hydrologists. The pysteps library supports standard input/output file formats and implements several optical flow methods as well as advanced stochastic generators to produce ensemble nowcasts. In addition, it includes tools for visualizing and post-processing the nowcasts and methods for deterministic, probabilistic, and neighbourhood forecast verification. Quick start ----------- Use pysteps to compute and plot a radar extrapolation nowcast in Google Colab with `this interactive notebook `_. Installation ============ The recommended way to install pysteps is with `conda `_ from the conda-forge channel:: $ conda install -c conda-forge pysteps More details can be found in the `installation guide `_. Usage ===== Have a look at the `gallery of examples `__ to get a good overview of what pysteps can do. For a more detailed description of all the available methods, check the `API reference `_ page. Example data ============ A set of example radar data is available in a separate repository: `pysteps-data `_. 
More information on how to download and install them is available `here `_. Contributions ============= *We welcome contributions!* For feedback, suggestions for developments, and bug reports please use the dedicated `issues page `_. For more information, please read our `contributors guidelines `_. Reference publications ====================== The overall library is described in Pulkkinen, S., D. Nerini, A. Perez Hortal, C. Velasco-Forero, U. Germann, A. Seed, and L. Foresti, 2019: Pysteps: an open-source Python library for probabilistic precipitation nowcasting (v1.0). *Geosci. Model Dev.*, **12 (10)**, 4185–4219, doi:`10.5194/gmd-12-4185-2019 `_. While the more recent blending module is described in Imhoff, R.O., L. De Cruz, W. Dewettinck, C.C. Brauer, R. Uijlenhoet, K-J. van Heeringen, C. Velasco-Forero, D. Nerini, M. Van Ginderachter, and A.H. Weerts, 2023: Scale-dependent blending of ensemble rainfall nowcasts and NWP in the open-source pysteps library. *Q J R Meteorol Soc.*, 1-30, doi: `10.1002/qj.4461 `_. Contributors ============ .. 
image:: https://contrib.rocks/image?repo=pySTEPS/pysteps :target: https://github.com/pySTEPS/pysteps/graphs/contributors ================================================ FILE: ci/ci_test_env.yml ================================================ # pysteps development environment name: test_environment channels: - conda-forge - defaults dependencies: - python>=3.11 - pip - mamba # Minimal dependencies - numpy - cython - jsmin - jsonschema - matplotlib - netCDF4 - opencv - pillow - pyproj - scipy # Optional dependencies - dask - pyfftw - cartopy - h5py - PyWavelets - pandas - scikit-image - scikit-learn - rasterio - gdal # Test dependencies - pytest - pytest-cov - pip: - cookiecutter ================================================ FILE: ci/fetch_pysteps_data.py ================================================ # -*- coding: utf-8 -*- """ Script used to install the pysteps data in a test environment and set a pystepsrc configuration file that points to that data. The test data is downloaded in the `PYSTEPS_DATA_PATH` environmental variable. After this script is run, the `PYSTEPSRC` environmental variable should be set to PYSTEPSRC=$PYSTEPS_DATA_PATH/pystepsrc for pysteps to use that configuration file. """ import os from pysteps.datasets import create_default_pystepsrc, download_pysteps_data tox_test_data_dir = os.environ["PYSTEPS_DATA_PATH"] download_pysteps_data(tox_test_data_dir, force=True) create_default_pystepsrc( tox_test_data_dir, config_dir=tox_test_data_dir, file_name="pystepsrc" ) ================================================ FILE: ci/test_plugin_support.py ================================================ # -*- coding: utf-8 -*- """ Script to test the plugin support. This script assumes that a package created with the default pysteps plugin template (and using the default values) is installed. 
https://github.com/pySTEPS/cookiecutter-pysteps-plugin """ from pysteps import io print("Testing plugin support: ", end="") assert hasattr(io.importers, "import_institution_name") assert "institution_name" in io.interface._importer_methods from pysteps.io.importers import import_institution_name import_institution_name("filename") print("PASSED") ================================================ FILE: doc/.gitignore ================================================ _build/ generated auto_examples ================================================ FILE: doc/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = pysteps SOURCEDIR = source BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: doc/_static/pysteps.css ================================================ .section h1 { border-bottom: 2px solid #0099ff; display: inline-block; } .section h2 { border-bottom: 2px solid #ccebff; display: inline-block; } /* override table width restrictions */ @media screen and (min-width: 767px) { .wy-table-responsive table td { /* !important prevents the common CSS stylesheets from overriding this as on RTD they are loaded after this stylesheet */ white-space: normal !important; } .wy-table-responsive { overflow: visible !important; } } ================================================ FILE: doc/_templates/layout.html ================================================ {% extends "!layout.html" %} {% set css_files = css_files + ["_static/pysteps.css"] %} ================================================ FILE: doc/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build set SPHINXPROJ=pysteps if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd ================================================ FILE: doc/rebuild_docs.sh ================================================ # Build documentation from scratch. 
rm -r source/generated &> /dev/null rm -r source/auto_examples &> /dev/null make clean make html ================================================ FILE: doc/requirements.txt ================================================ # Additional requirements related to the documentation build only sphinx sphinxcontrib.bibtex sphinx-book-theme sphinx_gallery scikit-image scikit-learn pandas git+https://github.com/pySTEPS/pysteps-nwp-importers.git@main#egg=pysteps_nwp_importers pygrib h5py ================================================ FILE: doc/source/conf.py ================================================ # -*- coding: utf-8 -*- # All configuration values have a default; values that are commented out # serve to show the default. import os import subprocess import sys from datetime import datetime import json from jsmin import jsmin # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. if "READTHEDOCS" not in os.environ: sys.path.insert(1, os.path.abspath("../../")) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = "1.6" # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.coverage", "sphinx.ext.mathjax", "sphinx.ext.napoleon", "sphinxcontrib.bibtex", "sphinx_gallery.gen_gallery", ] bibtex_bibfiles = ["references.bib"] # numpydoc_show_class_members = False # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix(es) of source filenames. 
def get_version():
    """Return the project version string obtained from 'git describe'."""
    from subprocess import check_output

    described = check_output(["git", "describe", "--tags", "--always"])
    # Fall back to a placeholder when git produces no output.
    return described.decode("utf-8") if described else "X.Y"
def set_root():
    """Rewrite every data-source ``root_path`` in the pystepsrc file to an
    absolute path under ``../../pysteps-data`` and save the result as
    ``../../pystepsrc.rtd`` (used for the Read the Docs build)."""
    src = os.path.abspath(os.path.join("..", "..", "pysteps", "pystepsrc"))
    # pystepsrc may contain comments, so strip them with jsmin before parsing.
    with open(src, "r") as f:
        rcparams = json.loads(jsmin(f.read()))

    for source in rcparams["data_sources"].values():
        absolute = os.path.abspath(
            os.path.join("..", "..", "pysteps-data", source["root_path"])
        )
        source["root_path"] = absolute

    dst = os.path.abspath(os.path.join("..", "..", "pystepsrc.rtd"))
    with open(dst, "w") as f:
        json.dump(rcparams, f, indent=4)
html_logo = "../_static/pysteps_logo.png" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["../_static"] html_css_files = ["../_static/pysteps.css"] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # This is required for the alabaster theme # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars # html_sidebars = { # "**": [ # "relations.html", # needs 'show_related': True theme option to display # "searchbox.html", # ] # } html_domain_indices = True autosummary_generate = True # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. htmlhelp_basename = "pystepsdoc" # -- Options for LaTeX output --------------------------------------------- # This hack is taken from numpy (https://github.com/numpy/numpy/blob/master/doc/source/conf.py). 
latex_preamble = r""" \usepackage{amsmath} \DeclareUnicodeCharacter{00A0}{\nobreakspace} % In the parameters section, place a newline after the Parameters % header \usepackage{expdlist} \let\latexdescription=\description \def\description{\latexdescription{}{} \breaklabel} % Make Examples/etc section headers smaller and more compact \makeatletter \titleformat{\paragraph}{\normalsize\py@HeaderFamily}% {\py@TitleColor}{0em}{\py@TitleColor}{\py@NormalColor} \titlespacing*{\paragraph}{0pt}{1ex}{0pt} \makeatother % Fix footer/header \renewcommand{\chaptermark}[1]{\markboth{\MakeUppercase{\thechapter.\ #1}}{}} \renewcommand{\sectionmark}[1]{\markright{\MakeUppercase{\thesection.\ #1}}} """ latex_elements = { "papersize": "a4paper", "pointsize": "10pt", "preamble": latex_preamble, # Latex figure (float) alignment # # 'figure_align': 'htbp', } latex_domain_indices = False # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, "pysteps.tex", "pysteps reference", author, "manual"), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [(master_doc, "pysteps", "pysteps reference", [author], 1)] # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. 
List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( master_doc, "pysteps", "pysteps reference", author, "pysteps", "One line description of project.", "Miscellaneous", ), ] # -- Options for Sphinx-Gallery ------------------------------------------- # The configuration dictionary for Sphinx-Gallery sphinx_gallery_conf = { "examples_dirs": "../../examples", # path to your example scripts "gallery_dirs": "auto_examples", # path where to save gallery generated examples "filename_pattern": r"/*\.py", # Include all the files in the examples dir } ================================================ FILE: doc/source/developer_guide/build_the_docs.rst ================================================ .. _build_the_docs: ================= Building the docs ================= The pysteps documentations is build using `Sphinx `_, a tool that makes it easy to create intelligent and beautiful documentation The documentation is located in the **doc** folder in the pysteps repo. Automatic build --------------- The simplest way to build the documentation is using tox and the tox-conda plugin (conda needed). To install these packages activate your conda development environment and run:: conda install -c conda-forge tox tox-conda Then, to build the documentation, from the repo's root run:: tox -e docs This will create a conda environment will all the necessary dependencies and the data needed to create the examples. Manual build ------------ To build the docs you need to need to satisfy a few more dependencies related to Sphinx that are specified in the doc/requirements.txt file: - sphinx - numpydoc - sphinxcontrib.bibtex - sphinx-book-theme - sphinx_gallery You can install these packages running `pip install -r doc/requirements.txt`. In addition to this requirements, to build the example gallery in the documentation the example pysteps-data is needed. 
To download and install this data see the installation instructions in the :ref:`example_data` section. Once these requirements are met, to build the documentation, in the **doc** folder run:: make html This will build the documentation along with the example gallery. The build documentation (html web page) will be available in **doc/_build/html/**. To correctly visualize the documentation, you need to set up and run a local HTTP server. To do that, in the **doc/_build/html/** directory run:: python -m http.server This will set up a local HTTP server on 0.0.0.0 port 8000. To see the built documentation open the following url in the browser: http://0.0.0.0:8000/ ================================================ FILE: doc/source/developer_guide/contributors_guidelines.rst ================================================ .. _contributor_guidelines: .. include:: ../../../CONTRIBUTING.rst ================================================ FILE: doc/source/developer_guide/importer_plugins.rst ================================================ .. _importer-plugins: =========================== Create your importer plugin =========================== Since version 1.4, pysteps allows the users to add new importers by installing external packages, called plugins, without modifying the pysteps installation. These plugins need to follow a particular structure to allow pysteps to discover and integrate the new importers to the pysteps interface without any user intervention. .. contents:: Table of Contents :local: :depth: 3 How do the plugins work? ======================== When the plugin is installed, it advertises the new importers to other packages (in our case, pysteps) using the python `entry points specification`_. These new importers are automatically discovered every time that the pysteps library is imported. The discovered importers are added as attributes to the io.importers module and also registered to the io.get_method interface without any user intervention. 
In addition, since the installation of the plugins does not modify the actual pysteps installation (i.e., the pysteps sources), the pysteps library can be updated without reinstalling the plugin. .. _`entry points specification`: https://packaging.python.org/specifications/entry-points/ Create your plugin ================== There are two ways of creating a plugin. The first one is building the importers plugin from scratch. However, this can be a daunting task if you are creating your first plugin. To facilitate the creating of new plugins, we provide a `Cookiecutter`_ template, in a separate project, that creates a template project to be used as a starting point to build the plugin. The template for the pysteps plugins is maintained as a separate project at `cookiecutter-pysteps-plugin `_. For detailed instruction on how to create a plugin, `check the template's documentation`_. .. _`check the template's documentation`: https://cookiecutter-pysteps-plugin.readthedocs.io/en/latest .. _Cookiecutter: https://cookiecutter.readthedocs.io ================================================ FILE: doc/source/developer_guide/pypi.rst ================================================ .. _pypi_relase: ============================= Packaging the pysteps project ============================= The `Python Package Index `_ (PyPI) is a software repository for the Python programming language. PyPI helps you find and install software developed and shared by the Python community. The following guide to package pysteps was adapted from the `PyPI `_ official documentation. Generating the source distribution ================================== The first step is to generate a `source distribution (sdist) `_ for the pysteps library. These are archives that are uploaded to the `Package Index `_ and can be installed by pip. To create the sdist package we need the **setuptools** package installed. 
Then, from the root folder of the pysteps source run:: python setup.py sdist Once this command is completed, it should generate a tar.gz (source archive) file the **dist** directory:: dist/ pysteps-a.b.c.tar.gz where a.b.c denote the version number. Uploading the source distribution to the archive ================================================ The last step is to upload your package to the `Python Package Index `_. **Important** Before we actually upload the distribution to the Python Index, we will test it in `Test PyPI `_. Test PyPI is a separate instance of the package index that allows us to try the distribution without affecting the real index (PyPi). Because TestPyPI has a separate database from the actual PyPI, you’ll need a separate user account for specifically for TestPyPI. You can register your account in https://test.pypi.org/account/register/. Once you are registered, you can use `twine `_ to upload the distribution packages. Alternatively, the package can be uploaded manually from the **Test PyPI** page. If Twine is not installed, you can install it by running ``pip install twine`` or ``conda install twine``. Test PyPI ^^^^^^^^^ To upload the recently created source distribution (**dist/pysteps-a.b.c.tar.gz**) under the **dist** directory run:: twine upload --repository-url https://test.pypi.org/legacy/ dist/pysteps-a.b.c.tar.gz where a.b.c denote the version number. You will be prompted for the username and password you registered with Test PyPI. 
After the command completes, you should see output similar to this:: Uploading distributions to https://test.pypi.org/legacy/ Enter your username: [your username] Enter your password: Uploading pysteps-a.b.c.tar.gz 100%|█████████████████████| 4.25k/4.25k [00:01<00:00, 3.05kB/s] Once uploaded your package should be viewable on TestPyPI, for example, https://test.pypi.org/project/pysteps Test the uploaded package ------------------------- Before uploading the package to the official `Python Package Index `_, test that the package can be installed using pip. Automatic test ^^^^^^^^^^^^^^ The simplest way to hat the package can be installed using pip is using tox and the tox-conda plugin (conda needed). To install these packages activate your conda development environment and run:: conda install -c conda-forge tox tox-conda Then, to test the installation in a minimal and an environment with all the dependencies (full env), run:: tox -r -e pypi_test # Test the installation in a minimal env tox -r -e pypi_test_full # Test the installation in an full env Manual test ^^^^^^^^^^^ To manually test the installation on new environment, create a copy of the basic development environment using the `environment_dev.yml `_ file in the root folder of the pysteps project:: conda env create -f environment_dev.yml -n pysteps_test Then we activate the environment:: source activate pysteps_test or:: conda activate pysteps_test If the environment pysteps_test was already created, remove any version of pysteps already installed:: pip uninstall pysteps Now, install the pysteps package from test.pypi.org. Since not all the dependecies are available in the Test PyPI repository, we need to add the official repo as an extra index to pip. 
By doing so, pip will look first in the Test PyPI index and then in the official PyPI:: pip install --no-cache-dir --index-url https://test.pypi.org/simple/ --extra-index-url=https://pypi.org/simple/ pysteps To test that the installation was successful, from a folder different than the pysteps source, run:: pytest --pyargs pysteps If any test didn't pass, check the sources or consider creating a new release fixing those bugs. Upload package to PyPi ---------------------- Once the `sdist `_ package was tested, we can safely upload it to the Official PyPi repository with:: twine upload dist/pysteps-a.b.c.tar.gz Now, **pysteps** can be installed by simply running:: pip install pysteps As an extra sanity measure, it is recommended to test the pysteps package installed from the Official PyPi repository (instead of the test PyPi). Automatic test ^^^^^^^^^^^^^^ Similarly to the `Test the uploaded package`_ section, to test the installation from PyPI in a clean environment, run:: tox -r -e pypi Manual test ^^^^^^^^^^^ Follow test instructions in `Test PyPI`_ section. ================================================ FILE: doc/source/developer_guide/test_pysteps.rst ================================================ .. _testing_pysteps: =============== Testing pysteps =============== The pysteps distribution includes a small test suite for some of the modules. To run the tests the `pytest `__ package is needed. To install it, in a terminal run:: pip install pytest Automatic testing ================= The simplest way to run the pysteps' test suite is using tox and the tox-conda plugin (conda needed). 
To install these packages activate your conda development environment and run:: conda install -c conda-forge tox tox-conda Then, to run the tests, from the repo's root run:: tox # Run pytests tox -e install # Test package installation tox -e black # Test for black formatting warnings Manual testing ============== Example data ------------ The build-in tests require the pysteps example data installed. See the installation instructions in the :ref:`example_data` section. Test an installed package ------------------------- After the package is installed, you can launch the test suite from any directory by running:: pytest --pyargs pysteps Test from sources ----------------- Before testing the package directly from the sources, we need to build the extensions in-place. To do that, from the root pysteps folder run:: python setup.py build_ext -i Now, the package sources can be tested in-place using the **pytest** command on the root of the pysteps source directory. E.g.:: pytest -v --tb=line ================================================ FILE: doc/source/developer_guide/update_conda_forge.rst ================================================ .. _update_conda_feedstock: ========================================== Updating the conda-forge pysteps-feedstock ========================================== .. _pysteps-feedstock: https://github.com/conda-forge/pysteps-feedstock .. _`conda-forge/pysteps-feedstock`: https://github.com/conda-forge/pysteps-feedstock Here we will describe the steps to update the pysteps conda-forge feedstock. This tutorial is intended for the core developers listed as maintainers of the conda recipe in the `conda-forge/pysteps-feedstock`_. 
Examples for needing to update the pysteps-feedstock are: * New release * Fix errors pysteps package errors **The following tutorial was adapted from the official conda-forge.org documentation, released under CC4.0 license** What is a “conda-forge” ======================= Conda-forge is a community effort that provides conda packages for a wide range of software. The conda team from Anaconda packages a multitude of packages and provides them to all users free of charge in their default channel. **conda-forge** is a community-led conda channel of installable packages that allows users to share software that is not included in the official Anaconda repository. The main advantages of **conda-forge** are: - all packages are shared in a single channel named conda-forge - care is taken that all packages are up-to-date - common standards ensure that all packages have compatible versions - by default, packages are built for macOS, linux amd64 and windows amd64 In order to provide high-quality builds, the process has been automated into the conda-forge GitHub organization. The conda-forge organization contains one repository for each of the installable packages. Such a repository is known as a **feedstock**. The actual pysteps feedstock is https://github.com/conda-forge/pysteps-feedstock A feedstock is made up of a conda recipe (the instructions on what and how to build the package) and the necessary configurations for automatic building using freely available continuous integration services. See the official `conda-forge documentation `_ for more details. Maintain pysteps conda-forge package ==================================== Pysteps core developers that are maintainers of the pysteps feedstock. All pysteps developers listed as maintainers of the pysteps feedstock are given push access to the feedstock repository. This means that a maintainer can create branches in the main repository. 
Every time that a new commit is pushed/merged in the feedstock repository, conda-forge runs a Continuous Integration (CI) system that runs quality checks, builds the pysteps recipe on Windows, OSX, and Linux, and publishes the built recipes in the conda-forge channel. Important --------- For updates, using a branch in the main repo and a subsequent Pull Request (PR) to the master branch is discouraged because: - CI is run on both the branch and on the Pull Request (if any) associated with that branch. This wastes CI resources. - Branches are automatically published by the CI system. This means that for every push, the packages will be published before the PR is actually merged. For these reasons, to update the feedstock, the maintainers need to fork the feedstock, create a new branch in that fork, push to that branch in the fork, and then open a PR to the conda-forge repo. Workflow for updating a pysteps-feedstock ----------------------------------------- The mandatory steps to update the pysteps-feedstock_ are: 1. Forking the pysteps-feedstock_. * Clone the forked repository in your computer:: git clone https://github.com//pysteps-feedstock #. Syncing your fork with the pysteps feedstock. This step is only needed if your local repository is not up to date with the pysteps-feedstock_. If you just cloned the forked pysteps-feedstock_, you can ignore this step. * Make sure you are on the master branch:: git checkout master * Register conda-forge’s feedstock with:: git remote add upstream https://github.com/conda-forge/pysteps-feedstock * Fetch the latest updates with git fetch upstream:: git fetch upstream * Pull in the latest changes into your master branch:: git rebase upstream/master #. Create a new branch:: git checkout -b #. Update the recipe and push changes in this new branch * See next section "Updating recipes" for more details * Push changes:: git commit -m #. Pushing your changes to GitHub:: git push origin #.
Propose a Pull Request * Create a pull request via the web interface Updating pysteps recipe ======================= The pysteps-feedstock_ should be updated when: * We release a new pysteps version * Need to fix errors in the pysteps package New release ----------- When a new pysteps version is released, before updating the pysteps feedstock, the new version needs to be uploaded to the Python Package Index (PyPI) (see :ref:`pypi_relase` for more details). This step is needed because the conda recipe uses the PyPI to build the pysteps conda package. Once the new version is available in the PyPI, the conda recipe in pysteps-feedstock/recipe/meta.yaml needs to be updated by: 1. Updating version and hash #. Checking the dependencies #. Bumping the build number - When the package version changes, reset the build number back to 0. - The build number is increased when the source code for the package has not changed but you need to make a new build. - In case that the recipe must be updated, increase by 1 the **build_number** in the conda recipe in `pysteps-feedstock/recipe/meta.yaml `_. Some examples for needing to increase the build number are: - updating the pinned dependencies - Fixing wrong dependencies #. Rerendering feedstocks - Rerendering is conda-forge’s way to update the files common to all feedstocks (e.g. README, CI configuration, pinned dependencies). - When to rerender: We need to re-render when there are changes in the following parts of the feedstock: - the platform configuration (skip sections) - the yum_requirements.txt - updates in the build matrix due to new versions of Python, NumPy, PERL, R, etc.
- updates in conda-forge pinning that affect the feedstock - build issues that a feedstock configuration update will fix - To rerender the feedstock, the first step is to install **conda-smithy** in your root environment:: conda install -c conda-forge conda-smithy - Commit all changes and from the root directory of the feedstock, type:: conda smithy rerender -c auto Optionally one can commit the changes manually. To do this drop *-c auto* from the command. More information on https://conda-forge.org/docs/maintainer/updating_pkgs.html#dev-rerender-local conda-forge autotick bot ------------------------ The conda-forge autotick bot is now a central part of the conda-forge ecosystem. The conda-forge autotick bot was created to track out-of-date feedstocks and issue pull requests with updated recipes. The bot tracks and updates out-of-date feedstocks in four steps: - Find the names of all feedstocks on conda-forge. - Compute the dependency graph of packages on conda-forge found in step 1. - Find the most recent version of each feedstock’s source code. - Open a PR into each out-of-date feedstock updating the meta.yaml for the most recent upstream release. These steps are run automatically every six hours. Hence, when a new pysteps version is uploaded to PyPI, this bot will automatically update the recipe and submit a PR. If the tests in the PR pass, then it can be merged into the feedstock's master branch. ================================================ FILE: doc/source/index.rst ================================================ pysteps -- The nowcasting initiative ==================================== Pysteps is a community-driven initiative for developing and maintaining an easy to use, modular, free and open source Python framework for short-term ensemble prediction systems. The focus is on probabilistic nowcasting of radar precipitation fields, but pysteps is designed to allow a wider range of uses.
Pysteps is actively developed on GitHub__, while a more thorough description of pysteps is available in the pysteps reference publications: .. note:: Pulkkinen, S., D. Nerini, A. Perez Hortal, C. Velasco-Forero, U. Germann, A. Seed, and L. Foresti, 2019: Pysteps: an open-source Python library for probabilistic precipitation nowcasting (v1.0). *Geosci. Model Dev.*, **12 (10)**, 4185–4219, doi:`10.5194/gmd-12-4185-2019 `_. Imhoff, R.O., L. De Cruz, W. Dewettinck, C.C. Brauer, R. Uijlenhoet, K-J. van Heeringen, C. Velasco-Forero, D. Nerini, M. Van Ginderachter, and A.H. Weerts, 2023: Scale-dependent blending of ensemble rainfall nowcasts and NWP in the open-source pysteps library. *Q J R Meteorol Soc.*, 1-30, doi: `doi:10.1002/qj.4461 `_. __ https://github.com/pySTEPS/pysteps .. toctree:: :maxdepth: 1 :hidden: :caption: For users Installation Gallery <../auto_examples/index> My first nowcast (Colab Notebook) API Reference Example data Configuration file (pystepsrc) Machine learning applications Bibliography .. toctree:: :maxdepth: 1 :hidden: :caption: For developers Contributing Guide Importer plugins Testing Building the docs Packaging Publishing to conda-forge GitHub repository ================================================ FILE: doc/source/pysteps_reference/blending.rst ================================================ ================ pysteps.blending ================ Implementation of blending methods for blending (ensemble) nowcasts with Numerical Weather Prediction (NWP) models. .. automodule:: pysteps.blending.interface .. automodule:: pysteps.blending.clim .. automodule:: pysteps.blending.ens_kalman_filter_methods .. automodule:: pysteps.blending.linear_blending .. automodule:: pysteps.blending.pca_ens_kalman_filter .. automodule:: pysteps.blending.skill_scores .. automodule:: pysteps.blending.steps .. 
automodule:: pysteps.blending.utils ================================================ FILE: doc/source/pysteps_reference/cascade.rst ================================================ =============== pysteps.cascade =============== Methods for constructing bandpass filters and decomposing 2d precipitation fields into different spatial scales. .. automodule:: pysteps.cascade.interface .. automodule:: pysteps.cascade.bandpass_filters .. automodule:: pysteps.cascade.decomposition ================================================ FILE: doc/source/pysteps_reference/datasets.rst ================================================ .. automodule:: pysteps.datasets ================================================ FILE: doc/source/pysteps_reference/decorators.rst ================================================ .. automodule:: pysteps.decorators ================================================ FILE: doc/source/pysteps_reference/downscaling.rst ================================================ =================== pysteps.downscaling =================== Implementation of deterministic and ensemble downscaling methods. .. automodule:: pysteps.downscaling.interface .. automodule:: pysteps.downscaling.rainfarm ================================================ FILE: doc/source/pysteps_reference/extrapolation.rst ================================================ ===================== pysteps.extrapolation ===================== Extrapolation module functions and interfaces. .. automodule:: pysteps.extrapolation.interface .. automodule:: pysteps.extrapolation.semilagrangian ================================================ FILE: doc/source/pysteps_reference/feature.rst ================================================ =============== pysteps.feature =============== Implementations of feature detection methods. .. automodule:: pysteps.feature.interface .. automodule:: pysteps.feature.blob .. automodule:: pysteps.feature.tstorm .. 
automodule:: pysteps.feature.shitomasi ================================================ FILE: doc/source/pysteps_reference/index.rst ================================================ .. _pysteps-reference: API Reference ============= :Release: |version| :Date: |today| This page gives a comprehensive description of all the modules and functions available in pysteps. .. toctree:: :maxdepth: 2 :caption: API Reference pysteps blending cascade decorators extrapolation datasets downscaling feature io motion noise nowcasts postprocessing timeseries tracking utils verification visualization .. only:: html Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` .. only:: html Bibliography ------------ * :ref:`bibliography` ================================================ FILE: doc/source/pysteps_reference/io.rst ================================================ ========== pysteps.io ========== Methods for browsing data archives, reading 2d precipitation fields and writing forecasts into files. .. automodule:: pysteps.io.interface .. automodule:: pysteps.io.archive .. automodule:: pysteps.io.importers .. automodule:: pysteps.io.nowcast_importers .. automodule:: pysteps.io.exporters .. automodule:: pysteps.io.readers ================================================ FILE: doc/source/pysteps_reference/motion.rst ================================================ ============== pysteps.motion ============== Implementations of optical flow methods. .. automodule:: pysteps.motion.interface .. automodule:: pysteps.motion.constant .. automodule:: pysteps.motion.darts .. automodule:: pysteps.motion.lucaskanade .. automodule:: pysteps.motion.proesmans .. automodule:: pysteps.motion.vet ================================================ FILE: doc/source/pysteps_reference/noise.rst ================================================ ============= pysteps.noise ============= Implementation of noise generators and related utilities for stochastic nowcasting. ..
automodule:: pysteps.noise.interface .. automodule:: pysteps.noise.fftgenerators .. automodule:: pysteps.noise.motion .. automodule:: pysteps.noise.utils ================================================ FILE: doc/source/pysteps_reference/nowcasts.rst ================================================ ================ pysteps.nowcasts ================ Implementation of deterministic and ensemble nowcasting methods. .. automodule:: pysteps.nowcasts.interface .. automodule:: pysteps.nowcasts.anvil .. automodule:: pysteps.nowcasts.extrapolation .. automodule:: pysteps.nowcasts.linda .. automodule:: pysteps.nowcasts.lagrangian_probability .. automodule:: pysteps.nowcasts.sprog .. automodule:: pysteps.nowcasts.sseps .. automodule:: pysteps.nowcasts.steps .. automodule:: pysteps.nowcasts.utils ================================================ FILE: doc/source/pysteps_reference/postprocessing.rst ================================================ ====================== pysteps.postprocessing ====================== Methods for post-processing of forecasts. .. automodule:: pysteps.postprocessing.ensemblestats .. automodule:: pysteps.postprocessing.probmatching ================================================ FILE: doc/source/pysteps_reference/pysteps.rst ================================================ ======= pysteps ======= Pystep top module utils .. autosummary:: :toctree: ../generated/ pysteps.load_config_file ================================================ FILE: doc/source/pysteps_reference/timeseries.rst ================================================ ================== pysteps.timeseries ================== Methods and models for time series analysis. .. automodule:: pysteps.timeseries.autoregression .. 
automodule:: pysteps.timeseries.correlation ================================================ FILE: doc/source/pysteps_reference/tracking.rst ================================================ ================ pysteps.tracking ================ Implementations of feature tracking methods. .. automodule:: pysteps.tracking.interface .. automodule:: pysteps.tracking.lucaskanade .. automodule:: pysteps.tracking.tdating ================================================ FILE: doc/source/pysteps_reference/utils.rst ================================================ ============= pysteps.utils ============= Implementation of miscellaneous utility functions. .. automodule:: pysteps.utils.interface .. automodule:: pysteps.utils.arrays .. automodule:: pysteps.utils.cleansing .. automodule:: pysteps.utils.conversion .. automodule:: pysteps.utils.dimension .. automodule:: pysteps.utils.fft .. automodule:: pysteps.utils.images .. automodule:: pysteps.utils.interpolate .. automodule:: pysteps.utils.pca .. automodule:: pysteps.utils.reprojection .. automodule:: pysteps.utils.spectral .. automodule:: pysteps.utils.tapering .. automodule:: pysteps.utils.transformation ================================================ FILE: doc/source/pysteps_reference/verification.rst ================================================ ==================== pysteps.verification ==================== Methods for verification of deterministic, probabilistic and ensemble forecasts. .. automodule:: pysteps.verification.interface .. automodule:: pysteps.verification.detcatscores .. automodule:: pysteps.verification.detcontscores .. automodule:: pysteps.verification.ensscores .. automodule:: pysteps.verification.lifetime .. automodule:: pysteps.verification.plots .. automodule:: pysteps.verification.probscores .. automodule:: pysteps.verification.salscores .. 
automodule:: pysteps.verification.spatialscores ================================================ FILE: doc/source/pysteps_reference/visualization.rst ================================================ ===================== pysteps.visualization ===================== Methods for plotting precipitation and motion fields. .. automodule:: pysteps.visualization.animations .. automodule:: pysteps.visualization.basemaps .. automodule:: pysteps.visualization.motionfields .. automodule:: pysteps.visualization.precipfields .. automodule:: pysteps.visualization.spectral .. automodule:: pysteps.visualization.thunderstorms .. automodule:: pysteps.visualization.utils ================================================ FILE: doc/source/references.bib ================================================ @TECHREPORT{BPS2004, AUTHOR = "N. E. Bowler and C. E. Pierce and A. W. Seed", TITLE = "{STEPS}: A probabilistic precipitation forecasting scheme which merges an extrapolation nowcast with downscaled {NWP}", INSTITUTION = "UK Met Office", TYPE = "Forecasting Research Technical Report", NUMBER = 433, ADDRESS = "Wallingford, United Kingdom", YEAR = 2004, } @ARTICLE{BPS2006, AUTHOR = "N. E. Bowler and C. E. Pierce and A. W. Seed", TITLE = "{STEPS}: A probabilistic precipitation forecasting scheme which merges an extrapolation nowcast with downscaled {NWP}", JOURNAL = "Quarterly Journal of the Royal Meteorological Society", VOLUME = 132, NUMBER = 620, PAGES = "2127--2155", YEAR = 2006, DOI = "10.1256/qj.04.100" } @ARTICLE{BS2007, AUTHOR = "J. Br{\"o}cker and L. A. Smith", TITLE = "Increasing the Reliability of Reliability Diagrams", JOURNAL = "Weather and Forecasting", VOLUME = 22, NUMBER = 3, PAGES = "651--661", YEAR = 2007, DOI = "10.1175/WAF993.1" } @BOOK{CP2002, AUTHOR = "A. Clothier and G. Pegram", TITLE = "Space-time modelling of rainfall using the string of beads model: integration of radar and raingauge data", SERIES = "WRC Report No. 
1010/1/02", PUBLISHER = "Water Research Commission", ADDRESS = "Durban, South Africa", YEAR = 2002 } @ARTICLE{CRS2004, AUTHOR = "B. Casati and G. Ross and D. B. Stephenson", TITLE = "A New Intensity-Scale Approach for the Verification of Spatial Precipitation Forecasts", VOLUME = 11, NUMBER = 2, JOURNAL = "Meteorological Applications", PAGES = "141--154", YEAR = 2004, DOI = "10.1017/S1350482704001239" } @ARTICLE{DOnofrio2014, TITLE = "Stochastic rainfall downscaling of climate models", AUTHOR = "D'Onofrio, D and Palazzi, E and von Hardenberg, J and Provenzale, A and Calmanti, S", JOURNAL = "J. Hydrometeorol.", PUBLISHER = "American Meteorological Society", VOLUME = 15, NUMBER = 2, PAGES = "830--843", YEAR = 2014, } @ARTICLE{EWWM2013, AUTHOR = "E. Ebert and L. Wilson and A. Weigel and M. Mittermaier and P. Nurmi and P. Gill and M. Göber and S. Joslyn and B. Brown and T. Fowler and A. Watkins", TITLE = "Progress and challenges in forecast verification", JOURNAL = "Meteorological Applications", VOLUME = 20, NUMBER = 2, PAGES = "130--139", YEAR = 2013, DOI = "10.1002/met.1392" } @ARTICLE{Feldmann2021, AUTHOR = "M. Feldmann and U. Germann and M. Gabella and A. Berne", TITLE = "A Characterisation of Alpine Mesocyclone Occurrence", JOURNAL = "Weather and Climate Dynamics Discussions", PAGES = "1--26", URL = "https://wcd.copernicus.org/preprints/wcd-2021-53/", DOI = "10.5194/wcd-2021-53", YEAR = 2021 } @ARTICLE{FSNBG2019, AUTHOR = "Foresti, L. and Sideris, I.V. and Nerini, D. and Beusch, L. and Germann, U.", TITLE = "Using a 10-Year Radar Archive for Nowcasting Precipitation Growth and Decay: A Probabilistic Machine Learning Approach", JOURNAL = "Weather and Forecasting", VOLUME = 34, PAGES = "1547--1569", YEAR = 2019, DOI = "10.1175/WAF-D-18-0206.1" } @ARTICLE{FNPC2020, AUTHOR = "Franch, G. and Nerini, D. and Pendesini, M. and Coviello, L. and Jurman, G.
and Furlanello, C.", TITLE = "Precipitation Nowcasting with Orographic Enhanced Stacked Generalization: Improving Deep Learning Predictions on Extreme Events", JOURNAL = "Atmosphere", VOLUME = 11, NUMBER = 3, PAGES = "267", YEAR = 2020, DOI = "10.3390/atmos11030267" } @ARTICLE{FW2005, AUTHOR = "N. I. Fox and C. K. Wikle", TITLE = "A Bayesian Quantitative Precipitation Nowcast Scheme", JOURNAL = "Weather and Forecasting", VOLUME = 20, NUMBER = 3, PAGES = "264--275", YEAR = 2005 } @ARTICLE{GZ2002, AUTHOR = "U. Germann and I. Zawadzki", TITLE = "Scale-Dependence of the Predictability of Precipitation from Continental Radar Images. {P}art {I}: Description of the Methodology", JOURNAL = "Monthly Weather Review", VOLUME = 130, NUMBER = 12, PAGES = "2859--2873", YEAR = 2002, DOI = "10.1175/1520-0493(2002)130<2859:SDOTPO>2.0.CO;2" } @ARTICLE{GZ2004, AUTHOR = "U. Germann and I. Zawadzki", TITLE = "Scale-Dependence of the Predictability of Precipitation from Continental Radar Images. {P}art {II}: Probability Forecasts", JOURNAL = "Journal of Applied Meteorology", VOLUME = 43, NUMBER = 1, PAGES = "74--89", YEAR = 2004, DOI = "10.1175/1520-0450(2004)043<0074:SDOTPO>2.0.CO;2" } @ARTICLE{Her2000, AUTHOR = "H. Hersbach", TITLE = "Decomposition of the Continuous Ranked Probability Score for Ensemble Prediction Systems", JOURNAL = "Weather and Forecasting", VOLUME = 15, NUMBER = 5, PAGES = "559--570", YEAR = 2000, DOI = "10.1175/1520-0434(2000)015<0559:DOTCRP>2.0.CO;2" } @article{Hwang2015, AUTHOR = "Hwang, Yunsung and Clark, Adam J and Lakshmanan, Valliappa and Koch, Steven E", TITLE = "Improved nowcasts by blending extrapolation and model forecasts", JOURNAL = "Weather and Forecasting", VOLUME = 30, NUMBER = 5, PAGES = "1201--1217", YEAR = 2015, DOI = "10.1175/WAF-D-15-0057.1" } @ARTICLE{LZ1995, AUTHOR = "S. Laroche and I. 
Zawadzki", TITLE = "Retrievals of Horizontal Winds from Single-Doppler Clear-Air Data by Methods of Cross Correlation and Variational Analysis", JOURNAL = "Journal of Atmospheric and Oceanic Technology", VOLUME = 12, NUMBER = 4, PAGES = "721--738", YEAR = 1995, DOI = "10.1175/1520-0426(1995)012<0721:ROHWFS>2.0.CO;2", } @ARTICLE{NBSG2017, AUTHOR = "D. Nerini and N. Besic and I. Sideris and U. Germann and L. Foresti", TITLE = "A non-stationary stochastic ensemble generator for radar rainfall fields based on the short-space {F}ourier transform", JOURNAL = "Hydrology and Earth System Sciences", VOLUME = 21, NUMBER = 6, YEAR = 2017, PAGES = "2777--2797", DOI = "10.5194/hess-21-2777-2017" } @ARTICLE{PCH2018, AUTHOR = "S. Pulkkinen and V. Chandrasekar and A.-M. Harri", TITLE = "Nowcasting of Precipitation in the High-Resolution {D}allas-{F}ort {W}orth ({DFW}) Urban Radar Remote Sensing Network", JOURNAL = "IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing", VOLUME = 11, NUMBER = 8, PAGES = "2773--2787", YEAR = 2018, DOI = "10.1109/JSTARS.2018.2840491" } @ARTICLE{PCH2019a, AUTHOR = "S. Pulkkinen and V. Chandrasekar and A.-M. Harri", TITLE = "Fully Spectral Method for Radar-Based Precipitation Nowcasting", JOURNAL = "IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing", VOLUME = 12, NUMBER = 5, PAGES = "1369-1382", YEAR = 2019 } @ARTICLE{PCH2019b, AUTHOR = "S. Pulkkinen and V. Chandrasekar and A.-M. Harri", TITLE = "Stochastic Spectral Method for Radar-Based Probabilistic Precipitation Nowcasting", JOURNAL = "Journal of Atmospheric and Oceanic Technology", VOLUME = 36, NUMBER = 6, PAGES = "971--985", YEAR = 2019 } @ARTICLE{PCLH2020, AUTHOR = "S. Pulkkinen and V. Chandrasekar and A. von Lerber and A.-M.
Harri", TITLE = "Nowcasting of Convective Rainfall Using Volumetric Radar Observations", JOURNAL = "IEEE Transactions on Geoscience and Remote Sensing", DOI = "10.1109/TGRS.2020.2984594", PAGES = "1--15", YEAR = 2020 } @ARTICLE{PCN2021, AUTHOR = "S. Pulkkinen and V. Chandrasekar and T. Niemi", TITLE = "Lagrangian Integro-Difference Equation Model for Precipitation Nowcasting", JOURNAL = "Journal of Atmospheric and Oceanic Technology", NOTE = "submitted", YEAR = 2021 } @INCOLLECTION{PGPO1994, AUTHOR = "M. Proesmans and L. van Gool and E. Pauwels and A. Oosterlinck", TITLE = "Determination of optical flow and its discontinuities using non-linear diffusion", BOOKTITLE = "Computer Vision — ECCV '94", VOLUME = 801, SERIES = "Lecture Notes in Computer Science", EDITOR = "J.-O. Eklundh", PUBLISHER = "Springer Berlin Heidelberg", PAGES = "294--304", YEAR = 1994 } @ARTICLE{RC2011, AUTHOR = "E. Ruzanski and V. Chandrasekar", JOURNAL = "IEEE Transactions on Geoscience and Remote Sensing", TITLE = "Scale Filtering for Improved Nowcasting Performance in a High-Resolution {X}-Band Radar Network", VOLUME = 49, NUMBER = 6, PAGES="2296--2307", MONTH = "June", YEAR=2011 } @ARTICLE{Ravuri2021, AUTHOR = "Ravuri, Suman and Lenc, Karel and Willson, Matthew and Kangin, Dmitry and Lam, Remi and Mirowski, Piotr and Fitzsimons, Megan and Athanassiadou, Maria and Kashem, Sheleem and Madge, Sam and Prudden, Rachel and Mandhane, Amol and Clark, Aidan and Brock, Andrew and Simonyan, Karen and Hadsell, Raia and Robinson, Niall and Clancy, Ellen and Arribas, Alberto and Mohamed, Shakir", JOURNAL = "Nature", TITLE = "Skilful precipitation nowcasting using deep generative models of radar", VOLUME = 597, NUMBER = 7878, PAGES = "672--677", YEAR = 2021, DOI = "10.1038/s41586-021-03854-z", } @ARTICLE{RCW2011, AUTHOR = "E. Ruzanski and V. Chandrasekar and Y.
Wang", TITLE = "The {CASA} Nowcasting System", JOURNAL = "Journal of Atmospheric and Oceanic Technology", VOLUME = 28, NUMBER = 5, PAGES = "640--655", YEAR = 2011, DOI = "10.1175/2011JTECHA1496.1" } @ARTICLE{RL2008, AUTHOR = "N. M. Roberts and H. W. Lean", TITLE = "Scale-Selective Verification of Rainfall Accumulations from High-Resolution Forecasts of Convective Events", JOURNAL = "Monthly Weather Review", VOLUME = 136, NUMBER = 1, PAGES = "78--97", YEAR = 2008, DOI = "10.1175/2007MWR2123.1" } @ARTICLE{Rebora2006, AUTHOR = "N. Rebora and L. Ferraris and J. von Hardenberg and A. Provenzale", TITLE = "RainFARM: Rainfall Downscaling by a Filtered Autoregressive Model", JOURNAL = "Journal of Hydrometeorology", VOLUME = 7, NUMBER = 4, PAGES = "724-738", YEAR = 2006, DOI = "10.1175/JHM517.1" } @ARTICLE{Seed2003, AUTHOR = "A. W. Seed", TITLE = "A Dynamic and Spatial Scaling Approach to Advection Forecasting", JOURNAL = "Journal of Applied Meteorology", VOLUME = 42, NUMBER = 3, PAGES = "381-388", YEAR = 2003, DOI = "10.1175/1520-0450(2003)042<0381:ADASSA>2.0.CO;2" } @ARTICLE{SPN2013, AUTHOR = "A. W. Seed and C. E. Pierce and K. Norman", TITLE = "Formulation and evaluation of a scale decomposition-based stochastic precipitation nowcast scheme", JOURNAL = "Water Resources Research", VOLUME = 49, NUMBER = 10, PAGES = "6624--6641", YEAR = 2013, DOI = "10.1002/wrcr.20536" } @Article{Terzago2018, AUTHOR = "Terzago, S. and Palazzi, E. and von Hardenberg, J.", TITLE = "Stochastic downscaling of precipitation in complex orography: a simple method to reproduce a realistic fine-scale climatology", JOURNAL = "Natural Hazards and Earth System Sciences", VOLUME = 18, YEAR = 2018, NUMBER = 11, PAGES = "2825--2840", DOI = "10.5194/nhess-18-2825-2018" } @ARTICLE{TRT2004, AUTHOR = "A. M. Hering and C. Morel and G. Galli and P. Ambrosetti and M. 
Boscacci", TITLE = "Nowcasting thunderstorms in the Alpine Region using a radar based adaptive thresholding scheme", JOURNAL = "Proceedings of ERAD Conference 2004", NUMBER = "January", PAGES = "206--211", YEAR = 2004 } @ARTICLE{WHZ2009, AUTHOR = "Heini Wernli and Christiane Hofmann and Matthias Zimmer", TITLE = "Spatial Forecast Verification Methods Intercomparison Project: Application of the SAL Technique", JOURNAL = "Weather and Forecasting", NUMBER = "6", VOLUME = "24", PAGES = "1472 - 1484", YEAR = 2009 } @ARTICLE{WPHF2008, AUTHOR = "Heini Wernli and Marcus Paulat and Martin Hagen and Christoph Frei", TITLE = "SAL—A Novel Quality Measure for the Verification of Quantitative Precipitation Forecasts", JOURNAL = "Monthly Weather Review", NUMBER = "11", VOLUME = "136", PAGES = "4470 - 4487", YEAR = 2008 } @ARTICLE{XWF2005, AUTHOR = "K. Xu and C. K Wikle and N. I. Fox", TITLE = "A Kernel-Based Spatio-Temporal Dynamical Model for Nowcasting Weather Radar Reflectivities", JOURNAL = "Journal of the American Statistical Association", VOLUME = 100, NUMBER = 472, PAGES = "1133--1144", YEAR = 2005 } @ARTICLE{ZR2009, AUTHOR = "P. Zacharov and D. Rezacova", TITLE = "Using the fractions skill score to assess the relationship between an ensemble {QPF} spread and skill", JOURNAL = "Atmospheric Research", VOLUME = 94, NUMBER = 4, PAGES = "684--693", YEAR = 2009, DOI = "10.1016/j.atmosres.2009.03.004" } @ARTICLE{Imhoff2023, AUTHOR = "R.O. Imhoff and L. De Cruz and W. Dewettinck and C.C. Brauer and R. Uijlenhoet and K-J. van Heeringen and C. Velasco-Forero and D. Nerini and M. Van Ginderachter and A.H.
Weerts", TITLE = "Scale-dependent blending of ensemble rainfall nowcasts and {NWP} in the open-source pysteps library", JOURNAL = "Quarterly Journal of the Royal Meteorological Society", VOLUME = 149, NUMBER = 753, PAGES = "1--30", YEAR = 2023, DOI = "10.1002/qj.4461" } @ARTICLE{Nerini2019MWR, title = {A {Reduced}-{Space} {Ensemble} {Kalman} {Filter} {Approach} for {Flow}-{Dependent} {Integration} of {Radar} {Extrapolation} {Nowcasts} and {NWP} {Precipitation} {Ensembles}}, volume = {147}, doi = {10.1175/MWR-D-18-0258.1}, number = {3}, journal = {Monthly Weather Review}, author = {D. Nerini and L. Foresti and D. Leuenberger and S. Robert and U. Germann}, year = {2019}, pages = {987--1006}, } ================================================ FILE: doc/source/user_guide/example_data.rst ================================================ .. _example_data: Installing the example data =========================== The example scripts in the user guide, as well as the built-in tests, use the example radar data available in a separate repository: `pysteps-data `_. The easiest way to install the example data is by using the :func:`~pysteps.datasets.download_pysteps_data` and :func:`~pysteps.datasets.create_default_pystepsrc` functions from the :mod:`pysteps.datasets` module. Installation using the datasets module ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Below is a code snippet that can be used to install the example data and configure the `pystepsrc` file to point to it. In the example below, the example data is placed in the user's home folder under the **pysteps_data** directory. It also creates a default configuration file that points to the downloaded data and places it in the $HOME/.pysteps (Unix and Mac OS X) or $USERPROFILE/pysteps (Windows). This is one of the default locations where pysteps looks for the configuration file (see :ref:`pysteps_lookup` for more information). ..
code-block:: python import os # Import the helper functions from pysteps.datasets import download_pysteps_data, create_default_pystepsrc # In this example we will place it in the user's home folder on the # `pysteps_data` folder. home_dir = os.path.expanduser("~") pysteps_data_dir_path = os.path.join(home_dir, "pysteps_data") # Download the pysteps data. download_pysteps_data(pysteps_data_dir_path, force=True) # Create a default configuration file that points to the downloaded data. # By default it will place the configuration file in the # $HOME/.pysteps (unix and Mac OS X) or $USERPROFILE/pysteps (windows). config_file_path = create_default_pystepsrc(pysteps_data_dir_path) Note that for these changes to take effect you need to restart the python interpreter or use the :func:`pysteps.load_config_file` function as follows:: # Load the new configuration file and replace the default configuration import pysteps pysteps.load_config_file(config_file_path, verbose=True) To customize the default configuration file see the :ref:`pystepsrc` section. Manual installation ~~~~~~~~~~~~~~~~~~~ Another alternative is to download the data manually into your computer and configure the :ref:`pystepsrc ` file to point to that example data. First, download the data from the repository by `clicking here `_. Unzip the data into a folder of your preference. Once the data is unzipped, the directory structure looks like this:: pysteps-data | ├── radar ├── KNMI ├── OPERA ├── bom ├── dwd ├── fmi ├── mch The next step is updating the *pystepsrc* file to point to these directories, as described in the :ref:`pystepsrc` section. ================================================ FILE: doc/source/user_guide/install_pysteps.rst ================================================ .. _install_pysteps: Installing pysteps ================== Dependencies ------------ The pysteps package needs the following dependencies * `python >=3.11, <3.14 `_ (lower or higher versions may work but are not tested). 
* `jsonschema `_ * `matplotlib `_ * `netCDF4 `_ * `numpy `_ * `opencv `_ * `pillow `_ * `pyproj `_ * `scipy `_ Additionally, the following packages can be installed for better computational efficiency: * `dask `_ and `toolz `_ (for code parallelization) * `pyfftw `_ (for faster FFT computation) Other optional dependencies include: * `cartopy >=0.18 `_ (for geo-referenced visualization) * `h5py `_ (for importing HDF5 data) * `pygrib `_ (for importing MRMS data) * `gdal `_ (for importing GeoTIFF data) * `pywavelets `_ (for intensity-scale verification) * `pandas `_ and `scikit-image >=0.19 `_ (for advanced feature detection methods) * `rasterio `_ (for the reprojection module) * `scikit-learn >=1.7 `_ (for PCA-based blending methods) **Important**: If you only want to use pysteps, you can continue reading below. But, if you want to contribute to pysteps or edit the package, you need to install pysteps in development mode: :ref:`Contributing to pysteps `. Install with conda/mamba (recommended) -------------------------------------- `Conda `_ is an open-source package management system and environment management system that runs on Windows, macOS, and Linux. `Mamba `_ is a drop-in replacement for conda offering better performances and more reliable environment solutions. Mamba quickly installs, runs, and updates packages and their dependencies. It also allows you to easily create, save, load, or switch between different environments on your local computer. Since version 1.0, pysteps is available on `conda-forge `_, a community-driven package repository for conda packages. To install pysteps with mamba in a new environment, run in a terminal:: mamba create -n pysteps python=3.11 mamba activate pysteps This will create and activate the new python environment called 'pysteps' using python 3.11. 
The next step is to add the conda-forge channel where the pysteps package is located:: conda config --env --prepend channels conda-forge Let's set this channel as the priority one:: conda config --env --set channel_priority strict The latter step is not strictly necessary but is recommended since the conda-forge and the default conda channels are not 100% compatible. Finally, to install pysteps and all its dependencies run:: mamba install pysteps Install pysteps on Apple Silicon Macs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ On conda-forge, pysteps is currently compiled for Mac computers with Intel processors (osx-64). However, thanks to `Rosetta 2 `_ it is possible to install the same package on a Mac computers with an Apple Silicon processor (arm-64). First, make sure that Rosetta 2 is installed:: softwareupdate --install-rosetta Use mamba to create a new environment called 'pysteps' for intel packages with python 3.11:: CONDA_SUBDIR=osx-64 mamba create -n pysteps python=3.11 mamba activate pysteps Make sure that conda/mamba commands in this environment use intel packages:: conda config --env --set subdir osx-64 Verify that the correct platform is being used:: python -c "import platform;print(platform.machine())" # Should print "x86_64" Finally, run the same pysteps install instructions as given above:: conda config --env --prepend channels conda-forge conda config --env --set channel_priority strict mamba install pysteps We can now verify that pysteps loads correctly:: python -c "import pysteps" Note that the first time that pysteps is imported will typically take longer, as Rosetta 2 needs to translate the binary code for the Apple Silicon processor. Install from source ------------------- The recommended way to install pysteps from the source is using ``pip`` to adhere to the `PEP517 standards `_. Using ``pip`` instead of ``setup.py`` guarantees that all the package dependencies are properly handled during the installation process. 
OSX users: gcc compiler ~~~~~~~~~~~~~~~~~~~~~~~ pySTEPS uses Cython extensions that need to be compiled with multi-threading support enabled. The default Apple Clang compiler does not support OpenMP. Hence, using the default compiler would have disabled multi-threading and may raise the following error during the installation:: clang: error: unsupported option '-fopenmp' error: command 'gcc' failed with exit status 1 To solve this issue, obtain the latest gcc version with Homebrew_ that has multi-threading enabled:: brew install gcc@13 .. _Homebrew: https://brew.sh/ To make sure that the installer uses the homebrew's gcc, export the following environmental variables in the terminal (supposing that gcc version 13 was installed):: export CC=gcc-13 export CXX=g++-13 First, check that the homebrew's gcc is detected:: which gcc-13 This should point to the homebrew's gcc installation. Under certain circumstances, Homebrew_ does not add the symbolic links for the gcc executables under /usr/local/bin. If that is the case, specify the CC and CCX variables using the full path to the homebrew installation. For example:: export CC=/usr/local/Cellar/gcc/13.2.0/bin/gcc-13 export CXX=/usr/local/Cellar/gcc/13.2.0/bin/g++-13 Then, you can continue with the normal installation procedure described next. Installation using pip ~~~~~~~~~~~~~~~~~~~~~~ The latest pysteps version in the repository can be installed using pip by simply running in a terminal:: pip install git+https://github.com/pySTEPS/pysteps Or, from a local copy of the repo:: git clone https://github.com/pySTEPS/pysteps cd pysteps pip install . The above commands install the latest version of the **master** branch, which is continuously under development. .. warning:: If you are installing pysteps from the sources using pip, the Python interpreter must be launched outside of the pysteps root directory. Importing pysteps from a working directory that contains the pysteps source code will raise a ``ModuleNotFoundError``. 
This error is caused by the root pysteps folder being recognized as the pysteps package, also known as `the double import trap `_. Setting up the user-defined configuration file ---------------------------------------------- The pysteps package allows the users to customize the default settings and configuration. The configuration parameters used by default are loaded from a user-defined `JSON `_ file and then stored in the **pysteps.rcparams**, a dictionary-like object that can be accessed as attributes or as items. .. toctree:: :maxdepth: 1 Set-up the user-defined configuration file Example pystepsrc file .. _import_pysteps: Final test: import pysteps in Python ------------------------------------ Activate the pysteps environment:: conda activate pysteps Launch Python and import pysteps:: python >>> import pysteps ================================================ FILE: doc/source/user_guide/machine_learning_pysteps.rst ================================================ .. _machine_learning_pysteps: Benchmarking machine learning models with pysteps ================================================= How to correctly compare the accuracy of machine learning against traditional nowcasting methods available in pysteps? Before starting the comparison, you need to ask yourself what is the objective of nowcasting: #. Do you only want to minimize prediction errors? #. Do you also want to represent the prediction uncertainty? To achieve objective 1, it is sufficient to produce a single deterministic nowcast that filters out the unpredictable small-scale precipitation features. However, this will create a nowcast that will become increasingly smooth over time. To achieve objective 2, you need to produce a probabilistic or an ensemble nowcast (several ensemble members or realizations). 
In weather forecasting (and nowcasting), we usually want to achieve both goals because it is impossible to predict the evolution of a chaotic system with 100% accuracy, especially space-time precipitation fields and thunderstorms! Machine learning and pysteps offer several methods to produce both deterministic and probabilistic nowcasts. Therefore, if you want to compare machine learning-based nowcasts to simpler extrapolation-based models, you need to select the right method and verification measure. 1. Deterministic nowcasting -------------------------------------------- Deterministic nowcasts can be divided into: a. Variance-preserving nowcasts, such as extrapolation nowcasts by Eulerian and Lagrangian persistence. b. Error-minimization nowcasts, such as machine learning, Fourier-filtered and ensemble mean nowcasts. **Very important**: these two types of deterministic nowcasts are not directly comparable because they have a different variance! This is best explained by the decomposition of the mean squared error (MSE): :math:`MSE = bias^2 + Var` All deterministic machine learning algorithms that minimize the MSE (or a related measure) will also inevitably minimize the variance of nowcast fields. This is a natural attempt to filter out the unpredictable evolution of precipitation features, which would otherwise increase the variance (and the MSE). The same principle holds for convolutional and/or deep neural network architectures, which also produce smooth nowcasts. Therefore, it is better to avoid directly comparing an error-minimization machine learning nowcast to a variance-preserving radar extrapolation, as produced by the module :py:mod:`pysteps.nowcasts.extrapolation`. Instead, you should use compare with the mean of a sufficiently large ensemble. A deterministic equivalent of the ensemble mean can be approximated using the modules :py:mod:`pysteps.nowcasts.sprog` or :py:mod:`pysteps.nowcasts.anvil`. 
Another possibility, but more computationally demanding, is to average many ensemble members generated by the modules :py:mod:`pysteps.nowcasts.steps` or :py:mod:`pysteps.nowcasts.linda`. Still, even by using the pysteps ensemble mean, it is not given that its variance will be the same as the one of machine learning predictions. Possible solutions to this: #. use a normalized MSE (NMSE) or another score accounting for differences in the variance between prediction and observation. #. decompose the field with a Fourier (or wavelet) transform to compare features at the same spatial scales. A good deterministic comparison of a deep convolutional machine learning neural network nowcast and pysteps is given in :cite:`FNPC2020`. 2. Probabilistic nowcasting -------------------------------------------- Probabilistic machine learning regression methods can be roughly categorized into: a. Quantile-based methods, such as quantile regression, quantile random forests, and quantile neural networks. b. Ensemble-based methods, such as generative adversarial networks (GANs) and variational auto-encoders (VAEs). Quantile-based machine learning nowcasts are interesting, but can only estimate the probability of exceedance at a given point (see e.g. :cite:`FSNBG2019`). To estimate areal exceedance probabilities, for example above catchments, or to propagate the nowcast uncertainty into hydrological models, the full ensemble still needs to be generated, e.g. with generative machine learning models. Generative machine learning methods are similar to the pysteps ensemble members. Both are designed to produce an ensemble of possible realizations that preserve the variance of observed radar fields. A proper probabilistic verification of generative machine learning models against pysteps is an interesting research direction which was recently undertake in the work of :cite:`Ravuri2021`. 
Summary ------- The table below is an attempt to classify machine learning and pysteps nowcasting methods according to the four main prediction types: #. Deterministic (variance-preserving), like one control NWP forecast #. Deterministic (error-minimization), like an ensemble mean NWP forecast #. Probabilistic (quantile-based), like a probabilistic NWP forecast (without members) #. Probabilistic (ensemble-based), like the members of an ensemble NWP forecast The comparison of methods from different types should only be done carefully and with good reasons. .. list-table:: :widths: 30 20 20 20 :header-rows: 1 * - Nowcast type - Machine learning - pysteps - Verification * - Deterministic (variance-preserving) - SRGAN, Others? - :py:mod:`pysteps.nowcasts.extrapolation` (any optical flow method) - MSE, RMSE, MAE, ETS, etc * - Deterministic (error-minimization) - Classical ANNs, (deep) CNNs, random forests, AdaBoost, etc - :py:mod:`pysteps.nowcasts.sprog`, :py:mod:`pysteps.nowcasts.anvil` or ensemble mean of :py:mod:`pysteps.nowcasts.steps`/:py:mod:`~pysteps.nowcasts.linda` - MSE, RMSE, MAE, ETS, etc or better normalized scores, etc * - Probabilistic (quantile-based) - Quantile ANN, quantile random forests, quantile regression - :py:mod:`pysteps.nowcasts.lagrangian_probability` or probabilities derived from :py:mod:`pysteps.nowcasts.steps`/:py:mod:`~pysteps.nowcasts.linda` - Reliability diagram (predicted vs observed quantile), probability integral transform (PIT) histogram * - Probabilistic (ensemble-based) - GANs (:cite:`Ravuri2021`), VAEs, etc - Ensemble and probabilities derived from :py:mod:`pysteps.nowcasts.steps`/:py:mod:`~pysteps.nowcasts.linda` - Probabilistic verification: reliability diagrams, continuous ranked probability scores (CRPS), etc. 
Ensemble verification: rank histograms, spread-error relationships, etc ================================================ FILE: doc/source/user_guide/pystepsrc_example.rst ================================================ .. _pystepsrc_example: Example of pystepsrc file ========================= Below you can find the default pystepsrc file. The lines starting with "//" are comments and they are ignored. .. code:: // pysteps configuration { // "silent_import" : whether to suppress the initial pysteps message "silent_import": false, "outputs": { // path_outputs : path where to save results (figures, forecasts, etc) "path_outputs": "./" }, "plot": { // "motion_plot" : "streamplot" or "quiver" "motion_plot": "quiver", // "colorscale" : "BOM-RF3", "pysteps" or "STEPS-BE" "colorscale": "pysteps" }, "data_sources": { "bom": { "root_path": "./radar/bom", "path_fmt": "prcp-cscn/2/%Y/%m/%d", "fn_pattern": "2_%Y%m%d_%H%M00.prcp-cscn", "fn_ext": "nc", "importer": "bom_rf3", "timestep": 6, "importer_kwargs": { "gzipped": true } }, "fmi": { "root_path": "./radar/fmi", "path_fmt": "%Y%m%d", "fn_pattern": "%Y%m%d%H%M_fmi.radar.composite.lowest_FIN_SUOMI1", "fn_ext": "pgm.gz", "importer": "fmi_pgm", "timestep": 5, "importer_kwargs": { "gzipped": true } }, "mch": { "root_path": "./radar/mch", "path_fmt": "%Y%m%d", "fn_pattern": "AQC%y%j%H%M?_00005.801", "fn_ext": "gif", "importer": "mch_gif", "timestep": 5, "importer_kwargs": { "product": "AQC", "unit": "mm", "accutime": 5 } }, "opera": { "root_path": "./radar/OPERA", "path_fmt": "%Y%m%d", "fn_pattern": "T_PAAH21_C_EUOC_%Y%m%d%H%M%S", "fn_ext": "hdf", "importer": "opera_hdf5", "timestep": 15, "importer_kwargs": {} }, "knmi": { "root_path": "./radar/KNMI", "path_fmt": "%Y/%m", "fn_pattern": "RAD_NL25_RAP_5min_%Y%m%d%H%M", "fn_ext": "h5", "importer": "knmi_hdf5", "timestep": 5, "importer_kwargs": { "accutime": 5, "qty": "ACRR", "pixelsize": 1000.0 } }, "saf": { "root_path": "./saf", "path_fmt": "%Y%m%d/CRR", "fn_pattern": 
"S_NWC_CRR_MSG4_Europe-VISIR_%Y%m%dT%H%M00Z", "fn_ext": "nc", "importer": "saf_crri", "timestep": 15, "importer_kwargs": { "gzipped": true } } } } ================================================ FILE: doc/source/user_guide/set_pystepsrc.rst ================================================ .. _pystepsrc: The pysteps configuration file (pystepsrc) ========================================== .. _JSON: https://en.wikipedia.org/wiki/JSON The pysteps package allows the users to customize the default settings and configuration. The configuration parameters used by default are loaded from a user-defined JSON_ file and then stored in `pysteps.rcparams`, a dictionary-like object that can be accessed as attributes or as items. For example, the default parameters can be obtained using any of the following ways:: import pysteps # Retrieve the colorscale for plots colorscale = pysteps.rcparams['plot']['colorscale'] colorscale = pysteps.rcparams.plot.colorscale # Retrieve the the root directory of the fmi data pysteps.rcparams['data_sources']['fmi']['root_path'] pysteps.rcparams.data_sources.fmi.root_path A less wordy alternative:: from pysteps import rcparams colorscale = rcparams['plot']['colorscale'] colorscale = rcparams.plot.colorscale fmi_root_path = rcparams['data_sources']['fmi']['root_path'] fmi_root_path = rcparams.data_sources.fmi.root_path .. _pysteps_lookup: Configuration file lookup ~~~~~~~~~~~~~~~~~~~~~~~~~ When the pysteps package imported, it looks for **pystepsrc** file in the following order: - **$PWD/pystepsrc** : Looks for the file in the current directory - **$PYSTEPSRC** : If the system variable $PYSTEPSRC is defined and it points to a file, it is used. - **$PYSTEPSRC/pystepsrc** : If $PYSTEPSRC points to a directory, it looks for the pystepsrc file inside that directory. - **$HOME/.pysteps/pystepsrc** (Unix and Mac OS X) : If the system variable $HOME is defined, it looks for the configuration file in this path. 
- **%USERPROFILE%\\pysteps\\pystepsrc** (Windows only): It looks for the configuration file in the pysteps directory located user's home directory (indicated by the %USERPROFILE% system variable). - Lastly, it looks inside the library in *pysteps\\pystepsrc* for a system-defined copy. The recommended method to setup the configuration files is to edit a copy of the default **pystepsrc** file that is distributed with the package and place that copy inside the user home folder. See the instructions below. Setting up the user-defined configuration file ---------------------------------------------- Linux and OSX users ~~~~~~~~~~~~~~~~~~~ For Linux and OSX users, the recommended way to customize the pysteps configuration is placing the pystepsrc parameters file in the users home folder ${HOME} in the following path: **${HOME}/.pysteps/pystepsrc** To steps to setup up the configuration file in the home directory first, we need to create the directory if it does not exist. In a terminal, run:: $ mkdir -p ${HOME}/.pysteps The next step is to find the location of the library's default pystepsrc file. When we import pysteps in a python interpreter, the configuration file loaded is shown:: import pysteps "Pysteps configuration file found at: /path/to/pysteps/library/pystepsrc" Then we copy the library's default configuration file to that directory:: $ cp /path/to/pysteps/library/pystepsrc ${HOME}/.pysteps/pystepsrc Edit the file with the text editor of your preference and change the default configurations with your preferences. 
Finally, check that the correct configuration file is loaded by the library:: import pysteps "Pysteps configuration file found at: /home/user_name/.pysteps/pystepsrc" Windows ~~~~~~~ For windows users, the recommended way to customize the pysteps configuration is placing the pystepsrc parameters file in the users' folder (defined in the %USERPROFILE% environment variable) in the following path: **%USERPROFILE%\\pysteps\\pystepsrc** To setup up the configuration file in the home directory first, we need to create the directory if it does not exist. In a **windows terminal**, run:: $ mkdir %USERPROFILE%\pysteps **Important** It was reported that the %USERPROFILE% variable may be interpreted as an string literal when the anaconda terminal is used. This will result in a '%USERPROFILE%' folder being created in the current working directory instead of the desired pysteps folder in the user's home. If that is the case, use the explicit path to your home folder instead of `%USERPROFILE%`. For example:: $ mkdir C:\Users\your_username\pysteps The next step is to find the location of the library's default pystepsrc file. When we import pysteps in a python interpreter, the configuration file loaded is shown:: import pysteps "Pysteps configuration file found at: C:\path\to\pysteps\library\pystepsrc" Then we copy the library's default configuration file to that directory:: $ copy C:\path\to\pysteps\library\pystepsrc %USERPROFILE%\pysteps\pystepsrc Edit the file with the text editor of your preference and change the default configurations with your preferences. Finally, check that the correct configuration file is loaded by the library:: import pysteps "Pysteps configuration file found at: C:\User\Profile\.pysteps\pystepsrc" More ---- .. toctree:: :maxdepth: 1 Example pystepsrc file ================================================ FILE: doc/source/zz_bibliography.rst ================================================ .. _bibliography: ============ Bibliography ============ .. 
bibliography:: :all: ================================================ FILE: environment.yml ================================================ name: pysteps channels: - conda-forge - defaults dependencies: - python>=3.10 - jsmin - jsonschema - matplotlib - netCDF4 - numpy - opencv - pillow - pyproj - scipy ================================================ FILE: environment_dev.yml ================================================ # pysteps development environment name: pysteps_dev channels: - conda-forge - defaults dependencies: - python>=3.10 - pip - jsmin - jsonschema - matplotlib - netCDF4 - numpy - opencv - pillow - pyproj - scipy - pytest - pywavelets - cython - dask - pyfftw - h5py - PyWavelets - pygrib - black - pytest-cov - codecov - pre_commit - cartopy>=0.18 - scikit-image - scikit-learn - pandas - rasterio ================================================ FILE: examples/LK_buffer_mask.py ================================================ # -*- coding: utf-8 -*- """ Handling of no-data in Lucas-Kanade =================================== Areas of missing data in radar images are typically caused by visibility limits such as beam blockage and the radar coverage itself. These artifacts can mislead the echo tracking algorithms. For instance, precipitation leaving the domain might be erroneously detected as having nearly stationary velocity. This example shows how the Lucas-Kanade algorithm can be tuned to avoid the erroneous interpretation of velocities near the maximum range of the radars by buffering the no-data mask in the radar image in order to exclude all vectors detected nearby no-data areas. 
""" from datetime import datetime from matplotlib import cm, colors import matplotlib.pyplot as plt import numpy as np from pysteps import io, motion, nowcasts, rcparams, verification from pysteps.utils import conversion, transformation from pysteps.visualization import plot_precip_field, quiver ################################################################################ # Read the radar input images # --------------------------- # # First, we will import the sequence of radar composites. # You need the pysteps-data archive downloaded and the pystepsrc file # configured with the data_source paths pointing to data folders. # Selected case date = datetime.strptime("201607112100", "%Y%m%d%H%M") data_source = rcparams.data_sources["mch"] ############################################################################### # Load the data from the archive # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ root_path = data_source["root_path"] path_fmt = data_source["path_fmt"] fn_pattern = data_source["fn_pattern"] fn_ext = data_source["fn_ext"] importer_name = data_source["importer"] importer_kwargs = data_source["importer_kwargs"] timestep = data_source["timestep"] # Find the two input files from the archive fns = io.archive.find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep=5, num_prev_files=1 ) # Read the radar composites importer = io.get_method(importer_name, "importer") R, quality, metadata = io.read_timeseries(fns, importer, **importer_kwargs) del quality # Not used ############################################################################### # Preprocess the data # ~~~~~~~~~~~~~~~~~~~ # Convert to mm/h R, metadata = conversion.to_rainrate(R, metadata) # Keep the reference frame in mm/h and its mask (for plotting purposes) ref_mm = R[0, :, :].copy() mask = np.ones(ref_mm.shape) mask[~np.isnan(ref_mm)] = np.nan # Log-transform the data [dBR] R, metadata = transformation.dB_transform(R, metadata, threshold=0.1, zerovalue=-15.0) # Keep the reference frame in dBR 
(for plotting purposes) ref_dbr = R[0].copy() ref_dbr[ref_dbr < -10] = np.nan # Plot the reference field plot_precip_field(ref_mm, title="Reference field") circle = plt.Circle((620, 400), 100, color="b", clip_on=False, fill=False) plt.gca().add_artist(circle) plt.show() ############################################################################### # Notice the "half-in, half-out" precipitation area within the blue circle. # As we are going to show next, the tracking algorithm can erroneously interpret # precipitation leaving the domain as stationary motion. # # Also note that the radar image includes NaNs in areas of missing data. # These are used by the optical flow algorithm to define the radar mask. # # Sparse Lucas-Kanade # ------------------- # # By setting the optional argument ``dense=False`` in ``xy, uv = dense_lucaskanade(...)``, # the LK algorithm returns the motion vectors detected by the Lucas-Kanade scheme # without interpolating them on the grid. # This allows us to better identify the presence of wrongly detected # stationary motion in areas where precipitation is leaving the domain (look # for the red dots within the blue circle in the figure below). 
# Get Lucas-Kanade optical flow method dense_lucaskanade = motion.get_method("LK") # Mask invalid values R = np.ma.masked_invalid(R) # Use no buffering of the radar mask fd_kwargs1 = {"buffer_mask": 0} xy, uv = dense_lucaskanade(R, dense=False, fd_kwargs=fd_kwargs1) plt.imshow(ref_dbr, cmap=plt.get_cmap("Greys")) plt.imshow(mask, cmap=colors.ListedColormap(["black"]), alpha=0.5) plt.quiver( xy[:, 0], xy[:, 1], uv[:, 0], uv[:, 1], color="red", angles="xy", scale_units="xy", scale=0.2, ) circle = plt.Circle((620, 245), 100, color="b", clip_on=False, fill=False) plt.gca().add_artist(circle) plt.title("buffer_mask = 0") plt.show() ################################################################################ # The LK algorithm cannot distinguish missing values from no precipitation, that is, # no-data are the same as no-echoes. As a result, the fixed boundaries produced # by precipitation in contact with no-data areas are interpreted as stationary motion. # One way to mitigate this effect of the boundaries is to introduce a slight buffer # of the no-data mask so that the algorithm will ignore all the portions of the # radar domain that are nearby no-data areas. # This buffer can be set by the keyword argument ``buffer_mask`` within the # feature detection optional arguments ``fd_kwargs``. # Note that by default ``dense_lucaskanade`` uses a 5-pixel buffer. 
# with buffer buffer = 10 fd_kwargs2 = {"buffer_mask": buffer} xy, uv = dense_lucaskanade(R, dense=False, fd_kwargs=fd_kwargs2) plt.imshow(ref_dbr, cmap=plt.get_cmap("Greys")) plt.imshow(mask, cmap=colors.ListedColormap(["black"]), alpha=0.5) plt.quiver( xy[:, 0], xy[:, 1], uv[:, 0], uv[:, 1], color="red", angles="xy", scale_units="xy", scale=0.2, ) circle = plt.Circle((620, 245), 100, color="b", clip_on=False, fill=False) plt.gca().add_artist(circle) plt.title("buffer_mask = %i" % buffer) plt.show() ################################################################################ # Dense Lucas-Kanade # ------------------ # # The above displacement vectors produced by the Lucas-Kanade method are now # interpolated to produce a full field of motion (i.e., ``dense=True``). # By comparing the velocity of the motion fields, we can easily notice # the negative bias that is introduced by the the erroneous interpretation of # velocities near the maximum range of the radars. UV1 = dense_lucaskanade(R, dense=True, fd_kwargs=fd_kwargs1) UV2 = dense_lucaskanade(R, dense=True, fd_kwargs=fd_kwargs2) V1 = np.sqrt(UV1[0] ** 2 + UV1[1] ** 2) V2 = np.sqrt(UV2[0] ** 2 + UV2[1] ** 2) plt.imshow((V1 - V2) / V2, cmap=cm.RdBu_r, vmin=-0.5, vmax=0.5) plt.colorbar(fraction=0.04, pad=0.04) plt.title("Relative difference in motion speed") plt.show() ################################################################################ # Notice how the presence of erroneous velocity vectors produces a significantly # slower motion field near the right edge of the domain. # # Forecast skill # -------------- # # We are now going to evaluate the benefit of buffering the radar mask by computing # the forecast skill in terms of the Spearman correlation coefficient. # The extrapolation forecasts are computed using the dense UV motion fields # estimated above. 
# Get the advection routine and extrapolate the last radar frame by 12 time steps # (i.e., 1 hour lead time) extrapolate = nowcasts.get_method("extrapolation") R[~np.isfinite(R)] = metadata["zerovalue"] R_f1 = extrapolate(R[-1], UV1, 12) R_f2 = extrapolate(R[-1], UV2, 12) # Back-transform to rain rate R_f1 = transformation.dB_transform(R_f1, threshold=-10.0, inverse=True)[0] R_f2 = transformation.dB_transform(R_f2, threshold=-10.0, inverse=True)[0] # Find the veriyfing observations in the archive fns = io.archive.find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep=5, num_next_files=12 ) # Read and convert the radar composites R_o, _, metadata_o = io.read_timeseries(fns, importer, **importer_kwargs) R_o, metadata_o = conversion.to_rainrate(R_o, metadata_o) # Compute Spearman correlation skill = verification.get_method("corr_s") score_1 = [] score_2 = [] for i in range(12): score_1.append(skill(R_f1[i, :, :], R_o[i + 1, :, :])["corr_s"]) score_2.append(skill(R_f2[i, :, :], R_o[i + 1, :, :])["corr_s"]) x = (np.arange(12) + 1) * 5 # [min] plt.plot(x, score_1, label="buffer_mask = 0") plt.plot(x, score_2, label="buffer_mask = %i" % buffer) plt.legend() plt.xlabel("Lead time [min]") plt.ylabel("Corr. coeff. []") plt.title("Spearman correlation") plt.tight_layout() plt.show() ################################################################################ # As expected, the corrected motion field produces better forecast skill already # within the first hour into the nowcast. # sphinx_gallery_thumbnail_number = 2 ================================================ FILE: examples/README.txt ================================================ .. _example_gallery: Example gallery =============== Below is a collection of example scripts and tutorials to illustrate the usage of pysteps. These scripts require the pysteps example data. See the installation instructions in the :ref:`example_data` section. 
================================================ FILE: examples/advection_correction.py ================================================ """ Advection correction ==================== This tutorial shows how to use the optical flow routines of pysteps to implement the advection correction procedure described in Anagnostou and Krajewski (1999). Advection correction is a temporal interpolation procedure that is often used when estimating rainfall accumulations to correct for the shift of rainfall patterns between consecutive radar rainfall maps. This shift becomes particularly significant for long radar scanning cycles and in presence of fast moving precipitation features. .. note:: The code for the advection correction using pysteps was originally written by `Daniel Wolfensberger `_. """ from datetime import datetime import matplotlib.pyplot as plt import numpy as np from pysteps import io, motion, rcparams from pysteps.utils import conversion, dimension from pysteps.visualization import plot_precip_field from scipy.ndimage import map_coordinates ################################################################################ # Read the radar input images # --------------------------- # # First, we import a sequence of 36 images of 5-minute radar composites # that we will use to produce a 3-hour rainfall accumulation map. # We will keep only one frame every 10 minutes, to simulate a longer scanning # cycle and thus better highlight the need for advection correction. # # You need the pysteps-data archive downloaded and the pystepsrc file # configured with the data_source paths pointing to data folders. 
# Selected case date = datetime.strptime("201607112100", "%Y%m%d%H%M") data_source = rcparams.data_sources["mch"] ############################################################################### # Load the data from the archive # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ root_path = data_source["root_path"] path_fmt = data_source["path_fmt"] fn_pattern = data_source["fn_pattern"] fn_ext = data_source["fn_ext"] importer_name = data_source["importer"] importer_kwargs = data_source["importer_kwargs"] timestep = data_source["timestep"] # Find the input files from the archive fns = io.archive.find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep=5, num_next_files=35 ) # Read the radar composites importer = io.get_method(importer_name, "importer") R, __, metadata = io.read_timeseries(fns, importer, **importer_kwargs) # Convert to mm/h R, metadata = conversion.to_rainrate(R, metadata) # Upscale to 2 km (simply to reduce the memory demand) R, metadata = dimension.aggregate_fields_space(R, metadata, 2000) # Keep only one frame every 10 minutes (i.e., every 2 timesteps) # (to highlight the need for advection correction) R = R[::2] ################################################################################ # Advection correction # -------------------- # # Now we need to implement the advection correction for a pair of successive # radar images. The procedure is based on the algorithm described in Anagnostou # and Krajewski (Appendix A, 1999). # # To evaluate the advection occurred between two successive radar images, we are # going to use the Lucas-Kanade optical flow routine available in pysteps. 
def advection_correction(R, T=5, t=1):
    """
    Temporally interpolate between two successive rain-rate fields by
    advecting them toward each other (Anagnostou and Krajewski, 1999).

    R = np.array([qpe_previous, qpe_current])
    T = time between two observations (5 min)
    t = interpolation timestep (1 min)
    """
    # Estimate the advection with the Lucas-Kanade optical flow; the buffer
    # mask avoids edge effects
    optical_flow = motion.get_method("LK")
    velocity = optical_flow(np.log(R), fd_kwargs={"buffer_mask": 10})

    # Pixel coordinates of the output grid
    grid_x, grid_y = np.meshgrid(
        np.arange(R[0].shape[1], dtype=float),
        np.arange(R[0].shape[0], dtype=float),
    )

    # Temporal interpolation: for every intermediate time, sample the previous
    # field advected forward and the current field advected backward, then
    # blend them with weights proportional to their temporal proximity
    accumulation = np.zeros(R[0].shape)
    for step in range(t, T + t, t):
        fraction = step / T
        coords_prev = (
            grid_y - fraction * velocity[1],
            grid_x - fraction * velocity[0],
        )
        from_prev = map_coordinates(R[0], coords_prev, order=1)

        remainder = (T - step) / T
        coords_next = (
            grid_y + remainder * velocity[1],
            grid_x + remainder * velocity[0],
        )
        from_next = map_coordinates(R[1], coords_next, order=1)

        accumulation += (T - step) * from_prev + step * from_next

    # Per iteration the weights (T - step) + step sum to T, and there are T / t
    # iterations, so t / T**2 normalizes the accumulated sum to a mean field
    return t / T**2 * accumulation


###############################################################################
# Finally, we apply the advection correction to the whole sequence of radar
# images and produce the rainfall accumulation map.

R_ac = R[0].copy()
for i in range(R.shape[0] - 1):
    R_ac += advection_correction(R[i : (i + 2)], T=10, t=1)
R_ac /= R.shape[0]

###############################################################################
# Results
# -------
#
# We compare the two accumulation maps. The first map on the left is
# computed without advection correction and we can therefore see that the shift
# between successive images 10 minutes apart produces irregular accumulations.
# Conversely, the rainfall accumulation of the right is produced using advection
# correction to account for this spatial shift. The final result is a smoother
# rainfall accumulation map.

plt.figure(figsize=(9, 4))
plt.subplot(121)
plot_precip_field(R.mean(axis=0), title="3-h rainfall accumulation")
plt.subplot(122)
plot_precip_field(R_ac, title="Same with advection correction")
plt.tight_layout()
plt.show()

################################################################################
# Reference
# ~~~~~~~~~
#
# Anagnostou, E. N., and W. F. Krajewski. 1999.
# "Real-Time Radar Rainfall
# Estimation. Part I: Algorithm Formulation." Journal of Atmospheric and
# Oceanic Technology 16: 189–97.
# https://doi.org/10.1175/1520-0426(1999)016<0189:RTRREP>2.0.CO;2
================================================ FILE: examples/anvil_nowcast.py ================================================
# coding: utf-8

"""
ANVIL nowcast
=============

This example demonstrates how to use ANVIL and the advantages compared to
extrapolation nowcast and S-PROG.

Load the libraries.
"""

from datetime import datetime, timedelta
import warnings

warnings.simplefilter("ignore")
import matplotlib.pyplot as plt
import numpy as np

from pysteps import motion, io, rcparams, utils
from pysteps.nowcasts import anvil, extrapolation, sprog
from pysteps.utils import transformation
from pysteps.visualization import plot_precip_field

###############################################################################
# Read the input data
# -------------------
#
# ANVIL was originally developed to use vertically integrated liquid (VIL) as
# the input data, but the model allows using any two-dimensional input fields.
# Here we use a composite of rain rates.

date = datetime.strptime("201505151620", "%Y%m%d%H%M")

# Read the data source information from rcparams
data_source = rcparams.data_sources["mch"]

root_path = data_source["root_path"]
path_fmt = data_source["path_fmt"]
fn_pattern = data_source["fn_pattern"]
fn_ext = data_source["fn_ext"]
importer_name = data_source["importer"]
importer_kwargs = data_source["importer_kwargs"]

# Find the input files in the archive. Use history length of 5 timesteps
filenames = io.archive.find_by_date(
    date, root_path, path_fmt, fn_pattern, fn_ext, timestep=5, num_prev_files=5
)

# Read the input time series
importer = io.get_method(importer_name, "importer")
rainrate_field, quality, metadata = io.read_timeseries(
    filenames, importer, **importer_kwargs
)

# Convert to rain rate (mm/h)
rainrate_field, metadata = utils.to_rainrate(rainrate_field, metadata)

################################################################################
# Compute the advection field
# ---------------------------
#
# Apply the Lucas-Kanade method with the parameters given in Pulkkinen et al.
# (2020) to compute the advection field.

# Feature-detection parameters
fd_kwargs = {}
fd_kwargs["max_corners"] = 1000
fd_kwargs["quality_level"] = 0.01
fd_kwargs["min_distance"] = 2
fd_kwargs["block_size"] = 8

# Lucas-Kanade parameters
lk_kwargs = {}
lk_kwargs["winsize"] = (15, 15)

oflow_kwargs = {}
oflow_kwargs["fd_kwargs"] = fd_kwargs
oflow_kwargs["lk_kwargs"] = lk_kwargs
oflow_kwargs["decl_scale"] = 10

oflow = motion.get_method("lucaskanade")

# transform the input data to logarithmic scale
rainrate_field_log, _ = utils.transformation.dB_transform(
    rainrate_field, metadata=metadata
)
velocity = oflow(rainrate_field_log, **oflow_kwargs)

###############################################################################
# Compute the nowcasts and threshold rain rates below 0.5 mm/h
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

forecast_extrap = extrapolation.forecast(
    rainrate_field[-1], velocity, 3, extrap_kwargs={"allow_nonfinite_values": True}
)
forecast_extrap[forecast_extrap < 0.5] = 0.0

# log-transform the data and the threshold value to dBR units for S-PROG
rainrate_field_db, _ = transformation.dB_transform(
    rainrate_field, metadata, threshold=0.1, zerovalue=-15.0
)
rainrate_thr, _ = transformation.dB_transform(
    np.array([0.5]), metadata, threshold=0.1, zerovalue=-15.0
)
forecast_sprog = sprog.forecast(
    rainrate_field_db[-3:], velocity, 3, n_cascade_levels=6, precip_thr=rainrate_thr[0]
)
# back-transform the S-PROG output from dBR to mm/h
forecast_sprog, _ = transformation.dB_transform(
    forecast_sprog, threshold=-10.0, inverse=True
)
forecast_sprog[forecast_sprog < 0.5] = 0.0

# ANVIL works directly on the rain-rate fields (no dB transform needed)
forecast_anvil = anvil.forecast(
    rainrate_field[-4:], velocity, 3, ar_window_radius=25, ar_order=2
)
forecast_anvil[forecast_anvil < 0.5] = 0.0

###############################################################################
# Read the reference observation field and threshold rain rates below 0.5 mm/h
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

filenames = io.archive.find_by_date(
    date, root_path, path_fmt, fn_pattern, fn_ext, timestep=5, num_next_files=3
)

refobs_field, _, metadata = io.read_timeseries(filenames, importer, **importer_kwargs)

refobs_field, metadata = utils.to_rainrate(refobs_field[-1], metadata)
refobs_field[refobs_field < 0.5] = 0.0

###############################################################################
# Plot the extrapolation, S-PROG and ANVIL nowcasts.
# --------------------------------------------------
#
# For comparison, the observed rain rate fields are also plotted. Growth and
# decay areas are marked with red and blue circles, respectively.
def plot_growth_decay_circles(ax): circle = plt.Circle( (360, 300), 25, color="b", clip_on=False, fill=False, zorder=1e9 ) ax.add_artist(circle) circle = plt.Circle( (420, 350), 30, color="b", clip_on=False, fill=False, zorder=1e9 ) ax.add_artist(circle) circle = plt.Circle( (405, 380), 30, color="b", clip_on=False, fill=False, zorder=1e9 ) ax.add_artist(circle) circle = plt.Circle( (420, 500), 25, color="b", clip_on=False, fill=False, zorder=1e9 ) ax.add_artist(circle) circle = plt.Circle( (480, 535), 30, color="b", clip_on=False, fill=False, zorder=1e9 ) ax.add_artist(circle) circle = plt.Circle( (330, 470), 35, color="b", clip_on=False, fill=False, zorder=1e9 ) ax.add_artist(circle) circle = plt.Circle( (505, 205), 30, color="b", clip_on=False, fill=False, zorder=1e9 ) ax.add_artist(circle) circle = plt.Circle( (440, 180), 30, color="r", clip_on=False, fill=False, zorder=1e9 ) ax.add_artist(circle) circle = plt.Circle( (590, 240), 30, color="r", clip_on=False, fill=False, zorder=1e9 ) ax.add_artist(circle) circle = plt.Circle( (585, 160), 15, color="r", clip_on=False, fill=False, zorder=1e9 ) ax.add_artist(circle) fig = plt.figure(figsize=(10, 13)) ax = fig.add_subplot(321) rainrate_field[-1][rainrate_field[-1] < 0.5] = 0.0 plot_precip_field(rainrate_field[-1]) plot_growth_decay_circles(ax) ax.set_title("Obs. %s" % str(date)) ax = fig.add_subplot(322) plot_precip_field(refobs_field) plot_growth_decay_circles(ax) ax.set_title("Obs. 
%s" % str(date + timedelta(minutes=15))) ax = fig.add_subplot(323) plot_precip_field(forecast_extrap[-1]) plot_growth_decay_circles(ax) ax.set_title("Extrapolation +15 minutes") ax = fig.add_subplot(324) plot_precip_field(forecast_sprog[-1]) plot_growth_decay_circles(ax) ax.set_title("S-PROG (with post-processing)\n +15 minutes") ax = fig.add_subplot(325) plot_precip_field(forecast_anvil[-1]) plot_growth_decay_circles(ax) ax.set_title("ANVIL +15 minutes") plt.show() ############################################################################### # Remarks # ------- # # The extrapolation nowcast is static, i.e. it does not predict any growth or # decay. While S-PROG is to some extent able to predict growth and decay, this # this comes with loss of small-scale features. In addition, statistical # post-processing needs to be applied to correct the bias and incorrect wet-area # ratio introduced by the autoregressive process. ANVIL is able to do both: # predict growth and decay and preserve the small-scale structure in a way that # post-processing is not necessary. ================================================ FILE: examples/data_transformations.py ================================================ # -*- coding: utf-8 -*- """ Data transformations ==================== The statistics of intermittent precipitation rates are particularly non-Gaussian and display an asymmetric distribution bounded at zero. Such properties restrict the usage of well-established statistical methods that assume symmetric or Gaussian data. A common workaround is to introduce a suitable data transformation to approximate a normal distribution. In this example, we test the data transformation methods available in pysteps in order to obtain a more symmetric distribution of the precipitation data (excluding the zeros). The currently available transformations include the Box-Cox, dB, square-root and normal quantile transforms. 
"""

from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np

from pysteps import io, rcparams
from pysteps.utils import conversion, transformation
from scipy.stats import skew

###############################################################################
# Read the radar input images
# ---------------------------
#
# First, we will import the sequence of radar composites.
# You need the pysteps-data archive downloaded and the pystepsrc file
# configured with the data_source paths pointing to data folders.

# Selected case
date = datetime.strptime("201609281600", "%Y%m%d%H%M")
data_source = rcparams.data_sources["fmi"]

###############################################################################
# Load the data from the archive
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

root_path = data_source["root_path"]
path_fmt = data_source["path_fmt"]
fn_pattern = data_source["fn_pattern"]
fn_ext = data_source["fn_ext"]
importer_name = data_source["importer"]
importer_kwargs = data_source["importer_kwargs"]
timestep = data_source["timestep"]

# Get 1 hour of observations in the data archive
fns = io.archive.find_by_date(
    date, root_path, path_fmt, fn_pattern, fn_ext, timestep, num_next_files=11
)

# Read the radar composites
importer = io.get_method(importer_name, "importer")
Z, _, metadata = io.read_timeseries(fns, importer, **importer_kwargs)

# Keep only positive rainfall values
# (the transforms below are not defined for zeros; this also collapses the
# time dimension into a single 1-D sample)
Z = Z[Z > metadata["zerovalue"]].flatten()

# Convert to rain rate
R, metadata = conversion.to_rainrate(Z, metadata)

###############################################################################
# Test data transformations
# -------------------------
# Define method to visualize the data distribution with boxplots and plot the
# corresponding skewness


def plot_distribution(data, labels, skw):
    """Boxplot the standardized samples in *data* with their skewness.

    data   : list of 1-D arrays, one standardized sample per box
    labels : list of tick labels, one per sample
    skw    : list of skewness values, drawn as red stars on a twin y-axis
    """
    N = len(data)
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    # dotted red zero line on the skewness axis
    ax2.plot(np.arange(N + 2), np.zeros(N + 2), ":r")
    ax1.boxplot(data, labels=labels, sym="", medianprops={"color": "k"})

    ymax = []
    for i in range(N):
        y = skw[i]
        x = i + 1
        ax2.plot(x, y, "*r", ms=10, markeredgecolor="k")
        ymax.append(np.max(data[i]))

    # ylims
    ylims = np.percentile(ymax, 50)
    ax1.set_ylim((-1 * ylims, ylims))
    ylims = np.max(np.abs(skw))
    ax2.set_ylim((-1.1 * ylims, 1.1 * ylims))

    # labels
    ax1.set_ylabel(r"Standardized values [$\sigma$]")
    ax2.set_ylabel(r"Skewness []", color="r")
    ax2.tick_params(axis="y", labelcolor="r")


###############################################################################
# Box-Cox transform
# ~~~~~~~~~~~~~~~~~
# The Box-Cox transform is a well-known power transformation introduced by
# `Box and Cox (1964)`_. In its one-parameter version, the Box-Cox transform
# takes the form T(x) = ln(x) for lambda = 0, or T(x) = (x**lambda - 1)/lambda
# otherwise.
#
# To find a suitable lambda, we will experiment with a range of values
# and select the one that produces the most symmetric distribution, i.e., the
# lambda associated with a value of skewness closest to zero.
# To visually compare the results, the transformed data are standardized.
#
# .. _`Box and Cox (1964)`: https://doi.org/10.1111/j.2517-6161.1964.tb00553.x

data = []
labels = []
skw = []

# Test a range of values for the transformation parameter Lambda
Lambdas = np.linspace(-0.4, 0.4, 11)
for i, Lambda in enumerate(Lambdas):
    R_, _ = transformation.boxcox_transform(R, metadata, Lambda)
    R_ = (R_ - np.mean(R_)) / np.std(R_)
    data.append(R_)
    labels.append("{0:.2f}".format(Lambda))
    skw.append(skew(R_))  # skewness

# Plot the transformed data distribution as a function of lambda
plot_distribution(data, labels, skw)
plt.title("Box-Cox transform")
plt.tight_layout()
plt.show()

# Best lambda
idx_best = np.argmin(np.abs(skw))
Lambda = Lambdas[idx_best]
print("Best parameter lambda: %.2f\n(skewness = %.2f)" % (Lambda, skw[idx_best]))

###############################################################################
# Compare data transformations
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

data = []
labels = []
skw = []

###############################################################################
# Rain rates
# ~~~~~~~~~~
# First, let's have a look at the original rain rate values.

data.append((R - np.mean(R)) / np.std(R))
labels.append("R")
skw.append(skew(R))

###############################################################################
# dB transform
# ~~~~~~~~~~~~
# We transform the rainfall data into dB units: 10*log(R)

R_, _ = transformation.dB_transform(R, metadata)
data.append((R_ - np.mean(R_)) / np.std(R_))
labels.append("dB")
skw.append(skew(R_))

###############################################################################
# Square-root transform
# ~~~~~~~~~~~~~~~~~~~~~
# Transform the data using the square-root: sqrt(R)

R_, _ = transformation.sqrt_transform(R, metadata)
data.append((R_ - np.mean(R_)) / np.std(R_))
labels.append("sqrt")
skw.append(skew(R_))

###############################################################################
# Box-Cox transform
# ~~~~~~~~~~~~~~~~~
# We now apply the Box-Cox transform using the best parameter lambda found above.
R_, _ = transformation.boxcox_transform(R, metadata, Lambda) data.append((R_ - np.mean(R_)) / np.std(R_)) labels.append("Box-Cox\n($\lambda=$%.2f)" % Lambda) skw.append(skew(R_)) ############################################################################### # Normal quantile transform # ~~~~~~~~~~~~~~~~~~~~~~~~~ # At last, we apply the empirical normal quantile (NQ) transform as described in # `Bogner et al (2012)`_. # # .. _`Bogner et al (2012)`: http://dx.doi.org/10.5194/hess-16-1085-2012 R_, _ = transformation.NQ_transform(R, metadata) data.append((R_ - np.mean(R_)) / np.std(R_)) labels.append("NQ") skw.append(skew(R_)) ############################################################################### # By plotting all the results, we can notice first of all the strongly asymmetric # distribution of the original data (R) and that all transformations manage to # reduce its skewness. Among these, the Box-Cox transform (using the best parameter # lambda) and the normal quantile (NQ) transform provide the best correction. # Despite not producing a perfectly symmetric distribution, the square-root (sqrt) # transform has the strong advantage of being defined for zeros, too, while all # other transformations need an arbitrary rule for non-positive values. plot_distribution(data, labels, skw) plt.title("Data transforms") plt.tight_layout() plt.show() ================================================ FILE: examples/ens_kalman_filter_blended_forecast.py ================================================ # -*- coding: utf-8 -*- """ Ensemble-based Blending ======================= This tutorial demonstrates how to construct a blended rainfall forecast by combining an ensemble nowcast with an ensemble Numerical Weather Prediction (NWP) forecast. The method follows the Reduced-Space Ensemble Kalman Filter approach described in :cite:`Nerini2019MWR`. The procedure starts from the most recent radar observations. 
In the **prediction step**, a stochastic radar extrapolation technique
generates short-term forecasts. In the **correction step**, these forecasts are
updated using information from the latest ensemble NWP run. To make the matrix
operations tractable, the Bayesian update is carried out in the subspace
defined by the leading principal components—hence the term *reduced space*.

The datasets used in this tutorial are provided by the German Weather Service
(DWD).
"""

import os
from datetime import datetime, timedelta

import numpy as np
from matplotlib import pyplot as plt

import pysteps
from pysteps import io, rcparams, blending
from pysteps.utils import aggregate_fields_space
from pysteps.visualization import plot_precip_field

import pysteps_nwp_importers

################################################################################
# Read the radar images and the NWP forecast
# ------------------------------------------
#
# First, we import a sequence of 4 images of 5-minute radar composites
# and the corresponding NWP rainfall forecast that was available at that time.
#
# You need the pysteps-data archive downloaded and the pystepsrc file
# configured with the data_source paths pointing to data folders.
# Additionally, the pysteps-nwp-importers plugin needs to be installed, see
# https://github.com/pySTEPS/pysteps-nwp-importers.

# Selected case
date_radar = datetime.strptime("202506041645", "%Y%m%d%H%M")
# The last NWP forecast was issued at 16:00 - the blending tool will be able
# to find the correct lead times itself.
date_nwp = datetime.strptime("202506041600", "%Y%m%d%H%M")
radar_data_source = rcparams.data_sources["dwd"]
nwp_data_source = rcparams.data_sources["dwd_nwp"]

###############################################################################
# Load the data from the archive
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

root_path = radar_data_source["root_path"]
path_fmt = radar_data_source["path_fmt"]
fn_pattern = radar_data_source["fn_pattern"]
fn_ext = radar_data_source["fn_ext"]
importer_name = radar_data_source["importer"]
importer_kwargs = radar_data_source["importer_kwargs"]
timestep_radar = radar_data_source["timestep"]

# Find the radar files in the archive
fns = io.find_by_date(
    date_radar,
    root_path,
    path_fmt,
    fn_pattern,
    fn_ext,
    timestep_radar,
    num_prev_files=2,
)

# Read the radar composites (which are already in mm/h)
importer = io.get_method(importer_name, "importer")
radar_precip, _, radar_metadata = io.read_timeseries(fns, importer, **importer_kwargs)

# Import the NWP data
filename = os.path.join(
    nwp_data_source["root_path"],
    datetime.strftime(date_nwp, nwp_data_source["path_fmt"]),
    datetime.strftime(date_nwp, nwp_data_source["fn_pattern"])
    + "."
    + nwp_data_source["fn_ext"],
)

nwp_importer = io.get_method("dwd_nwp", "importer")
kwargs = nwp_data_source["importer_kwargs"]
# Resolve grid_file_path relative to PYSTEPS_DATA_PATH
kwargs["grid_file_path"] = os.path.join(
    os.environ["PYSTEPS_DATA_PATH"], kwargs["grid_file_path"]
)
nwp_precip, _, nwp_metadata = nwp_importer(filename, **kwargs)

# We lower the number of ens members to 10 to reduce the memory needs in the
# example here. However, it is advised to have a minimum of 20 members for the
# Reduced-Space Ensemble Kalman filter approach
nwp_precip = nwp_precip[:, 0:10, :].astype("single")

################################################################################
# Pre-processing steps
# --------------------

# Set the zerovalue and precipitation thresholds (these are fixed from DWD)
prec_thr = 0.049
zerovalue = 0.027

# Transform the zerovalue and precipitation thresholds to dBR
log_thr_prec = 10.0 * np.log10(prec_thr)
log_zerovalue = 10.0 * np.log10(zerovalue)

# Reproject the DWD ICON NWP data onto a regular grid
nwp_metadata["clon"] = nwp_precip["longitude"].values
nwp_metadata["clat"] = nwp_precip["latitude"].values

# We change the time step from the DWD NWP data to 15 min (it is actually 5 min)
# to have a longer forecast horizon available for this example, as pysteps_data
# only contains 1 hour of DWD forecast data (to minimize storage).
nwp_metadata["accutime"] = 15.0
nwp_precip = (
    nwp_precip.values.astype("single") * 3.0
)  # (to account for the change in time step from 5 to 15 min)

# Reproject ID2 data onto a regular grid
nwp_precip_rprj, nwp_metadata_rprj = (
    pysteps_nwp_importers.importer_dwd_nwp.unstructured2regular(
        nwp_precip, nwp_metadata, radar_metadata
    )
)
# release the unstructured array to free memory
nwp_precip = None

# Upscale both the radar and NWP data to a coarser resolution (four times the
# original pixel size) to lower the memory needs (for this example)
radar_precip, radar_metadata = aggregate_fields_space(
    radar_precip, radar_metadata, radar_metadata["xpixelsize"] * 4
)
nwp_precip_rprj, nwp_metadata_rprj = aggregate_fields_space(
    nwp_precip_rprj.astype("single"),
    nwp_metadata_rprj,
    nwp_metadata_rprj["xpixelsize"] * 4,
)

# Make sure the units are in mm/h
converter = pysteps.utils.get_method("mm/h")
radar_precip, radar_metadata = converter(
    radar_precip, radar_metadata
)  # The radar data should already be in mm/h
nwp_precip_rprj, nwp_metadata_rprj = converter(nwp_precip_rprj, nwp_metadata_rprj)

# Threshold the data
radar_precip[radar_precip < prec_thr] = 0.0
nwp_precip_rprj[nwp_precip_rprj < prec_thr] = 0.0

# Plot the radar rainfall field and the first time step and first ensemble member
# of the NWP forecast.
date_str = datetime.strftime(date_radar, "%Y-%m-%d %H:%M")

plt.figure(figsize=(10, 5))
plt.subplot(121)
plot_precip_field(
    radar_precip[-1, :, :],
    geodata=radar_metadata,
    title=f"Radar observation at {date_str}",
    colorscale="STEPS-NL",
)
plt.subplot(122)
plot_precip_field(
    nwp_precip_rprj[0, 0, :, :],
    geodata=nwp_metadata_rprj,
    title=f"NWP forecast at {date_str}",
    colorscale="STEPS-NL",
)
plt.tight_layout()
plt.show()

# transform the data to dB
transformer = pysteps.utils.get_method("dB")
radar_precip, radar_metadata = transformer(
    radar_precip, radar_metadata, threshold=prec_thr, zerovalue=log_zerovalue
)
nwp_precip_rprj, nwp_metadata_rprj = transformer(
    nwp_precip_rprj, nwp_metadata_rprj, threshold=prec_thr, zerovalue=log_zerovalue
)

###############################################################################
# Determine the velocity fields
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# In contrast to the STEPS blending method, no motion field for the NWP fields
# is needed in the ensemble kalman filter blending approach.

# Estimate the motion vector field
oflow_method = pysteps.motion.get_method("lucaskanade")
velocity_radar = oflow_method(radar_precip)

################################################################################
# The blended forecast
# ~~~~~~~~~~~~~~~~~~~~

# Set the timestamps for radar_precip and nwp_precip_rprj
timestamps_radar = np.array(
    sorted(
        [
            date_radar - timedelta(minutes=i * timestep_radar)
            for i in range(len(radar_precip))
        ]
    )
)
timestamps_nwp = np.array(
    sorted(
        [
            date_nwp + timedelta(minutes=i * int(nwp_metadata_rprj["accutime"]))
            for i in range(nwp_precip_rprj.shape[0])
        ]
    )
)

# Set the combination kwargs
combination_kwargs = dict(
    n_tapering=0,  # Tapering parameter: controls how many diagonals of the covariance matrix are kept (0 = no tapering)
    non_precip_mask=True,  # Specifies whether the computation should be truncated on grid boxes where at least a minimum number of ens. members forecast precipitation.
    n_ens_prec=1,  # Minimum number of ens. members that forecast precip for the above-mentioned mask.
    lien_criterion=True,  # Specifies whether the Lien criterion should be applied.
    n_lien=5,  # Minimum number of ensemble members that forecast precipitation for the Lien criterion (equals half the ens. members here)
    prob_matching="iterative",  # The type of probability matching used.
    inflation_factor_bg=3.0,  # Inflation factor of the background (NWC) covariance matrix. (this value indicates a faster convergence towards the NWP ensemble)
    inflation_factor_obs=1.0,  # Inflation factor of the observation (NWP) covariance matrix.
    offset_bg=0.0,  # Offset of the background (NWC) covariance matrix.
    offset_obs=0.0,  # Offset of the observation (NWP) covariance matrix.
    nwp_hres_eff=14.0,  # Effective horizontal resolution of the utilized NWP model (in km here).
    sampling_prob_source="ensemble",  # Computation method of the sampling probability for the probability matching. 'ensemble' computes this probability as the ratio between the ensemble differences.
    use_accum_sampling_prob=False,  # Specifies whether the current sampling probability should be used for the probability matching or a probability integrated over the previous forecast time.
)

# Call the PCA EnKF method
blending_method = blending.get_method("pca_enkf")
precip_forecast = blending_method(
    obs_precip=radar_precip,  # Radar data in dBR
    obs_timestamps=timestamps_radar,  # Radar timestamps
    nwp_precip=nwp_precip_rprj,  # NWP in dBR
    nwp_timestamps=timestamps_nwp,  # NWP timestamps
    velocity=velocity_radar,  # Velocity vector field
    forecast_horizon=120,  # Forecast length (horizon) in minutes - only a short forecast horizon due to the limited dataset length stored here.
    issuetime=date_radar,  # Forecast issue time as datetime object
    n_ens_members=10,  # No. of ensemble members
    precip_mask_dilation=1,  # Dilation of precipitation mask in grid boxes
    n_cascade_levels=6,  # No. of cascade levels
    precip_thr=log_thr_prec,  # Precip threshold
    norain_thr=0.0005,  # Minimum of 0.5% precip needed, otherwise 'zero rainfall'
    num_workers=4,  # No. of parallel threads
    noise_stddev_adj="auto",  # Standard deviation adjustment
    noise_method="ssft",  # SSFT as noise method
    enable_combination=True,  # Enable combination
    noise_kwargs={"win_size": (512, 512), "win_fun": "hann", "overlap": 0.5},
    extrap_kwargs={"interp_order": 3, "map_coordinates_mode": "nearest"},
    combination_kwargs=combination_kwargs,
    filter_kwargs={"include_mean": True},
)

# Transform the data back into mm/h
precip_forecast, _ = converter(precip_forecast, radar_metadata)
radar_precip, _ = converter(radar_precip, radar_metadata)
nwp_precip, _ = converter(nwp_precip_rprj, nwp_metadata_rprj)

################################################################################
# Visualize the output
# ~~~~~~~~~~~~~~~~~~~~
#
# The NWP rainfall forecast has a much lower weight than the radar-based
# extrapolation forecast at the issue time of the forecast (+0 min). Therefore,
# the first time steps consist mostly of the extrapolation. However, near the
# end of the forecast (+120 min), the NWP share in the blended forecast has
# become the more dominant contribution to the forecast and thus the forecast
# starts to resemble the NWP forecast.

fig = plt.figure(figsize=(5, 12))
leadtimes_min = [15, 30, 45, 60, 90, 120]
n_leadtimes = len(leadtimes_min)
for n, leadtime in enumerate(leadtimes_min):
    # Nowcast with blending into NWP
    plt.subplot(n_leadtimes, 2, n * 2 + 1)
    plot_precip_field(
        precip_forecast[0, int(leadtime / timestep_radar) - 1, :, :],
        geodata=radar_metadata,
        title=f"Blended +{leadtime} min",
        axis="off",
        colorscale="STEPS-NL",
        colorbar=False,
    )

    # Raw NWP forecast
    plt.subplot(n_leadtimes, 2, n * 2 + 2)
    plot_precip_field(
        nwp_precip[int(leadtime / int(nwp_metadata_rprj["accutime"])) - 1, 0, :, :],
        geodata=nwp_metadata_rprj,
        title=f"NWP +{leadtime} min",
        axis="off",
        colorscale="STEPS-NL",
        colorbar=False,
    )

################################################################################
# References
# ~~~~~~~~~~
#
# Nerini, D., Foresti, L., Leuenberger, D., Robert, S., Germann, U. 2019. "A
# Reduced-Space Ensemble Kalman Filter Approach for Flow-Dependent Integration
# of Radar Extrapolation Nowcasts and NWP Precipitation Ensembles." Monthly
# Weather Review 147(3): 987-1006. https://doi.org/10.1175/MWR-D-18-0258.1.
================================================ FILE: examples/linda_nowcasts.py ================================================
#!/bin/env python
"""
LINDA nowcasts
==============

This example shows how to compute and plot a deterministic and ensemble LINDA
nowcasts using Swiss radar data.
"""

from datetime import datetime
import warnings

warnings.simplefilter("ignore")
import matplotlib.pyplot as plt

from pysteps import io, rcparams
from pysteps.motion.lucaskanade import dense_lucaskanade
from pysteps.nowcasts import linda, sprog, steps
from pysteps.utils import conversion, dimension, transformation
from pysteps.visualization import plot_precip_field

###############################################################################
# Read the input rain rate fields
# -------------------------------

date = datetime.strptime("201701311200", "%Y%m%d%H%M")
data_source = "mch"

# Read the data source information from rcparams
datasource_params = rcparams.data_sources[data_source]

# Find the radar files in the archive
fns = io.find_by_date(
    date,
    datasource_params["root_path"],
    datasource_params["path_fmt"],
    datasource_params["fn_pattern"],
    datasource_params["fn_ext"],
    datasource_params["timestep"],
    num_prev_files=2,
)

# Read the data from the archive
importer = io.get_method(datasource_params["importer"], "importer")
reflectivity, _, metadata = io.read_timeseries(
    fns, importer, **datasource_params["importer_kwargs"]
)

# Convert reflectivity to rain rate
rainrate, metadata = conversion.to_rainrate(reflectivity, metadata)

# Upscale data to 2 km to reduce computation time
rainrate, metadata = dimension.aggregate_fields_space(rainrate, metadata, 2000)

# Plot the most recent rain rate field
plt.figure()
plot_precip_field(rainrate[-1, :, :])
plt.show()

###############################################################################
# Estimate the advection field
# ----------------------------

# The advection field is estimated using the Lucas-Kanade optical flow
advection = dense_lucaskanade(rainrate, verbose=True)

###############################################################################
# Deterministic nowcast
# ---------------------

# Compute 30-minute LINDA nowcast with 8 parallel workers
# Restrict the number of features to 15 to reduce computation time
# (measure_time=True makes forecast() return a tuple; [0] keeps the nowcast)
nowcast_linda = linda.forecast(
    rainrate,
    advection,
    6,
    max_num_features=15,
    add_perturbations=False,
    num_workers=8,
    measure_time=True,
)[0]

# Compute S-PROG nowcast for comparison
rainrate_db, _ = transformation.dB_transform(
    rainrate, metadata, threshold=0.1, zerovalue=-15.0
)
nowcast_sprog = sprog.forecast(
    rainrate_db[-3:, :, :],
    advection,
    6,
    n_cascade_levels=6,
    precip_thr=-10.0,
)

# Convert reflectivity nowcast to rain rate
nowcast_sprog = transformation.dB_transform(
    nowcast_sprog, threshold=-10.0, inverse=True
)[0]

# Plot the nowcasts
fig = plt.figure(figsize=(9, 4))
ax = fig.add_subplot(1, 2, 1)
plot_precip_field(
    nowcast_linda[-1, :, :],
    title="LINDA (+ 30 min)",
)
ax = fig.add_subplot(1, 2, 2)
plot_precip_field(
    nowcast_sprog[-1, :, :],
    title="S-PROG (+ 30 min)",
)
plt.show()

###############################################################################
# The above figure shows that the filtering scheme implemented in LINDA preserves
# small-scale and band-shaped features better than S-PROG. This is because the
# former uses a localized elliptical convolution kernel instead of the
# cascade-based autoregressive process, where the parameters are estimated over
# the whole domain.

###############################################################################
# Probabilistic nowcast
# ---------------------

# Compute 30-minute LINDA nowcast ensemble with 40 members and 8 parallel workers
nowcast_linda = linda.forecast(
    rainrate,
    advection,
    6,
    max_num_features=15,
    add_perturbations=True,
    vel_pert_method=None,
    n_ens_members=40,
    num_workers=8,
    measure_time=True,
)[0]

# Compute 40-member STEPS nowcast for comparison
nowcast_steps = steps.forecast(
    rainrate_db[-3:, :, :],
    advection,
    6,
    40,
    n_cascade_levels=6,
    precip_thr=-10.0,
    mask_method="incremental",
    kmperpixel=2.0,
    timestep=datasource_params["timestep"],
    vel_pert_method=None,
)

# Convert reflectivity nowcast to rain rate
nowcast_steps = transformation.dB_transform(
    nowcast_steps, threshold=-10.0, inverse=True
)[0]

# Plot two ensemble members of both nowcasts
fig = plt.figure()
for i in range(2):
    ax = fig.add_subplot(2, 2, i + 1)
    ax = plot_precip_field(
        nowcast_linda[i, -1, :, :], geodata=metadata, colorbar=False, axis="off"
    )
    ax.set_title(f"LINDA Member {i+1}")

for i in range(2):
    ax = fig.add_subplot(2, 2, 3 + i)
    ax = plot_precip_field(
        nowcast_steps[i, -1, :, :], geodata=metadata, colorbar=False, axis="off"
    )
    ax.set_title(f"STEPS Member {i+1}")

###############################################################################
# The above figure shows the main difference between LINDA and STEPS. In
# addition to the convolution kernel, another improvement in LINDA is a
# localized perturbation generator using the short-space Fourier transform
# (SSFT) and a spatially variable marginal distribution. As a result, the
# LINDA ensemble members preserve the anisotropic and small-scale structures
# considerably better than STEPS.
plt.tight_layout() plt.show() ================================================ FILE: examples/my_first_nowcast.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "L_dntwSQBnbK" }, "source": [ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pySTEPS/pysteps/blob/master/examples/my_first_nowcast.ipynb)\n", "\n", "# My first precipitation nowcast\n", "\n", "In this example, we will use pysteps to compute and plot an extrapolation nowcast using the NSSL's Multi-Radar/Multi-Sensor System\n", "([MRMS](https://www.nssl.noaa.gov/projects/mrms/)) rain rate product.\n", "\n", "The MRMS precipitation product is available every 2 minutes, over the contiguous US. \n", "Each precipitation composite has 3500 x 7000 grid points, separated 1 km from each other.\n", "\n", "## Set-up Colab environment\n", "\n", "**Important**: In colab, execute this section one cell at a time. Trying to execute all the cells at once may result in cells being skipped and some dependencies not being installed.\n", "\n", "First, let's set up our working environment. Note that these steps are only needed to work with google colab. \n", "\n", "To install pysteps locally, you can follow [these instructions](https://pysteps.readthedocs.io/en/latest/user_guide/install_pysteps.html).\n", "\n", "First, let's install the latest Pysteps version from the Python Package Index (PyPI) using pip. This will also install the minimal dependencies needed to run pysteps. \n", "\n", "#### Install optional dependencies\n", "\n", "Now, let's install the optional dependencies that will allow us to plot and read the example data.\n", "- pygrib: to read the MRMS data grib format\n", "- pyproj: needed by pygrib\n", "\n", "**NOTE:** Do not import pysteps in this notebook until the following optional dependencies are loaded.
Otherwise, pysteps will assume that they are not installed and some of its functionalities won't work." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "mFx4hq_DBtp-" }, "outputs": [], "source": [ "# These libraries are needed for the pygrib library in Colab.\n", "# Note that is needed if you install pygrib using pip.\n", "# If you use conda, the libraries will be installed automatically.\n", "! apt-get install libeccodes-dev libproj-dev\n", "\n", "# Install the python packages\n", "! pip install pyproj\n", "! pip install pygrib" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "6BF2paxnTuGB" }, "outputs": [], "source": [ "# Uninstall existing shapely\n", "# We will re-install shapely in the next step by ignoring the binary\n", "# wheels to make it compatible with other modules that depend on\n", "# GEOS, such as Cartopy (used here).\n", "!pip uninstall --yes shapely" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "7x8Hx_4hE_BU" }, "outputs": [], "source": [ "# To install cartopy in Colab using pip, we need to install the library\n", "# dependencies first.\n", "\n", "!apt-get install -qq libgdal-dev libgeos-dev\n", "!pip install shapely --no-binary shapely\n", "!pip install cartopy" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "ybD55ZJhmdYa" }, "source": [ "#### Install pysteps\n", "\n", "Now that all dependencies are installed, we can install pysteps." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "VA7zp3nRmhfF" }, "outputs": [], "source": [ "# ! pip install git+https://github.com/pySTEPS/pysteps\n", "! 
pip install pysteps" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "-AkfR6JSBujn" }, "source": [ "## Getting the example data\n", "\n", "Now that we have the environment ready, let's install the example data and configure the pysteps's default parameters by following [this tutorial](https://pysteps.readthedocs.io/en/latest/user_guide/example_data.html).\n", "\n", "First, we will use the [pysteps.datasets.download_pysteps_data()](https://pysteps.readthedocs.io/en/latest/generated/pysteps.datasets.download_pysteps_data.html) function to download the data.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "vri-R_ZVGihj" }, "outputs": [], "source": [ "# Import the helper functions\n", "from pysteps.datasets import download_pysteps_data, create_default_pystepsrc\n", "\n", "# Download the pysteps data in the \"pysteps_data\"\n", "download_pysteps_data(\"pysteps_data\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "wdKfjliCKXhx" }, "source": [ "Next, we need to create a default configuration file that points to the downloaded data. \n", "By default, pysteps will place the configuration file in `$HOME/.pysteps` (unix and Mac OS X) or `$USERPROFILE/pysteps` (windows).\n", "To quickly create a configuration file, we will use the [pysteps.datasets.create_default_pystepsrc()](https://pysteps.readthedocs.io/en/latest/generated/pysteps.datasets.create_default_pystepsrc.html#pysteps.datasets.create_default_pystepsrc) helper function." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "pGdKHa36H5JX" }, "outputs": [], "source": [ "# If the configuration file is placed in one of the default locations\n", "# (https://pysteps.readthedocs.io/en/latest/user_guide/set_pystepsrc.html#configuration-file-lookup)\n", "# it will be loaded automatically when pysteps is imported.\n", "config_file_path = create_default_pystepsrc(\"pysteps_data\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "DAFUJgR5K1CS" }, "source": [ "Since pysteps was already initialized in this notebook, we need to load the new configuration file and update the default configuration." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "tMIbQLPAK42h" }, "outputs": [], "source": [ "# Import pysteps and load the new configuration file\n", "import pysteps\n", "\n", "_ = pysteps.load_config_file(config_file_path, verbose=True)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "SzSqp1DFJ0M9" }, "source": [ "Let's see what the default parameters look like (these are stored in the\n", "[pystepsrc file](https://pysteps.readthedocs.io/en/latest/user_guide/set_pystepsrc.html)). We will be using them to load the MRMS data set." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "6Gr65nH4BnbP" }, "outputs": [], "source": [ "# The default parameters are stored in pysteps.rcparams.\n", "from pprint import pprint\n", "\n", "pprint(pysteps.rcparams.data_sources[\"mrms\"])" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "9M_buv7WBnbf" }, "source": [ "This should have printed the following lines:\n", "\n", "- `fn_ext`: 'grib2' -- The file extension\n", "- `fn_pattern`: 'PrecipRate_00.00_%Y%m%d-%H%M%S' -- The file naming convention of the MRMS data.\n", "- `importer`: 'mrms_grib' -- The name of the importer for the MRMS data.\n", "- `importer_kwargs`: {} -- Extra options provided to the importer. None in this example.\n", "- `path_fmt`: '%Y/%m/%d' -- The folder structure in which the files are stored. Here, year/month/day/filename.\n", "- `root_path`: '/content/pysteps_data/mrms' -- The root path of the MRMS-data.\n", "- `timestep`: 2 -- The temporal interval of the (radar) rainfall data\n", "\n", "Note that the default `timestep` parameter is 2 minutes, which corresponds to the time interval at which the MRMS product is available.\n", "\n", "## Load the MRMS example data\n", "\n", "Now that we have installed the example data, let's import the example MRMS dataset using the [load_dataset()](https://pysteps.readthedocs.io/en/latest/generated/pysteps.datasets.load_dataset.html) helper function from the `pysteps.datasets` module.\n", "\n", "We import 1 hour and 10 minutes of data, which corresponds to a sequence of 35 frames of 2-D precipitation composites.\n", "Note that importing the data takes approximately 30 seconds." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "-8Q4e58VBnbl" }, "outputs": [], "source": [ "from pysteps.datasets import load_dataset\n", "\n", "# We'll import the time module to measure the time the importer needed\n", "import time\n", "\n", "start_time = time.time()\n", "\n", "# Import the data\n", "precipitation, metadata, timestep = load_dataset(\n", " \"mrms\", frames=35\n", ") # precipitation in mm/h\n", "\n", "end_time = time.time()\n", "\n", "print(\"Precipitation data imported\")\n", "print(\"Importing the data took \", (end_time - start_time), \" seconds\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "btiTxYYMBnby" }, "source": [ "Let's have a look at the values returned by the `load_dataset()` function. \n", "\n", "- `precipitation`: A numpy array with (time, latitude, longitude) dimensions.\n", "- `metadata`: A dictionary with additional information (pixel sizes, map projections, etc.).\n", "- `timestep`: Time separation between each sample (in minutes)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "WqUHbJ_qBnb3" }, "outputs": [], "source": [ "# Let's inspect the shape of the imported data array\n", "precipitation.shape" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "xa8woT0ABncD" }, "source": [ "Note that the shape of the precipitation is 4 times smaller than the raw MRMS data (3500 x 7000).\n", "The `load_dataset()` function uses the default parameters from `importers` to read the data. By default, the MRMS importer upscales the data 4x. That is, from ~1km resolution to ~4km. It also uses single precision to reduce the memory requirements.\n", "Thanks to the upscaling, the memory footprint of this example dataset is ~200Mb instead of the 3.1Gb of the raw (3500 x 7000) data. 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "22O2YXrfBncG" }, "outputs": [], "source": [ "timestep # In minutes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "J8_4hwcXBncT" }, "outputs": [], "source": [ "pprint(metadata)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "uQREORtJBnch" }, "source": [ "# Time to make a nowcast\n", "\n", "So far, we have 1 hour and 10 minutes of precipitation images, separated 2 minutes from each other.\n", "But, how do we use that data to run a precipitation forecast? \n", "\n", "A simple way is by extrapolating the precipitation field, assuming it will continue to move as observed in the recent past, and without changes in intensity. This is commonly known as *Lagrangian persistence*.\n", "\n", "The first step to run our nowcast based on Lagrangian persistence, is the estimation of the motion field from a sequence of past precipitation observations.\n", "We use the Lucas-Kanade (LK) optical flow method implemented in pysteps.\n", "This method follows a local tracking approach that relies on the OpenCV package.\n", "Local features are tracked in a sequence of two or more radar images.\n", "The scheme includes a final interpolation step to produce a smooth field of motion vectors.\n", "Other optical flow methods are also available in pysteps. \n", "Check the full list [here](https://pysteps.readthedocs.io/en/latest/pysteps_reference/motion.html).\n", "\n", "Now let's use the first 5 precipitation images (10 min) to estimate the motion field of the radar pattern and the remaining 30 images (1h) to evaluate the quality of our forecast." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "jcb2Sf6xBnck" }, "outputs": [], "source": [ "# precipitation[0:5] -> Used to find motion (past data). 
Let's call it training precip.\n", "train_precip = precipitation[0:5]\n", "\n", "# precipitation[5:] -> Used to evaluate forecasts (future data, not available in \"real\" forecast situation)\n", "# Let's call it observed precipitation because we will use it to compare our forecast with the actual observations.\n", "observed_precip = precipitation[5:]" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "xt1TbB0RBncu" }, "source": [ "Let's see what this 'training' precipitation event looks like using the [pysteps.visualization.plot_precip_field](https://pysteps.readthedocs.io/en/latest/generated/pysteps.visualization.precipfields.plot_precip_field.html) function." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "bmNYLo1jBncw" }, "outputs": [], "source": [ "from matplotlib import pyplot as plt\n", "from pysteps.visualization import plot_precip_field\n", "\n", "# Set a figure size that looks nice ;)\n", "plt.figure(figsize=(9, 5), dpi=100)\n", "\n", "# Plot the last rainfall field in the \"training\" data.\n", "# train_precip[-1] -> Last available composite for nowcasting.\n", "plot_precip_field(train_precip[-1], geodata=metadata, axis=\"off\")\n", "plt.show() # (This line is actually not needed if you are using jupyter notebooks)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "NVRfJm11Bnc7" }, "source": [ "Did you note the **shaded grey** regions? Those are the regions where no valid observations were available to estimate the precipitation (e.g., due to ground clutter, no radar coverage, or radar beam blockage).\n", "Those regions need to be handled with care when we run our nowcast.\n", "\n", "### Data exploration\n", "\n", "Before we produce a forecast, let's explore the precipitation data. In particular, let's see how the distribution of the rain rate values looks."
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "WER6RttPBnc9" }, "outputs": [], "source": [ "import numpy as np\n", "\n", "# Let's define some plotting default parameters for the next plots\n", "# Note: This is not strictly needed.\n", "plt.rc(\"figure\", figsize=(4, 4))\n", "plt.rc(\"figure\", dpi=100)\n", "plt.rc(\"font\", size=14) # controls default text sizes\n", "plt.rc(\"axes\", titlesize=14) # fontsize of the axes title\n", "plt.rc(\"axes\", labelsize=14) # fontsize of the x and y labels\n", "plt.rc(\"xtick\", labelsize=14) # fontsize of the tick labels\n", "plt.rc(\"ytick\", labelsize=14) # fontsize of the tick labels\n", "\n", "# Let's use the last available composite for nowcasting from the \"training\" data (train_precip[-1])\n", "# Also, we will discard any invalid value.\n", "valid_precip_values = train_precip[-1][~np.isnan(train_precip[-1])]\n", "\n", "# Plot the histogram\n", "bins = np.concatenate(([-0.01, 0.01], np.linspace(1, 40, 39)))\n", "plt.hist(valid_precip_values, bins=bins, log=True, edgecolor=\"black\")\n", "plt.autoscale(tight=True, axis=\"x\")\n", "plt.xlabel(\"Rainfall intensity [mm/h]\")\n", "plt.ylabel(\"Counts\")\n", "plt.title(\"Precipitation rain rate histogram in mm/h units\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "O6TvIXS3BndH" }, "source": [ "The histogram shows that rain rate values have a non-Gaussian and asymmetric distribution that is bounded at zero. 
Also, the probability of occurrence decays extremely fast with increasing rain rate values (note the logarithmic y-axis).\n", "\n", "\n", "For better performance of the motion estimation algorithms, we can convert the rain rate values (in mm/h) to a more log-normal distribution of rain rates by applying the following logarithmic transformation:\n", "\n", "\\begin{equation}\n", "R\\rightarrow\n", "\\begin{cases}\n", " 10\\log_{10}R, & \\text{if } R\\geq 0.1\\text{mm h$^{-1}$} \\\\\n", " -15, & \\text{otherwise}\n", "\\end{cases}\n", "\\end{equation}\n", "\n", "The transformed precipitation corresponds to logarithmic rain rates in units of dBR. The value of −15 dBR is equivalent to assigning a rain rate of approximately 0.03 mm h$^{−1}$ to the zeros. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "hgA4PeapBndK" }, "outputs": [], "source": [ "from pysteps.utils import transformation\n", "\n", "# Log-transform the data to dBR.\n", "# The threshold of 0.1 mm/h sets the fill value to -15 dBR.\n", "train_precip_dbr, metadata_dbr = transformation.dB_transform(\n", " train_precip, metadata, threshold=0.1, zerovalue=-15.0\n", ")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "Nx3VESBlBndU" }, "source": [ "Let's inspect the resulting **transformed precipitation** distribution." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "rYS5hBIGBndX" }, "outputs": [], "source": [ "# Only use the valid data!\n", "valid_precip_dbr = train_precip_dbr[-1][~np.isnan(train_precip_dbr[-1])]\n", "\n", "plt.figure(figsize=(4, 4), dpi=100)\n", "\n", "# Plot the histogram\n", "counts, bins, _ = plt.hist(valid_precip_dbr, bins=40, log=True, edgecolor=\"black\")\n", "plt.autoscale(tight=True, axis=\"x\")\n", "plt.xlabel(\"Rainfall intensity [dB]\")\n", "plt.ylabel(\"Counts\")\n", "plt.title(\"Precipitation rain rate histogram in dB units\")\n", "\n", "# Let's add a lognormal distribution that fits that data to the plot.\n", "import scipy\n", "\n", "bin_center = (bins[1:] + bins[:-1]) * 0.5\n", "bin_width = np.diff(bins)\n", "\n", "# We will only use one composite to fit the function to speed up things.\n", "# First, remove the no precip areas.\"\n", "precip_to_fit = valid_precip_dbr[valid_precip_dbr > -15]\n", "\n", "fit_params = scipy.stats.lognorm.fit(precip_to_fit)\n", "\n", "fitted_pdf = scipy.stats.lognorm.pdf(bin_center, *fit_params)\n", "\n", "# Multiply pdf by the bin width and the total number of grid points: pdf -> total counts per bin.\n", "fitted_pdf = fitted_pdf * bin_width * precip_to_fit.size\n", "\n", "# Plot the log-normal fit\n", "plt.plot(bin_center, fitted_pdf, label=\"Fitted log-normal\")\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "ZocO5zqUBndg" }, "source": [ "That looks more like a log-normal distribution. Note the large peak at -15dB. That peak corresponds to \"zero\" (below threshold) precipitation. The jump with no data in between -15 and -10 dB is caused by the precision of the data, which we had set to 1 decimal. 
Hence, the lowest precipitation intensities (above zero) are 0.1 mm/h (= -10 dB).\n", "\n", "## Compute the nowcast\n", "\n", "These are the minimal steps to compute a short-term forecast using Lagrangian extrapolation of the precipitation patterns:\n", " \n", " 1. Estimate the precipitation motion field.\n", " 1. Use the motion field to advect the most recent radar rainfall field and produce an extrapolation forecast.\n", "\n", "### Estimate the motion field\n", "\n", "Now we can estimate the motion field. Here we use a local feature-tracking approach (Lucas-Kanade).\n", "However, check the other methods available in the [pysteps.motion](https://pysteps.readthedocs.io/en/latest/pysteps_reference/motion.html) module." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "mnACmX_0Bndi" }, "outputs": [], "source": [ "# Estimate the motion field with Lucas-Kanade\n", "from pysteps import motion\n", "from pysteps.visualization import plot_precip_field, quiver\n", "\n", "# Import the Lucas-Kanade optical flow algorithm\n", "oflow_method = motion.get_method(\"LK\")\n", "\n", "# Estimate the motion field from the training data (in dBR)\n", "motion_field = oflow_method(train_precip_dbr)\n", "\n", "## Plot the motion field.\n", "# Use a figure size that looks nice ;)\n", "plt.figure(figsize=(9, 5), dpi=100)\n", "plt.title(\"Estimated motion field with the Lukas-Kanade algorithm\")\n", "\n", "# Plot the last rainfall field in the \"training\" data.\n", "# Remember to use the mm/h precipitation data since plot_precip_field assumes\n", "# mm/h by default. 
You can change this behavior using the \"units\" keyword.\n", "plot_precip_field(train_precip[-1], geodata=metadata, axis=\"off\")\n", "\n", "# Plot the motion field vectors\n", "quiver(motion_field, geodata=metadata, step=40)\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "YObddRFCBnd1" }, "source": [ "### Extrapolate the observations\n", "\n", "We have all ingredients to make an extrapolation nowcast now. \n", "The final step is to advect the most recent radar rainfall field along the estimated motion field, producing an extrapolation forecast." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "erSLAzvNBnd3" }, "outputs": [], "source": [ "from pysteps import nowcasts\n", "\n", "start = time.time()\n", "\n", "# Extrapolate the last radar observation\n", "extrapolate = nowcasts.get_method(\"extrapolation\")\n", "\n", "# You can use the precipitation observations directly in mm/h for this step.\n", "last_observation = train_precip[-1]\n", "\n", "last_observation[~np.isfinite(last_observation)] = metadata[\"zerovalue\"]\n", "\n", "# We set the number of leadtimes (the length of the forecast horizon) to the\n", "# length of the observed/verification precipitation data. 
In this way, we'll get\n", "# a forecast that covers these time intervals.\n", "n_leadtimes = observed_precip.shape[0]\n", "\n", "# Advect the most recent radar rainfall field and make the nowcast.\n", "precip_forecast = extrapolate(train_precip[-1], motion_field, n_leadtimes)\n", "\n", "# This shows the shape of the resulting array with [time intervals, rows, cols]\n", "print(\"The shape of the resulting array is: \", precip_forecast.shape)\n", "\n", "end = time.time()\n", "print(\"Advecting the radar rainfall fields took \", (end - start), \" seconds\")" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "csy5s-yRBneB" }, "source": [ "Let's inspect the last forecast time (hence this is the forecast rainfall an hour ahead)." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "MUiS5-HPBneD" }, "outputs": [], "source": [ "# Plot precipitation at the end of the forecast period.\n", "plt.figure(figsize=(9, 5), dpi=100)\n", "plot_precip_field(precip_forecast[-1], geodata=metadata, axis=\"off\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "mQEseXvhBneI" }, "source": [ "## Evaluate the forecast quality\n", "\n", "Many verification methods are already present in pysteps (see a complete list [here](https://pysteps.readthedocs.io/en/latest/pysteps_reference/verification.html)). We just have to import them. \n", "\n", "Here, we will evaluate our forecast using the Fractions Skill Score (FSS). \n", "This metric provides an intuitive assessment of the dependency of forecast skill on spatial scale and intensity. 
This makes the FSS an ideal skill score for high-resolution precipitation forecasts.\n", "\n", "More precisely, the FSS is a neighborhood spatial verification method that directly compares the fractional coverage of events in windows surrounding the observations and forecasts.\n", "The FSS varies from 0 (total mismatch) to 1 (perfect forecast).\n", "For most situations, an FSS value of > 0.5 serves as a good indicator of a useful forecast ([Roberts and Lean, 2008](https://journals.ametsoc.org/doi/full/10.1175/2007MWR2123.1) and [Skok and Roberts, 2016](https://rmets.onlinelibrary.wiley.com/doi/full/10.1002/qj.2849)). " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "No3qBjqSBneK" }, "outputs": [], "source": [ "from pysteps import verification\n", "\n", "fss = verification.get_method(\"FSS\")\n", "\n", "# Compute fractions skill score (FSS) for all lead times for different scales using a 1 mm/h detection threshold.\n", "scales = [\n", " 2,\n", " 4,\n", " 8,\n", " 16,\n", " 32,\n", " 64,\n", "] # In grid points.\n", "\n", "scales_in_km = np.array(scales) * 4\n", "\n", "# Set the threshold\n", "thr = 1.0 # in mm/h\n", "\n", "score = []\n", "\n", "# Calculate the FSS for every lead time and all predefined scales.\n", "for i in range(n_leadtimes):\n", " score_ = []\n", " for scale in scales:\n", " score_.append(\n", " fss(precip_forecast[i, :, :], observed_precip[i, :, :], thr, scale)\n", " )\n", " score.append(score_)\n", "\n", "# Now plot it\n", "plt.figure()\n", "x = np.arange(1, n_leadtimes + 1) * timestep\n", "plt.plot(x, score, lw=2.0)\n", "plt.xlabel(\"Lead time [min]\")\n", "plt.ylabel(\"FSS ( > 1.0 mm/h ) \")\n", "plt.title(\"Fractions Skill Score\")\n", "plt.legend(\n", " scales_in_km,\n", " title=\"Scale [km]\",\n", " loc=\"center left\",\n", " bbox_to_anchor=(1.01, 0.5),\n", " bbox_transform=plt.gca().transAxes,\n", ")\n", "plt.autoscale(axis=\"x\", tight=True)\n", "plt.show()" ] }, { "cell_type": 
"markdown", "metadata": {}, "source": [ "As you can see, the FSS decreases with increasing lead time.\n", "This is expected, as the forecasting quality slowly decreases when we forecast further ahead.\n", "Upscaling the forecast, however, clearly leads to higher skill (up to longer ahead) compared to the forecast on the highest resolutions.\n", "\n", "## Concluding remarks\n", "Congratulations, you have successfully made your first nowcast using the pysteps library!\n", "This was a simple extrapolation-based nowcast and a lot more advanced options are possible too, see [the pysteps examples gallery](https://pysteps.readthedocs.io/en/latest/auto_examples/index.html) for some nice examples." ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "my_first_nowcast.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.2" }, "pycharm": { "stem_cell": { "cell_type": "raw", "metadata": { "collapsed": false }, "source": [] } } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: examples/optical_flow_methods_convergence.py ================================================ # coding: utf-8 """ Optical flow methods convergence ================================ In this example we test the convergence of the optical flow methods available in pysteps using idealized motion fields. To test the convergence, using an example precipitation field we will: - Read precipitation field from a file - Morph the precipitation field using a given motion field (linear or rotor) to generate a sequence of moving precipitation patterns. 
- Using the available optical flow methods, retrieve the motion field from the precipitation time sequence (synthetic precipitation observations). Let's first load the libraries that we will use. """ from datetime import datetime import time import matplotlib.pyplot as plt import numpy as np from matplotlib.pyplot import get_cmap from scipy.ndimage import uniform_filter import pysteps as stp from pysteps import motion, io, rcparams from pysteps.motion.vet import morph from pysteps.visualization import plot_precip_field, quiver ################################################################################ # Load the reference precipitation data # ------------------------------------- # # First, we will import a radar composite from the archive. # You need the pysteps-data archive downloaded and the pystepsrc file # configured with the data_source paths pointing to data folders. # Selected case date = datetime.strptime("201505151630", "%Y%m%d%H%M") data_source = rcparams.data_sources["mch"] ############################################################################### # Load the data from the archive # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ root_path = data_source["root_path"] path_fmt = data_source["path_fmt"] fn_pattern = data_source["fn_pattern"] fn_ext = data_source["fn_ext"] importer_name = data_source["importer"] importer_kwargs = data_source["importer_kwargs"] # Find the reference field in the archive fns = io.archive.find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep=5, num_prev_files=0 ) # Read the reference radar composite importer = io.get_method(importer_name, "importer") reference_field, quality, metadata = io.read_timeseries( fns, importer, **importer_kwargs ) del quality # Not used reference_field = np.squeeze(reference_field) # Remove time dimension ############################################################################### # Preprocess the data # ~~~~~~~~~~~~~~~~~~~ # Convert to mm/h reference_field, metadata = 
def create_motion_field(input_precip, motion_type):
    """
    Build an idealized (u, v) motion field matching the input field's grid.

    Parameters
    ----------
    input_precip: numpy array (lat, lon)
        Field whose shape defines the grid of the motion field.
    motion_type: str
        The supported motion fields are:

            - linear_x: (u=2, v=0)
            - linear_y: (u=0, v=2)
            - rotor: rotor field

    Returns
    -------
    ideal_motion : numpy array (u, v)
        Motion components, returned in the (lat, lon) / (y, x) indexing
        convention expected by the optical flow methods.
    """
    n_rows, n_cols = input_precip.shape

    # Grid coordinates built with "ij" indexing: both arrays have shape
    # (n_cols, n_rows), i.e. (x, y) ordering at this stage.
    grid_x, grid_y = np.meshgrid(
        np.arange(n_cols), np.arange(n_rows), indexing="ij"
    )

    motion_uv = np.zeros((2, n_cols, n_rows))

    if motion_type == "linear_x":
        motion_uv[0] = 2  # uniform translation along x
    elif motion_type == "linear_y":
        motion_uv[1] = 2  # uniform translation along y
    elif motion_type == "rotor":
        # Rotational field; the grid origin (norm == 0) keeps zero velocity.
        norm = np.sqrt(grid_x * grid_x + grid_y * grid_y)
        nonzero = norm != 0
        motion_uv[0, nonzero] = 2 * (grid_y - grid_y.mean())[nonzero] / norm[nonzero]
        motion_uv[1, nonzero] = -2 * (grid_x - grid_x.mean())[nonzero] / norm[nonzero]
    else:
        raise ValueError("motion_type not supported.")

    # Swap the last two axes to the (lat, lon) / (y, x) convention used by
    # the optical flow methods.
    return motion_uv.swapaxes(1, 2)
# NOTE: The motion field passed to the morph function can't have any NaNs. # Otherwise, it can result in a segmentation fault. morphed_field, mask = morph( input_precip.swapaxes(0, 1), ideal_motion.swapaxes(1, 2) ) mask = np.array(mask, dtype=bool) synthetic_observations = np.ma.MaskedArray(morphed_field, mask=mask) synthetic_observations = synthetic_observations[np.newaxis, :] for t in range(1, num_times): morphed_field, mask = morph( synthetic_observations[t - 1], ideal_motion.swapaxes(1, 2) ) mask = np.array(mask, dtype=bool) morphed_field = np.ma.MaskedArray( morphed_field[np.newaxis, :], mask=mask[np.newaxis, :] ) synthetic_observations = np.ma.concatenate( [synthetic_observations, morphed_field], axis=0 ) # Swap back to (lat, lon) synthetic_observations = synthetic_observations.swapaxes(1, 2) synthetic_observations = np.ma.masked_invalid(synthetic_observations) synthetic_observations.data[np.ma.getmaskarray(synthetic_observations)] = 0 return ideal_motion, synthetic_observations def plot_optflow_method_convergence(input_precip, optflow_method_name, motion_type): """ Test the convergence to the actual solution of the optical flow method used. Parameters ---------- input_precip: numpy array (lat, lon) Input precipitation field. 
    optflow_method_name: str
        Optical flow method name
    motion_type: str
        The supported motion fields are:

            - linear_x: (u=2, v=0)
            - linear_y: (u=0, v=2)
            - rotor: rotor field
    """
    # Only DARTS gets the longer (9-frame) observation sequence;
    # the other methods are run on a 2-frame pair.
    if optflow_method_name.lower() != "darts":
        num_times = 2
    else:
        num_times = 9

    ideal_motion, precip_obs = create_observations(
        input_precip, motion_type, num_times=num_times
    )

    oflow_method = motion.get_method(optflow_method_name)

    # NOTE(review): despite the name, this is the start timestamp, not a
    # duration; the elapsed time is computed in the print() below.
    elapsed_time = time.perf_counter()

    computed_motion = oflow_method(precip_obs, verbose=False)

    print(
        f"{optflow_method_name} computation time: "
        f"{(time.perf_counter() - elapsed_time):.1f} [s]"
    )

    precip_obs, _ = stp.utils.dB_transform(precip_obs, inverse=True)

    precip_data = precip_obs.max(axis=0)
    precip_data.data[precip_data.mask] = 0

    # Precipitation region: smoothed precipitation above 0.1, excluding pixels
    # masked at any time step.
    precip_mask = (uniform_filter(precip_data, size=20) > 0.1) & ~precip_obs.mask.any(
        axis=0
    )

    cmap = get_cmap("jet").copy()
    cmap.set_under("grey", alpha=0.25)
    cmap.set_over("none")

    # Compare retrieved motion field with the ideal one
    plt.figure(figsize=(9, 4))
    plt.subplot(1, 2, 1)
    ax = plot_precip_field(precip_obs[0], title="Reference motion")
    quiver(ideal_motion, step=25, ax=ax)

    plt.subplot(1, 2, 2)
    ax = plot_precip_field(precip_obs[0], title="Retrieved motion")
    quiver(computed_motion, step=25, ax=ax)

    # To evaluate the accuracy of the computed_motion vectors, we will use
    # a relative RMSE measure.
    # Relative MSE = < (expected_motion - computed_motion)^2 > / < expected_motion^2 >
    # Relative RMSE = sqrt(Relative MSE)
    mse = ((ideal_motion - computed_motion)[:, precip_mask] ** 2).mean()
    rel_mse = mse / (ideal_motion[:, precip_mask] ** 2).mean()

    plt.suptitle(
        f"{optflow_method_name} " f"Relative RMSE: {np.sqrt(rel_mse) * 100:.2f}%"
    )
    plt.show()


################################################################################
# Lucas-Kanade
# ------------
#
# Constant motion x-direction
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
plot_optflow_method_convergence(reference_field, "LucasKanade", "linear_x")

################################################################################
# Constant motion y-direction
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
plot_optflow_method_convergence(reference_field, "LucasKanade", "linear_y")

################################################################################
# Rotational motion
# ~~~~~~~~~~~~~~~~~
plot_optflow_method_convergence(reference_field, "LucasKanade", "rotor")

################################################################################
# Variational Echo Tracking (VET)
# -------------------------------
#
# Constant motion x-direction
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
plot_optflow_method_convergence(reference_field, "VET", "linear_x")

################################################################################
# Constant motion y-direction
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
plot_optflow_method_convergence(reference_field, "VET", "linear_y")

################################################################################
# Rotational motion
# ~~~~~~~~~~~~~~~~~
plot_optflow_method_convergence(reference_field, "VET", "rotor")

################################################################################
# DARTS
# -----
#
# Constant motion x-direction
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
plot_optflow_method_convergence(reference_field, "DARTS", "linear_x")

################################################################################
# Constant motion
y-direction # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ plot_optflow_method_convergence(reference_field, "DARTS", "linear_y") ################################################################################ # Rotational motion # ~~~~~~~~~~~~~~~~~ plot_optflow_method_convergence(reference_field, "DARTS", "rotor") ################################################################################ # Farneback # --------- # # Constant motion x-direction # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ plot_optflow_method_convergence(reference_field, "farneback", "linear_x") ################################################################################ # Constant motion y-direction # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ plot_optflow_method_convergence(reference_field, "farneback", "linear_y") ################################################################################ # Rotational motion # ~~~~~~~~~~~~~~~~~ plot_optflow_method_convergence(reference_field, "farneback", "rotor") # sphinx_gallery_thumbnail_number = 5 ================================================ FILE: examples/plot_cascade_decomposition.py ================================================ #!/bin/env python """ Cascade decomposition ===================== This example script shows how to compute and plot the cascade decompositon of a single radar precipitation field in pysteps. """ from matplotlib import cm, pyplot as plt import numpy as np import os from pprint import pprint from pysteps.cascade.bandpass_filters import filter_gaussian from pysteps import io, rcparams from pysteps.cascade.decomposition import decomposition_fft from pysteps.utils import conversion, transformation from pysteps.visualization import plot_precip_field ############################################################################### # Read precipitation field # ------------------------ # # First thing, the radar composite is imported and transformed in units # of dB. 
# Import the example radar composite root_path = rcparams.data_sources["fmi"]["root_path"] filename = os.path.join( root_path, "20160928", "201609281600_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz" ) R, _, metadata = io.import_fmi_pgm(filename, gzipped=True) # Convert to rain rate R, metadata = conversion.to_rainrate(R, metadata) # Nicely print the metadata pprint(metadata) # Plot the rainfall field plot_precip_field(R, geodata=metadata) plt.show() # Log-transform the data R, metadata = transformation.dB_transform(R, metadata, threshold=0.1, zerovalue=-15.0) ############################################################################### # 2D Fourier spectrum # -------------------- # # Compute and plot the 2D Fourier power spectrum of the precipitaton field. # Set Nans as the fill value R[~np.isfinite(R)] = metadata["zerovalue"] # Compute the Fourier transform of the input field F = abs(np.fft.fftshift(np.fft.fft2(R))) # Plot the power spectrum M, N = F.shape fig, ax = plt.subplots() im = ax.imshow( np.log(F**2), vmin=4, vmax=24, cmap=cm.jet, extent=(-N / 2, N / 2, -M / 2, M / 2) ) cb = fig.colorbar(im) ax.set_xlabel("Wavenumber $k_x$") ax.set_ylabel("Wavenumber $k_y$") ax.set_title("Log-power spectrum of R") plt.show() ############################################################################### # Cascade decomposition # --------------------- # # First, construct a set of Gaussian bandpass filters and plot the corresponding # 1D filters. 
num_cascade_levels = 7 # Construct the Gaussian bandpass filters filter = filter_gaussian(R.shape, num_cascade_levels) # Plot the bandpass filter weights L = max(N, M) fig, ax = plt.subplots() for k in range(num_cascade_levels): ax.semilogx( np.linspace(0, L / 2, len(filter["weights_1d"][k, :])), filter["weights_1d"][k, :], "k-", base=pow(0.5 * L / 3, 1.0 / (num_cascade_levels - 2)), ) ax.set_xlim(1, L / 2) ax.set_ylim(0, 1) xt = np.hstack([[1.0], filter["central_wavenumbers"][1:]]) ax.set_xticks(xt) ax.set_xticklabels(["%.2f" % cf for cf in filter["central_wavenumbers"]]) ax.set_xlabel("Radial wavenumber $|\mathbf{k}|$") ax.set_ylabel("Normalized weight") ax.set_title("Bandpass filter weights") plt.show() ############################################################################### # Finally, apply the 2D Gaussian filters to decompose the radar rainfall field # into a set of cascade levels of decreasing spatial scale and plot them. decomp = decomposition_fft(R, filter, compute_stats=True) # Plot the normalized cascade levels for i in range(num_cascade_levels): mu = decomp["means"][i] sigma = decomp["stds"][i] decomp["cascade_levels"][i] = (decomp["cascade_levels"][i] - mu) / sigma fig, ax = plt.subplots(nrows=2, ncols=4) ax[0, 0].imshow(R, cmap=cm.RdBu_r, vmin=-5, vmax=5) ax[0, 1].imshow(decomp["cascade_levels"][0], cmap=cm.RdBu_r, vmin=-3, vmax=3) ax[0, 2].imshow(decomp["cascade_levels"][1], cmap=cm.RdBu_r, vmin=-3, vmax=3) ax[0, 3].imshow(decomp["cascade_levels"][2], cmap=cm.RdBu_r, vmin=-3, vmax=3) ax[1, 0].imshow(decomp["cascade_levels"][3], cmap=cm.RdBu_r, vmin=-3, vmax=3) ax[1, 1].imshow(decomp["cascade_levels"][4], cmap=cm.RdBu_r, vmin=-3, vmax=3) ax[1, 2].imshow(decomp["cascade_levels"][5], cmap=cm.RdBu_r, vmin=-3, vmax=3) ax[1, 3].imshow(decomp["cascade_levels"][6], cmap=cm.RdBu_r, vmin=-3, vmax=3) ax[0, 0].set_title("Observed") ax[0, 1].set_title("Level 1") ax[0, 2].set_title("Level 2") ax[0, 3].set_title("Level 3") ax[1, 0].set_title("Level 4") ax[1, 
1].set_title("Level 5") ax[1, 2].set_title("Level 6") ax[1, 3].set_title("Level 7") for i in range(2): for j in range(4): ax[i, j].set_xticks([]) ax[i, j].set_yticks([]) plt.tight_layout() plt.show() # sphinx_gallery_thumbnail_number = 4 ================================================ FILE: examples/plot_custom_precipitation_range.py ================================================ #!/bin/env python """ Plot precipitation using custom colormap ============= This tutorial shows how to plot data using a custom colormap with a specific range of precipitation values. """ import os from datetime import datetime import matplotlib.pyplot as plt import pysteps from pysteps import io, rcparams from pysteps.utils import conversion from pysteps.visualization import plot_precip_field from pysteps.datasets import download_pysteps_data, create_default_pystepsrc ############################################################################### # Download the data if it is not available # ---------------------------------------- # # The following code block downloads datasets from the pysteps-data repository # if it is not available on the disk. The dataset is used to demonstrate the # plotting of precipitation data using a custom colormap. # Check if the pysteps-data repository is available (it would be pysteps-data in pysteps) # Implies that you are running this script from the `pysteps/examples` folder if not os.path.exists(rcparams.data_sources["mrms"]["root_path"]): download_pysteps_data("pysteps_data") config_file_path = create_default_pystepsrc("pysteps_data") print(f"Configuration file has been created at {config_file_path}") ############################################################################### # Read precipitation field # ------------------------ # # First thing, load a frame from Multi-Radar Multi-Sensor dataset and convert it # to precipitation rate in mm/h. 
# Define the dataset and the date for which you want to load data data_source = pysteps.rcparams.data_sources["mrms"] date = datetime(2019, 6, 10, 0, 2, 0) # Example date # Extract the parameters from the data source root_path = data_source["root_path"] path_fmt = data_source["path_fmt"] fn_pattern = data_source["fn_pattern"] fn_ext = data_source["fn_ext"] importer_name = data_source["importer"] importer_kwargs = data_source["importer_kwargs"] timestep = data_source["timestep"] # Find the frame in the archive for the specified date fns = io.find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep, num_prev_files=1 ) # Read the frame from the archive importer = io.get_method(importer_name, "importer") R, _, metadata = io.read_timeseries(fns, importer, **importer_kwargs) # Convert the reflectivity data to rain rate R, metadata = conversion.to_rainrate(R, metadata) # Plot the first rainfall field from the loaded data plt.figure(figsize=(10, 5), dpi=300) plt.axis("off") plot_precip_field(R[0, :, :], geodata=metadata, axis="off") plt.tight_layout() plt.show() ############################################################################### # Define the custom colormap # -------------------------- # # Assume that the default colormap does not represent the precipitation values # in the desired range. In this case, you can define a custom colormap that will # be used to plot the precipitation data and pass the class instance to the # `plot_precip_field` function. # # It essential for the custom colormap to have the following attributes: # # - `cmap`: The colormap object. # - `norm`: The normalization object. # - `clevs`: The color levels for the colormap. # # `plot_precip_field` can handle each of the classes defined in the `matplotlib.colors` # https://matplotlib.org/stable/api/colors_api.html#colormaps # There must be as many colors in the colormap as there are levels in the color levels. 
# Define the custom colormap from matplotlib import colors class ColormapConfig: def __init__(self): self.cmap = None self.norm = None self.clevs = None self.build_colormap() def build_colormap(self): # Define the colormap boundaries and colors # color_list = ['lightgrey', 'lightskyblue', 'blue', 'yellow', 'orange', 'red', 'darkred'] color_list = ["blue", "navy", "yellow", "orange", "green", "brown", "red"] self.clevs = [0.1, 0.5, 1.5, 2.5, 4, 6, 10] # mm/hr # Create a ListedColormap object with the defined colors self.cmap = colors.ListedColormap(color_list) self.cmap.name = "Custom Colormap" # Set the color for values above the maximum level self.cmap.set_over("darkmagenta") # Set the color for values below the minimum level self.cmap.set_under("none") # Set the color for missing values self.cmap.set_bad("gray", alpha=0.5) # Create a BoundaryNorm object to normalize the data values to the colormap boundaries self.norm = colors.BoundaryNorm(self.clevs, self.cmap.N) # Create an instance of the ColormapConfig class config = ColormapConfig() # Plot the precipitation field using the custom colormap plt.figure(figsize=(10, 5), dpi=300) plt.axis("off") plot_precip_field(R[0, :, :], geodata=metadata, axis="off", colormap_config=config) plt.tight_layout() plt.show() ================================================ FILE: examples/plot_ensemble_verification.py ================================================ #!/bin/env python """ Ensemble verification ===================== In this tutorial we perform a verification of a probabilistic extrapolation nowcast using MeteoSwiss radar data. 
""" from datetime import datetime import matplotlib.pyplot as plt import numpy as np from pprint import pprint from pysteps import io, nowcasts, rcparams, verification from pysteps.motion.lucaskanade import dense_lucaskanade from pysteps.postprocessing import ensemblestats from pysteps.utils import conversion, dimension, transformation from pysteps.visualization import plot_precip_field ############################################################################### # Read precipitation field # ------------------------ # # First, we will import the sequence of MeteoSwiss ("mch") radar composites. # You need the pysteps-data archive downloaded and the pystepsrc file # configured with the data_source paths pointing to data folders. # Selected case date = datetime.strptime("201607112100", "%Y%m%d%H%M") data_source = rcparams.data_sources["mch"] n_ens_members = 20 n_leadtimes = 6 seed = 24 ############################################################################### # Load the data from the archive # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # The data are upscaled to 2 km resolution to limit the memory usage and thus # be able to afford a larger number of ensemble members. 
root_path = data_source["root_path"] path_fmt = data_source["path_fmt"] fn_pattern = data_source["fn_pattern"] fn_ext = data_source["fn_ext"] importer_name = data_source["importer"] importer_kwargs = data_source["importer_kwargs"] timestep = data_source["timestep"] # Find the radar files in the archive fns = io.find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep, num_prev_files=2 ) # Read the data from the archive importer = io.get_method(importer_name, "importer") R, _, metadata = io.read_timeseries(fns, importer, **importer_kwargs) # Convert to rain rate R, metadata = conversion.to_rainrate(R, metadata) # Upscale data to 2 km R, metadata = dimension.aggregate_fields_space(R, metadata, 2000) # Plot the rainfall field plot_precip_field(R[-1, :, :], geodata=metadata) plt.show() # Log-transform the data to unit of dBR, set the threshold to 0.1 mm/h, # set the fill value to -15 dBR R, metadata = transformation.dB_transform(R, metadata, threshold=0.1, zerovalue=-15.0) # Set missing values with the fill value R[~np.isfinite(R)] = -15.0 # Nicely print the metadata pprint(metadata) ############################################################################### # Forecast # -------- # # We use the STEPS approach to produce a ensemble nowcast of precipitation fields. 
# Estimate the motion field V = dense_lucaskanade(R) # Perform the ensemble nowcast with STEPS nowcast_method = nowcasts.get_method("steps") R_f = nowcast_method( R[-3:, :, :], V, n_leadtimes, n_ens_members, n_cascade_levels=6, precip_thr=-10.0, kmperpixel=2.0, timestep=timestep, decomp_method="fft", bandpass_filter_method="gaussian", noise_method="nonparametric", vel_pert_method="bps", mask_method="incremental", seed=seed, ) # Back-transform to rain rates R_f = transformation.dB_transform(R_f, threshold=-10.0, inverse=True)[0] # Plot some of the realizations fig = plt.figure() for i in range(4): ax = fig.add_subplot(221 + i) ax.set_title("Member %02d" % i) plot_precip_field(R_f[i, -1, :, :], geodata=metadata, colorbar=False, axis="off") plt.tight_layout() plt.show() ############################################################################### # Verification # ------------ # # Pysteps includes a number of verification metrics to help users to analyze # the general characteristics of the nowcasts in terms of consistency and # quality (or goodness). # Here, we will verify our probabilistic forecasts using the ROC curve, # reliability diagrams, and rank histograms, as implemented in the verification # module of pysteps. 
# Find the files containing the verifying observations fns = io.archive.find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep, 0, num_next_files=n_leadtimes, ) # Read the observations R_o, _, metadata_o = io.read_timeseries(fns, importer, **importer_kwargs) # Convert to mm/h R_o, metadata_o = conversion.to_rainrate(R_o, metadata_o) # Upscale data to 2 km R_o, metadata_o = dimension.aggregate_fields_space(R_o, metadata_o, 2000) # Compute the verification for the last lead time # compute the exceedance probability of 0.1 mm/h from the ensemble P_f = ensemblestats.excprob(R_f[:, -1, :, :], 0.1, ignore_nan=True) ############################################################################### # ROC curve # ~~~~~~~~~ roc = verification.ROC_curve_init(0.1, n_prob_thrs=10) verification.ROC_curve_accum(roc, P_f, R_o[-1, :, :]) fig, ax = plt.subplots() verification.plot_ROC(roc, ax, opt_prob_thr=True) ax.set_title("ROC curve (+%i min)" % (n_leadtimes * timestep)) plt.show() ############################################################################### # Reliability diagram # ~~~~~~~~~~~~~~~~~~~ reldiag = verification.reldiag_init(0.1) verification.reldiag_accum(reldiag, P_f, R_o[-1, :, :]) fig, ax = plt.subplots() verification.plot_reldiag(reldiag, ax) ax.set_title("Reliability diagram (+%i min)" % (n_leadtimes * timestep)) plt.show() ############################################################################### # Rank histogram # ~~~~~~~~~~~~~~ rankhist = verification.rankhist_init(R_f.shape[0], 0.1) verification.rankhist_accum(rankhist, R_f[:, -1, :, :], R_o[-1, :, :]) fig, ax = plt.subplots() verification.plot_rankhist(rankhist, ax) ax.set_title("Rank histogram (+%i min)" % (n_leadtimes * timestep)) plt.show() # sphinx_gallery_thumbnail_number = 5 ================================================ FILE: examples/plot_extrapolation_nowcast.py ================================================ #!/bin/env python """ Extrapolation nowcast ===================== 
This tutorial shows how to compute and plot an extrapolation nowcast using
Finnish radar data.
"""

from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
from pprint import pprint

from pysteps import io, motion, nowcasts, rcparams, verification
from pysteps.utils import conversion, transformation
from pysteps.visualization import plot_precip_field, quiver

###############################################################################
# Read the radar input images
# ---------------------------
#
# First, we will import the sequence of radar composites.
# You need the pysteps-data archive downloaded and the pystepsrc file
# configured with the data_source paths pointing to data folders.

# Selected case
date = datetime.strptime("201609281600", "%Y%m%d%H%M")
data_source = rcparams.data_sources["fmi"]
n_leadtimes = 12

###############################################################################
# Load the data from the archive
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

root_path = data_source["root_path"]
path_fmt = data_source["path_fmt"]
fn_pattern = data_source["fn_pattern"]
fn_ext = data_source["fn_ext"]
importer_name = data_source["importer"]
importer_kwargs = data_source["importer_kwargs"]
timestep = data_source["timestep"]

# Find the input files from the archive
fns = io.archive.find_by_date(
    date, root_path, path_fmt, fn_pattern, fn_ext, timestep, num_prev_files=2
)

# Read the radar composites
importer = io.get_method(importer_name, "importer")
Z, _, metadata = io.read_timeseries(fns, importer, **importer_kwargs)

# Convert to rain rate
R, metadata = conversion.to_rainrate(Z, metadata)

# Plot the rainfall field
plot_precip_field(R[-1, :, :], geodata=metadata)
plt.show()

# Store the last frame for plotting it later
R_ = R[-1, :, :].copy()

# Log-transform the data to unit of dBR, set the threshold to 0.1 mm/h,
# set the fill value to -15 dBR
R, metadata = transformation.dB_transform(R, metadata, threshold=0.1, zerovalue=-15.0)

# Nicely print the metadata
pprint(metadata)

###############################################################################
# Compute the nowcast
# -------------------
#
# The extrapolation nowcast is based on the estimation of the motion field,
# which is here performed using a local tracking approach (Lucas-Kanade).
# The most recent radar rainfall field is then simply advected along this motion
# field in order to produce an extrapolation forecast.

# Estimate the motion field with Lucas-Kanade
oflow_method = motion.get_method("LK")
V = oflow_method(R[-3:, :, :])

# Extrapolate the last radar observation
extrapolate = nowcasts.get_method("extrapolation")
R[~np.isfinite(R)] = metadata["zerovalue"]
R_f = extrapolate(R[-1, :, :], V, n_leadtimes)

# Back-transform to rain rate
R_f = transformation.dB_transform(R_f, threshold=-10.0, inverse=True)[0]

# Plot the motion field
plot_precip_field(R_, geodata=metadata)
quiver(V, geodata=metadata, step=50)
plt.show()

###############################################################################
# Verify with FSS
# ---------------
#
# The fractions skill score (FSS) provides an intuitive assessment of the
# dependency of skill on spatial scale and intensity, which makes it an ideal
# skill score for high-resolution precipitation forecasts.

# Find observations in the data archive
fns = io.archive.find_by_date(
    date,
    root_path,
    path_fmt,
    fn_pattern,
    fn_ext,
    timestep,
    num_prev_files=0,
    num_next_files=n_leadtimes,
)

# Read the radar composites
R_o, _, metadata_o = io.read_timeseries(fns, importer, **importer_kwargs)
R_o, metadata_o = conversion.to_rainrate(R_o, metadata_o, 223.0, 1.53)

# Compute fractions skill score (FSS) for all lead times, a set of scales and 1 mm/h
fss = verification.get_method("FSS")
scales = [2, 4, 8, 16, 32, 64, 128, 256, 512]
thr = 1.0
score = []
for i in range(n_leadtimes):
    score_ = []
    for scale in scales:
        score_.append(fss(R_f[i, :, :], R_o[i + 1, :, :], thr, scale))
    score.append(score_)

plt.figure()
x = np.arange(1, n_leadtimes + 1) * timestep
plt.plot(x, score)
plt.legend(scales, title="Scale [km]")
plt.xlabel("Lead time [min]")
plt.ylabel("FSS ( > 1.0 mm/h ) ")
plt.title("Fractions skill score")
plt.show()

# sphinx_gallery_thumbnail_number = 3


================================================
FILE: examples/plot_linear_blending.py
================================================
# -*- coding: utf-8 -*-
"""
Linear blending
===============

This tutorial shows how to construct a simple linear blending between a STEPS
ensemble nowcast and a Numerical Weather Prediction (NWP) rainfall forecast.
The used datasets are from the Bureau of Meteorology, Australia.
"""

import os
from datetime import datetime
from matplotlib import pyplot as plt

import pysteps
from pysteps import io, rcparams, nowcasts, blending
from pysteps.utils import conversion
from pysteps.visualization import plot_precip_field

################################################################################
# Read the radar images and the NWP forecast
# ------------------------------------------
#
# First, we import a sequence of 3 images of 10-minute radar composites
# and the corresponding NWP rainfall forecast that was available at that time.
# # You need the pysteps-data archive downloaded and the pystepsrc file # configured with the data_source paths pointing to data folders. # Additionally, the pysteps-nwp-importers plugin needs to be installed, see # https://github.com/pySTEPS/pysteps-nwp-importers. # Selected case date_radar = datetime.strptime("202010310400", "%Y%m%d%H%M") # The last NWP forecast was issued at 00:00 date_nwp = datetime.strptime("202010310000", "%Y%m%d%H%M") radar_data_source = rcparams.data_sources["bom"] nwp_data_source = rcparams.data_sources["bom_nwp"] ############################################################################### # Load the data from the archive # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ root_path = radar_data_source["root_path"] path_fmt = "prcp-c10/66/%Y/%m/%d" fn_pattern = "66_%Y%m%d_%H%M00.prcp-c10" fn_ext = radar_data_source["fn_ext"] importer_name = radar_data_source["importer"] importer_kwargs = radar_data_source["importer_kwargs"] timestep = 10.0 # Find the radar files in the archive fns = io.find_by_date( date_radar, root_path, path_fmt, fn_pattern, fn_ext, timestep, num_prev_files=2 ) # Read the radar composites importer = io.get_method(importer_name, "importer") radar_precip, _, radar_metadata = io.read_timeseries(fns, importer, **importer_kwargs) # Import the NWP data filename = os.path.join( nwp_data_source["root_path"], datetime.strftime(date_nwp, nwp_data_source["path_fmt"]), datetime.strftime(date_nwp, nwp_data_source["fn_pattern"]) + "." + nwp_data_source["fn_ext"], ) nwp_importer = io.get_method("bom_nwp", "importer") nwp_precip, _, nwp_metadata = nwp_importer(filename) # Only keep the NWP forecasts from the last radar observation time (2020-10-31 04:00) # End of the forecast is 18 time steps (+3 hours) in advance. 
precip_nwp = nwp_precip[24:43, :, :] ################################################################################ # Pre-processing steps # -------------------- # Make sure the units are in mm/h converter = pysteps.utils.get_method("mm/h") radar_precip, radar_metadata = converter(radar_precip, radar_metadata) precip_nwp, nwp_metadata = converter(precip_nwp, nwp_metadata) # Threshold the data radar_precip[radar_precip < 0.1] = 0.0 precip_nwp[precip_nwp < 0.1] = 0.0 # Plot the radar rainfall field and the first time step of the NWP forecast. # For the initial time step (t=0), the NWP rainfall forecast is not that different # from the observed radar rainfall, but it misses some of the locations and # shapes of the observed rainfall fields. Therefore, the NWP rainfall forecast will # initially get a low weight in the blending process. date_str = datetime.strftime(date_radar, "%Y-%m-%d %H:%M") plt.figure(figsize=(10, 5)) plt.subplot(121) plot_precip_field( radar_precip[-1, :, :], geodata=radar_metadata, title=f"Radar observation at {date_str}", ) plt.subplot(122) plot_precip_field( precip_nwp[0, :, :], geodata=nwp_metadata, title=f"NWP forecast at {date_str}" ) plt.tight_layout() plt.show() # Only keep the NWP forecasts from 2020-10-31 04:05 onwards, because the first # forecast lead time starts at 04:05. precip_nwp = precip_nwp[1:] # Transform the radar data to dB - this transformation is useful for the motion # field estimation and the subsequent nowcasts. The NWP forecast is not # transformed, because the linear blending code sets everything back in mm/h # after the nowcast. 
transformer = pysteps.utils.get_method("dB") radar_precip, radar_metadata = transformer(radar_precip, radar_metadata, threshold=0.1) ################################################################################ # Determine the velocity field for the radar rainfall nowcast # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ oflow_method = pysteps.motion.get_method("lucaskanade") velocity_radar = oflow_method(radar_precip) ################################################################################ # The linear blending of nowcast and NWP rainfall forecast # -------------------------------------------------------- # Calculate the blended precipitation field precip_blended = blending.linear_blending.forecast( precip=radar_precip[-1, :, :], precip_metadata=radar_metadata, velocity=velocity_radar, timesteps=18, timestep=10, nowcast_method="extrapolation", # simple advection nowcast precip_nwp=precip_nwp, precip_nwp_metadata=nwp_metadata, start_blending=60, # in minutes (this is an arbritrary choice) end_blending=120, # in minutes (this is an arbritrary choice) ) ################################################################################ # The salient blending of nowcast and NWP rainfall forecast # --------------------------------------------------------- # # This method follows the saliency-based blending procedure described in :cite:`Hwang2015`. The # blending is based on intensities and forecast times. The blended product preserves pixel # intensities with time if they are strong enough based on their ranked salience. Saliency is # the property of an object to be outstanding with respect to its surroundings. The ranked salience # is calculated by first determining the difference in the normalized intensity of the nowcasts # and NWP. Next, the pixel intensities are ranked, in which equally comparable values receive # the same ranking number. 
# Calculate the salient blended precipitation field precip_salient_blended = blending.linear_blending.forecast( precip=radar_precip[-1, :, :], precip_metadata=radar_metadata, velocity=velocity_radar, timesteps=18, timestep=10, nowcast_method="extrapolation", # simple advection nowcast precip_nwp=precip_nwp, precip_nwp_metadata=nwp_metadata, start_blending=60, # in minutes (this is an arbritrary choice) end_blending=120, # in minutes (this is an arbritrary choice) saliency=True, ) ################################################################################ # Visualize the output # -------------------- ################################################################################ # Calculate the radar rainfall nowcasts for visualization nowcast_method_func = nowcasts.get_method("extrapolation") precip_nowcast = nowcast_method_func( precip=radar_precip[-1, :, :], velocity=velocity_radar, timesteps=18, ) # Make sure that precip_nowcast are in mm/h precip_nowcast, _ = conversion.to_rainrate(precip_nowcast, metadata=radar_metadata) ################################################################################ # The linear blending starts at 60 min, so during the first 60 minutes the # blended forecast only consists of the extrapolation forecast (consisting of an # extrapolation nowcast). Between 60 and 120 min, the NWP forecast gradually gets more # weight, whereas the extrapolation forecasts gradually gets less weight. In addition, # the saliency-based blending takes also the difference in pixel intensities into account, # which are preserved over time if they are strong enough based on their ranked salience. # Furthermore, pixels with relative low intensities get a lower weight and stay smaller in # the saliency-based blending compared to linear blending. After 120 min, the blended # forecast entirely consists of the NWP rainfall forecast. 
fig = plt.figure(figsize=(8, 12))

# One row per lead time, four columns: nowcast, linear blend, salient blend, NWP.
# NOTE(review): `timestep` comes from the data-source configuration earlier in
# the script — presumably 10 min here, matching the blending call; confirm.
leadtimes_min = [30, 60, 80, 100, 120]
n_leadtimes = len(leadtimes_min)
for n, leadtime in enumerate(leadtimes_min):
    # Extrapolation
    plt.subplot(n_leadtimes, 4, n * 4 + 1)
    plot_precip_field(
        precip_nowcast[int(leadtime / timestep) - 1, :, :],
        geodata=radar_metadata,
        title=f"Nowcast + {leadtime} min",
        axis="off",
        colorbar=False,
    )
    # Nowcast with blending into NWP
    plt.subplot(n_leadtimes, 4, n * 4 + 2)
    plot_precip_field(
        precip_blended[int(leadtime / timestep) - 1, :, :],
        geodata=radar_metadata,
        title=f"Linear + {leadtime} min",
        axis="off",
        colorbar=False,
    )
    # Nowcast with salient blending into NWP
    plt.subplot(n_leadtimes, 4, n * 4 + 3)
    plot_precip_field(
        precip_salient_blended[int(leadtime / timestep) - 1, :, :],
        geodata=radar_metadata,
        title=f"Salient + {leadtime} min",
        axis="off",
        colorbar=False,
    )
    # Raw NWP forecast
    plt.subplot(n_leadtimes, 4, n * 4 + 4)
    plot_precip_field(
        precip_nwp[int(leadtime / timestep) - 1, :, :],
        geodata=nwp_metadata,
        title=f"NWP + {leadtime} min",
        axis="off",
        colorbar=False,
    )

plt.tight_layout()
plt.show()

################################################################################
# Note that the NaN values of the extrapolation forecast are replaced with NWP data
# in the blended forecast, even before the blending starts.


================================================
FILE: examples/plot_noise_generators.py
================================================

#!/bin/env python
"""
Generation of stochastic noise
==============================

This example script shows how to run the stochastic noise field generators
included in pysteps.

These noise fields are used as perturbation terms during an extrapolation
nowcast in order to represent the uncertainty in the evolution of the rainfall
field.
""" from matplotlib import cm, pyplot as plt import numpy as np import os from pprint import pprint from pysteps import io, rcparams from pysteps.noise.fftgenerators import initialize_param_2d_fft_filter from pysteps.noise.fftgenerators import initialize_nonparam_2d_fft_filter from pysteps.noise.fftgenerators import generate_noise_2d_fft_filter from pysteps.utils import conversion, rapsd, transformation from pysteps.visualization import plot_precip_field, plot_spectrum1d ############################################################################### # Read precipitation field # ------------------------ # # First thing, the radar composite is imported and transformed in units # of dB. # This image will be used to train the Fourier filters that are necessary to # produce the fields of spatially correlated noise. # Import the example radar composite root_path = rcparams.data_sources["mch"]["root_path"] filename = os.path.join(root_path, "20160711", "AQC161932100V_00005.801.gif") R, _, metadata = io.import_mch_gif(filename, product="AQC", unit="mm", accutime=5.0) # Convert to mm/h R, metadata = conversion.to_rainrate(R, metadata) # Nicely print the metadata pprint(metadata) # Plot the rainfall field plot_precip_field(R, geodata=metadata) plt.show() # Log-transform the data R, metadata = transformation.dB_transform(R, metadata, threshold=0.1, zerovalue=-15.0) # Assign the fill value to all the Nans R[~np.isfinite(R)] = metadata["zerovalue"] ############################################################################### # Parametric filter # ----------------- # # In the parametric approach, a power-law model is used to approximate the power # spectral density (PSD) of a given rainfall field. 
# # The parametric model uses a piece-wise linear function with two spectral # slopes (beta1 and beta2) and one breaking point # Fit the parametric PSD to the observation Fp = initialize_param_2d_fft_filter(R) # Compute the observed and fitted 1D PSD L = np.max(Fp["input_shape"]) if L % 2 == 1: wn = np.arange(0, int(L / 2) + 1) else: wn = np.arange(0, int(L / 2)) R_, freq = rapsd(R, fft_method=np.fft, return_freq=True) f = np.exp(Fp["model"](np.log(wn), *Fp["pars"])) # Extract the scaling break in km, beta1 and beta2 w0 = L / np.exp(Fp["pars"][0]) b1 = Fp["pars"][2] b2 = Fp["pars"][3] # Plot the observed power spectrum and the model fig, ax = plt.subplots() plot_scales = [512, 256, 128, 64, 32, 16, 8, 4] plot_spectrum1d( freq, R_, x_units="km", y_units="dBR", color="k", ax=ax, label="Observed", wavelength_ticks=plot_scales, ) plot_spectrum1d( freq, f, x_units="km", y_units="dBR", color="r", ax=ax, label="Fit", wavelength_ticks=plot_scales, ) plt.legend() ax.set_title( "Radially averaged log-power spectrum of R\n" r"$\omega_0=%.0f km, \beta_1=%.1f, \beta_2=%.1f$" % (w0, b1, b2) ) plt.show() ############################################################################### # Nonparametric filter # -------------------- # # In the nonparametric approach, the Fourier filter is obtained directly # from the power spectrum of the observed precipitation field R. Fnp = initialize_nonparam_2d_fft_filter(R) ############################################################################### # Noise generator # --------------- # # The parametric and nonparametric filters obtained above can now be used # to produce N realizations of random fields of prescribed power spectrum, # hence with the same correlation structure as the initial rainfall field. 
seed = 42 num_realizations = 3 # Generate noise Np = [] Nnp = [] for k in range(num_realizations): Np.append(generate_noise_2d_fft_filter(Fp, seed=seed + k)) Nnp.append(generate_noise_2d_fft_filter(Fnp, seed=seed + k)) # Plot the generated noise fields fig, ax = plt.subplots(nrows=2, ncols=3) # parametric noise ax[0, 0].imshow(Np[0], cmap=cm.RdBu_r, vmin=-3, vmax=3) ax[0, 1].imshow(Np[1], cmap=cm.RdBu_r, vmin=-3, vmax=3) ax[0, 2].imshow(Np[2], cmap=cm.RdBu_r, vmin=-3, vmax=3) # nonparametric noise ax[1, 0].imshow(Nnp[0], cmap=cm.RdBu_r, vmin=-3, vmax=3) ax[1, 1].imshow(Nnp[1], cmap=cm.RdBu_r, vmin=-3, vmax=3) ax[1, 2].imshow(Nnp[2], cmap=cm.RdBu_r, vmin=-3, vmax=3) for i in range(2): for j in range(3): ax[i, j].set_xticks([]) ax[i, j].set_yticks([]) plt.tight_layout() plt.show() ############################################################################### # The above figure highlights the main limitation of the parametric approach # (top row), that is, the assumption of an isotropic power law scaling # relationship, meaning that anisotropic structures such as rainfall bands # cannot be represented. # # Instead, the nonparametric approach (bottom row) allows generating # perturbation fields with anisotropic structures, but it also requires a # larger sample size and is sensitive to the quality of the input data, e.g. # the presence of residual clutter in the radar image. # # In addition, both techniques assume spatial stationarity of the covariance # structure of the field. # sphinx_gallery_thumbnail_number = 3 ================================================ FILE: examples/plot_optical_flow.py ================================================ """ Optical flow ============ This tutorial offers a short overview of the optical flow routines available in pysteps and it will cover how to compute and plot the motion field from a sequence of radar images. 
""" from datetime import datetime from pprint import pprint import matplotlib.pyplot as plt import numpy as np from pysteps import io, motion, rcparams from pysteps.utils import conversion, transformation from pysteps.visualization import plot_precip_field, quiver ################################################################################ # Read the radar input images # --------------------------- # # First, we will import the sequence of radar composites. # You need the pysteps-data archive downloaded and the pystepsrc file # configured with the data_source paths pointing to data folders. # Selected case date = datetime.strptime("201505151630", "%Y%m%d%H%M") data_source = rcparams.data_sources["mch"] ############################################################################### # Load the data from the archive # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ root_path = data_source["root_path"] path_fmt = data_source["path_fmt"] fn_pattern = data_source["fn_pattern"] fn_ext = data_source["fn_ext"] importer_name = data_source["importer"] importer_kwargs = data_source["importer_kwargs"] timestep = data_source["timestep"] # Find the input files from the archive fns = io.archive.find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep=5, num_prev_files=9 ) # Read the radar composites importer = io.get_method(importer_name, "importer") R, quality, metadata = io.read_timeseries(fns, importer, **importer_kwargs) del quality # Not used ############################################################################### # Preprocess the data # ~~~~~~~~~~~~~~~~~~~ # Convert to mm/h R, metadata = conversion.to_rainrate(R, metadata) # Store the reference frame R_ = R[-1, :, :].copy() # Log-transform the data [dBR] R, metadata = transformation.dB_transform(R, metadata, threshold=0.1, zerovalue=-15.0) # Nicely print the metadata pprint(metadata) ################################################################################ # Lucas-Kanade (LK) # ----------------- # # The 
Lucas-Kanade optical flow method implemented in pysteps is a local
# tracking approach that relies on the OpenCV package.
# Local features are tracked in a sequence of two or more radar images. The
# scheme includes a final interpolation step in order to produce a smooth
# field of motion vectors.

oflow_method = motion.get_method("LK")
V1 = oflow_method(R[-3:, :, :])

# Plot the motion field on top of the reference frame
plot_precip_field(R_, geodata=metadata, title="LK")
quiver(V1, geodata=metadata, step=25)
plt.show()

################################################################################
# Variational echo tracking (VET)
# -------------------------------
#
# This module implements the VET algorithm presented
# by Laroche and Zawadzki (1995) and used in the McGill Algorithm for
# Prediction by Lagrangian Extrapolation (MAPLE) described in
# Germann and Zawadzki (2002).
# The approach essentially consists of a global optimization routine that seeks
# at minimizing a cost function between the displaced and the reference image.

oflow_method = motion.get_method("VET")
V2 = oflow_method(R[-3:, :, :])

# Plot the motion field
plot_precip_field(R_, geodata=metadata, title="VET")
quiver(V2, geodata=metadata, step=25)
plt.show()

################################################################################
# Dynamic and adaptive radar tracking of storms (DARTS)
# -----------------------------------------------------
#
# DARTS uses a spectral approach to optical flow that is based on the discrete
# Fourier transform (DFT) of a temporal sequence of radar fields.
# The level of truncation of the DFT coefficients controls the degree of
# smoothness of the estimated motion field, allowing for an efficient
# motion estimation. DARTS requires a longer sequence of radar fields for
# estimating the motion, here we are going to use all the available 10 fields.
oflow_method = motion.get_method("DARTS")
# DARTS requires finite values everywhere; fill the NaNs with the zero value
R[~np.isfinite(R)] = metadata["zerovalue"]
V3 = oflow_method(R)  # needs longer training sequence

# Plot the motion field
plot_precip_field(R_, geodata=metadata, title="DARTS")
quiver(V3, geodata=metadata, step=25)
plt.show()

################################################################################
# Anisotropic diffusion method (Proesmans et al 1994)
# ---------------------------------------------------
#
# This module implements the anisotropic diffusion method presented in Proesmans
# et al. (1994), a robust optical flow technique which employs the notion of
# inconsistency during the solution of the optical flow equations.

oflow_method = motion.get_method("proesmans")
R[~np.isfinite(R)] = metadata["zerovalue"]
V4 = oflow_method(R[-2:, :, :])

# Plot the motion field
plot_precip_field(R_, geodata=metadata, title="Proesmans")
quiver(V4, geodata=metadata, step=25)
plt.show()

################################################################################
# Farnebäck smoothed method
# -------------------------
#
# This module implements the pyramidal decomposition method for motion estimation
# of Farnebäck as implemented in OpenCV, with an option for smoothing and
# renormalization of the motion fields proposed by Driedger et al.:
# https://cmosarchives.ca/Congress_P_A/program_abstracts2022.pdf (p. 392).

oflow_method = motion.get_method("farneback")
R[~np.isfinite(R)] = metadata["zerovalue"]
V5 = oflow_method(R[-2:, :, :], verbose=True)

# Plot the motion field
plot_precip_field(R_, geodata=metadata, title="Farneback")
quiver(V5, geodata=metadata, step=25)
plt.show()

# sphinx_gallery_thumbnail_number = 1


================================================
FILE: examples/plot_steps_nowcast.py
================================================

#!/bin/env python
"""
STEPS nowcast
=============

This tutorial shows how to compute and plot an ensemble nowcast using Swiss
radar data.
""" import matplotlib.pyplot as plt import numpy as np from datetime import datetime from pprint import pprint from pysteps import io, nowcasts, rcparams from pysteps.motion.lucaskanade import dense_lucaskanade from pysteps.postprocessing.ensemblestats import excprob from pysteps.utils import conversion, dimension, transformation from pysteps.visualization import plot_precip_field # Set nowcast parameters n_ens_members = 20 n_leadtimes = 6 seed = 24 ############################################################################### # Read precipitation field # ------------------------ # # First thing, the sequence of Swiss radar composites is imported, converted and # transformed into units of dBR. date = datetime.strptime("201701311200", "%Y%m%d%H%M") data_source = "mch" # Load data source config root_path = rcparams.data_sources[data_source]["root_path"] path_fmt = rcparams.data_sources[data_source]["path_fmt"] fn_pattern = rcparams.data_sources[data_source]["fn_pattern"] fn_ext = rcparams.data_sources[data_source]["fn_ext"] importer_name = rcparams.data_sources[data_source]["importer"] importer_kwargs = rcparams.data_sources[data_source]["importer_kwargs"] timestep = rcparams.data_sources[data_source]["timestep"] # Find the radar files in the archive fns = io.find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep, num_prev_files=2 ) # Read the data from the archive importer = io.get_method(importer_name, "importer") R, _, metadata = io.read_timeseries(fns, importer, **importer_kwargs) # Convert to rain rate R, metadata = conversion.to_rainrate(R, metadata) # Upscale data to 2 km to limit memory usage R, metadata = dimension.aggregate_fields_space(R, metadata, 2000) # Plot the rainfall field plot_precip_field(R[-1, :, :], geodata=metadata) plt.show() # Log-transform the data to unit of dBR, set the threshold to 0.1 mm/h, # set the fill value to -15 dBR R, metadata = transformation.dB_transform(R, metadata, threshold=0.1, zerovalue=-15.0) # Set missing 
values with the fill value
R[~np.isfinite(R)] = -15.0

# Nicely print the metadata
pprint(metadata)

###############################################################################
# Deterministic nowcast with S-PROG
# ---------------------------------
#
# First, the motion field is estimated using a local tracking approach based
# on the Lucas-Kanade optical flow.
# The motion field can then be used to generate a deterministic nowcast with
# the S-PROG model, which implements a scale filtering approach in order to
# progressively remove the unpredictable spatial scales during the forecast.

# Estimate the motion field
V = dense_lucaskanade(R)

# The S-PROG nowcast (inputs are in dBR; precip_thr is the dBR rain threshold)
nowcast_method = nowcasts.get_method("sprog")
R_f = nowcast_method(
    R[-3:, :, :],
    V,
    n_leadtimes,
    n_cascade_levels=6,
    precip_thr=-10.0,
)

# Back-transform to rain rate
R_f = transformation.dB_transform(R_f, threshold=-10.0, inverse=True)[0]

# Plot the S-PROG forecast
plot_precip_field(
    R_f[-1, :, :],
    geodata=metadata,
    title="S-PROG (+ %i min)" % (n_leadtimes * timestep),
)
plt.show()

###############################################################################
# As we can see from the figure above, the forecast produced by S-PROG is a
# smooth field. In other words, the forecast variance is lower than the
# variance of the original observed field.
# However, certain applications demand that the forecast retain the same
# statistical properties of the observations. In such cases, the S-PROG
# forecasts are of limited use and a stochastic approach might be of more
# interest.

###############################################################################
# Stochastic nowcast with STEPS
# -----------------------------
#
# The S-PROG approach is extended to include a stochastic term which represents
# the variance associated to the unpredictable development of precipitation. This
# approach is known as STEPS (short-term ensemble prediction system).
# The STEPS nowcast nowcast_method = nowcasts.get_method("steps") R_f = nowcast_method( R[-3:, :, :], V, n_leadtimes, n_ens_members, n_cascade_levels=6, precip_thr=-10.0, kmperpixel=2.0, timestep=timestep, noise_method="nonparametric", vel_pert_method="bps", mask_method="incremental", seed=seed, ) # Back-transform to rain rates R_f = transformation.dB_transform(R_f, threshold=-10.0, inverse=True)[0] # Plot the ensemble mean R_f_mean = np.mean(R_f[:, -1, :, :], axis=0) plot_precip_field( R_f_mean, geodata=metadata, title="Ensemble mean (+ %i min)" % (n_leadtimes * timestep), ) plt.show() ############################################################################### # The mean of the ensemble displays similar properties as the S-PROG # forecast seen above, although the degree of smoothing also depends on # the ensemble size. In this sense, the S-PROG forecast can be seen as # the mean of an ensemble of infinite size. # Plot some of the realizations fig = plt.figure() for i in range(4): ax = fig.add_subplot(221 + i) ax = plot_precip_field( R_f[i, -1, :, :], geodata=metadata, colorbar=False, axis="off" ) ax.set_title("Member %02d" % i) plt.tight_layout() plt.show() ############################################################################### # As we can see from these two members of the ensemble, the stochastic forecast # mantains the same variance as in the observed rainfall field. # STEPS also includes a stochatic perturbation of the motion field in order # to quantify the its uncertainty. ############################################################################### # Finally, it is possible to derive probabilities from our ensemble forecast. 
# Compute exceedence probabilities for a 0.5 mm/h threshold P = excprob(R_f[:, -1, :, :], 0.5) # Plot the field of probabilities plot_precip_field( P, geodata=metadata, ptype="prob", units="mm/h", probthr=0.5, title="Exceedence probability (+ %i min)" % (n_leadtimes * timestep), ) plt.show() # sphinx_gallery_thumbnail_number = 5 ================================================ FILE: examples/probability_forecast.py ================================================ #!/bin/env python """ Probability forecasts ===================== This example script shows how to forecast the probability of exceeding an intensity threshold. The method is based on the local Lagrangian approach described in Germann and Zawadzki (2004). """ import matplotlib.pyplot as plt import numpy as np from pysteps.nowcasts.lagrangian_probability import forecast from pysteps.visualization import plot_precip_field ############################################################################### # Numerical example # ----------------- # # First, we use some dummy data to show the basic principle of this approach. # The probability forecast is produced by sampling a spatial neighborhood that is # increased as a function of lead time. As a result, the edges of # the yellow square becomes more and more smooth as t increases. This represents # the strong loss of predictability with lead time of any extrapolation nowcast. 
# parameters precip = np.zeros((100, 100)) precip[10:50, 10:50] = 1 velocity = np.ones((2, *precip.shape)) timesteps = [0, 2, 6, 12] thr = 0.5 slope = 1 # pixels / timestep # compute probability forecast out = forecast(precip, velocity, timesteps, thr, slope=slope) # plot for n, frame in enumerate(out): plt.subplot(2, 2, n + 1) plt.imshow(frame, interpolation="nearest", vmin=0, vmax=1) plt.title(f"t={timesteps[n]}") plt.xticks([]) plt.yticks([]) plt.show() ############################################################################### # Real-data example # ----------------- # # We now apply the same method to real data. We use a slope of 1 km / minute # as suggested by Germann and Zawadzki (2004), meaning that after 30 minutes, # the probabilities are computed by using all pixels within a neighborhood of 30 # kilometers. from datetime import datetime from pysteps import io, rcparams, utils from pysteps.motion.lucaskanade import dense_lucaskanade from pysteps.verification import reldiag_init, reldiag_accum, plot_reldiag # data source source = rcparams.data_sources["mch"] root = rcparams.data_sources["mch"]["root_path"] fmt = rcparams.data_sources["mch"]["path_fmt"] pattern = rcparams.data_sources["mch"]["fn_pattern"] ext = rcparams.data_sources["mch"]["fn_ext"] timestep = rcparams.data_sources["mch"]["timestep"] importer_name = rcparams.data_sources["mch"]["importer"] importer_kwargs = rcparams.data_sources["mch"]["importer_kwargs"] # read precip field date = datetime.strptime("201607112100", "%Y%m%d%H%M") fns = io.find_by_date(date, root, fmt, pattern, ext, timestep, num_prev_files=2) importer = io.get_method(importer_name, "importer") precip, __, metadata = io.read_timeseries(fns, importer, **importer_kwargs) precip, metadata = utils.to_rainrate(precip, metadata) # precip[np.isnan(precip)] = 0 # motion motion = dense_lucaskanade(precip) # parameters nleadtimes = 6 thr = 1 # mm / h slope = 1 * timestep # km / min # compute probability forecast extrap_kwargs = 
dict(allow_nonfinite_values=True)
fct = forecast(
    precip[-1], motion, nleadtimes, thr, slope=slope, extrap_kwargs=extrap_kwargs
)

# plot
for n, frame in enumerate(fct):
    plt.subplot(2, 3, n + 1)
    plt.imshow(frame, interpolation="nearest", vmin=0, vmax=1)
    plt.xticks([])
    plt.yticks([])

plt.show()

################################################################################
# Let's plot one single leadtime in more detail using the pysteps visualization
# functionality.

plt.close()

# Plot the field of probabilities
plot_precip_field(
    fct[2],
    geodata=metadata,
    ptype="prob",
    probthr=thr,
    title="Exceedence probability (+ %i min)" % (nleadtimes * timestep),
)
plt.show()

###############################################################################
# Verification
# ------------

# verifying observations: read the next nleadtimes radar frames after the analysis
importer = io.get_method(importer_name, "importer")
fns = io.find_by_date(
    date, root, fmt, pattern, ext, timestep, num_next_files=nleadtimes
)
obs, __, metadata = io.read_timeseries(fns, importer, **importer_kwargs)
obs, metadata = utils.to_rainrate(obs, metadata)
obs[np.isnan(obs)] = 0

# reliability diagram; obs[1:] drops the analysis frame to align with the forecast
reldiag = reldiag_init(thr)
reldiag_accum(reldiag, fct, obs[1:])
fig, ax = plt.subplots()
plot_reldiag(reldiag, ax)
ax.set_title("Reliability diagram")
plt.show()

###############################################################################
# References
# ----------
# Germann, U. and I. Zawadzki, 2004:
# Scale Dependence of the Predictability of Precipitation from Continental
# Radar Images. Part II: Probability Forecasts.
# Journal of Applied Meteorology, 43(1), 74-89.

# sphinx_gallery_thumbnail_number = 3


================================================
FILE: examples/rainfarm_downscale.py
================================================

#!/bin/env python
"""
Precipitation downscaling with RainFARM
=======================================

This example script shows how to use the stochastic downscaling method RainFARM
available in pysteps.
RainFARM is a downscaling algorithm for rainfall fields developed by Rebora et
al. (2006). The method can represent the realistic small-scale variability of
the downscaled precipitation field by means of Gaussian random fields.

Steps:

1. Read the input precipitation data.
2. Upscale the precipitation field.
3. Downscale the field to its original resolution using RainFARM with defaults.
4. Downscale with smoothing.
5. Downscale with spectral fusion.
6. Downscale with smoothing and spectral fusion.

References:

Rebora, N., L. Ferraris, J. von Hardenberg, and A. Provenzale, 2006: RainFARM:
Rainfall downscaling by a filtered autoregressive model. J. Hydrometeor., 7,
724–738.

D D'Onofrio, E Palazzi, J von Hardenberg, A Provenzale, and S Calmanti, 2014:
Stochastic rainfall downscaling of climate models. J. Hydrometeorol.,
15(2):830–843.
"""

import matplotlib.pyplot as plt
import numpy as np
import os
from pprint import pprint
import logging

from pysteps import io, rcparams
from pysteps.utils import aggregate_fields_space, square_domain, to_rainrate
from pysteps.downscaling import rainfarm
from pysteps.visualization import plot_precip_field

# Configure logging for the example's informational messages
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

###############################################################################
# Read the input data
# -------------------
#
# As first step, we need to import the precipitation field that we are going
# to use in this example.
def read_precipitation_data(file_path: str):
    """
    Read a MeteoSwiss AQC composite and prepare it for RainFARM.

    The GIF archive file is imported with the MCH importer, converted from
    accumulation depth (mm) to rain rate (mm/h), and cropped to a square
    domain (RainFARM operates on square fields).

    Parameters
    ----------
    file_path: str
        Path to the AQC GIF file to import.

    Returns
    -------
    precip: array-like
        2-D array of rain rates (mm/h) on a square domain.
    metadata: dict
        Metadata dictionary, updated by the unit conversion and cropping.
    """
    precip, _, metadata = io.import_mch_gif(
        file_path, product="AQC", unit="mm", accutime=5.0
    )
    precip, metadata = to_rainrate(precip, metadata)
    # "crop" (rather than "pad") so no artificial zero borders are introduced.
    precip, metadata = square_domain(precip, metadata, "crop")
    return precip, metadata


# Import the example radar composite
root_path = rcparams.data_sources["mch"]["root_path"]
filename = os.path.join(root_path, "20160711", "AQC161932100V_00005.801.gif")

# Read and process data
precip, metadata = read_precipitation_data(filename)

# Nicely print the metadata
pprint(metadata)

# Plot the original rainfall field
plot_precip_field(precip, geodata=metadata)
plt.title("Original Rainfall Field")
plt.show()

# Assign the fill value to all the Nans
precip[~np.isfinite(precip)] = metadata["zerovalue"]

###############################################################################
# Upscale the field
# -----------------
#
# To test our downscaling method, we first need to upscale the original field to
# a lower resolution. This is only for demo purposes, as we need to artificially
# create a lower resolution field to apply our downscaling method.
# We are going to use a factor of 16 x.


def upscale_field(precip, metadata, scale_factor: int):
    """
    Upscale (coarsen) the precipitation field by a given scale factor.

    Parameters
    ----------
    precip: array-like
        2-D precipitation field to aggregate.
    metadata: dict
        Metadata dictionary; ``xpixelsize`` is used to compute the target
        resolution.
    scale_factor: int
        Coarsening factor; the output grid spacing is
        ``metadata["xpixelsize"] * scale_factor``.

    Returns
    -------
    precip_lr: array-like
        Upscaled (lower-resolution) precipitation field.
    metadata_lr: dict
        Metadata of the upscaled field.
    """
    upscaled_resolution = metadata["xpixelsize"] * scale_factor
    precip_lr, metadata_lr = aggregate_fields_space(
        precip, metadata, upscaled_resolution
    )
    return precip_lr, metadata_lr


scale_factor = 16
precip_lr, metadata_lr = upscale_field(precip, metadata, scale_factor)

# Plot the upscaled rainfall field
plt.figure()
plot_precip_field(precip_lr, geodata=metadata_lr)
plt.title("Upscaled Rainfall Field")
plt.show()

###############################################################################
# Downscale the field
# -------------------
#
# We can now use RainFARM to downscale the precipitation field.
# Basic downscaling precip_hr = rainfarm.downscale(precip_lr, ds_factor=scale_factor) # Plot the downscaled rainfall field plt.figure() plot_precip_field(precip_hr, geodata=metadata) plt.title("Downscaled Rainfall Field") plt.show() ############################################################################### # Downscale with smoothing # ------------------------ # # Add smoothing with a Gaussian kernel during the downscaling process. precip_hr_smooth = rainfarm.downscale( precip_lr, ds_factor=scale_factor, kernel_type="gaussian" ) # Plot the downscaled rainfall field with smoothing plt.figure() plot_precip_field(precip_hr_smooth, geodata=metadata) plt.title("Downscaled Rainfall Field with Gaussian Smoothing") plt.show() ############################################################################### # Downscale with spectral fusion # ------------------------------ # # Apply spectral merging as described in D'Onofrio et al. (2014). precip_hr_fusion = rainfarm.downscale( precip_lr, ds_factor=scale_factor, spectral_fusion=True ) # Plot the downscaled rainfall field with spectral fusion plt.figure() plot_precip_field(precip_hr_fusion, geodata=metadata) plt.title("Downscaled Rainfall Field with Spectral Fusion") plt.show() ############################################################################### # Combined Downscale with smoothing and spectral fusion # ----------------------------------------------------- # # Apply both smoothing with a Gaussian kernel and spectral fusion during the # downscaling process to observe the combined effect. 
precip_hr_combined = rainfarm.downscale( precip_lr, ds_factor=scale_factor, kernel_type="gaussian", spectral_fusion=True ) # Plot the downscaled rainfall field with smoothing and spectral fusion plt.figure() plot_precip_field(precip_hr_combined, geodata=metadata) plt.title("Downscaled Rainfall Field with Gaussian Smoothing and Spectral Fusion") plt.show() ############################################################################### # Remarks # ------- # # Currently, the pysteps implementation of RainFARM only covers spatial downscaling. # That is, it can improve the spatial resolution of a rainfall field. However, unlike # the original algorithm from Rebora et al. (2006), it cannot downscale the temporal # dimension. # sphinx_gallery_thumbnail_number = 2 ================================================ FILE: examples/steps_blended_forecast.py ================================================ # -*- coding: utf-8 -*- """ Blended forecast ==================== This tutorial shows how to construct a blended forecast from an ensemble nowcast using the STEPS approach and a Numerical Weather Prediction (NWP) rainfall forecast. The used datasets are from the Bureau of Meteorology, Australia. """ import os from datetime import datetime import numpy as np from matplotlib import pyplot as plt import pysteps from pysteps import io, rcparams, blending, nowcasts from pysteps.visualization import plot_precip_field ################################################################################ # Read the radar images and the NWP forecast # ------------------------------------------ # # First, we import a sequence of 3 images of 10-minute radar composites # and the corresponding NWP rainfall forecast that was available at that time. # # You need the pysteps-data archive downloaded and the pystepsrc file # configured with the data_source paths pointing to data folders. 
# Additionally, the pysteps-nwp-importers plugin needs to be installed, see # https://github.com/pySTEPS/pysteps-nwp-importers. # Selected case date_radar = datetime.strptime("202010310400", "%Y%m%d%H%M") # The last NWP forecast was issued at 00:00 date_nwp = datetime.strptime("202010310000", "%Y%m%d%H%M") radar_data_source = rcparams.data_sources["bom"] nwp_data_source = rcparams.data_sources["bom_nwp"] ############################################################################### # Load the data from the archive # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ root_path = radar_data_source["root_path"] path_fmt = "prcp-c10/66/%Y/%m/%d" fn_pattern = "66_%Y%m%d_%H%M00.prcp-c10" fn_ext = radar_data_source["fn_ext"] importer_name = radar_data_source["importer"] importer_kwargs = radar_data_source["importer_kwargs"] timestep = 10.0 # Find the radar files in the archive fns = io.find_by_date( date_radar, root_path, path_fmt, fn_pattern, fn_ext, timestep, num_prev_files=2 ) # Read the radar composites importer = io.get_method(importer_name, "importer") radar_precip, _, radar_metadata = io.read_timeseries(fns, importer, **importer_kwargs) # Import the NWP data filename = os.path.join( nwp_data_source["root_path"], datetime.strftime(date_nwp, nwp_data_source["path_fmt"]), datetime.strftime(date_nwp, nwp_data_source["fn_pattern"]) + "." 
+ nwp_data_source["fn_ext"], ) nwp_importer = io.get_method("bom_nwp", "importer") nwp_precip, _, nwp_metadata = nwp_importer(filename) # Only keep the NWP forecasts from the last radar observation time (2020-10-31 04:00) # onwards nwp_precip = nwp_precip[24:43, :, :] ################################################################################ # Pre-processing steps # -------------------- # Make sure the units are in mm/h converter = pysteps.utils.get_method("mm/h") radar_precip, radar_metadata = converter(radar_precip, radar_metadata) nwp_precip, nwp_metadata = converter(nwp_precip, nwp_metadata) # Threshold the data radar_precip[radar_precip < 0.1] = 0.0 nwp_precip[nwp_precip < 0.1] = 0.0 # Plot the radar rainfall field and the first time step of the NWP forecast. date_str = datetime.strftime(date_radar, "%Y-%m-%d %H:%M") plt.figure(figsize=(10, 5)) plt.subplot(121) plot_precip_field( radar_precip[-1, :, :], geodata=radar_metadata, title=f"Radar observation at {date_str}", colorscale="STEPS-NL", ) plt.subplot(122) plot_precip_field( nwp_precip[0, :, :], geodata=nwp_metadata, title=f"NWP forecast at {date_str}", colorscale="STEPS-NL", ) plt.tight_layout() plt.show() # transform the data to dB transformer = pysteps.utils.get_method("dB") radar_precip, radar_metadata = transformer(radar_precip, radar_metadata, threshold=0.1) nwp_precip, nwp_metadata = transformer(nwp_precip, nwp_metadata, threshold=0.1) # r_nwp has to be four dimentional (n_models, time, y, x). # If we only use one model: if nwp_precip.ndim == 3: nwp_precip = nwp_precip[None, :] ############################################################################### # For the initial time step (t=0), the NWP rainfall forecast is not that different # from the observed radar rainfall, but it misses some of the locations and # shapes of the observed rainfall fields. Therefore, the NWP rainfall forecast will # initially get a low weight in the blending process. 
# # Determine the velocity fields # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ oflow_method = pysteps.motion.get_method("lucaskanade") # First for the radar images velocity_radar = oflow_method(radar_precip) # Then for the NWP forecast velocity_nwp = [] # Loop through the models for n_model in range(nwp_precip.shape[0]): # Loop through the timesteps. We need two images to construct a motion # field, so we can start from timestep 1. Timestep 0 will be the same # as timestep 1. _v_nwp_ = [] for t in range(1, nwp_precip.shape[1]): v_nwp_ = oflow_method(nwp_precip[n_model, t - 1 : t + 1, :]) _v_nwp_.append(v_nwp_) v_nwp_ = None # Add the velocity field at time step 1 to time step 0. _v_nwp_ = np.insert(_v_nwp_, 0, _v_nwp_[0], axis=0) velocity_nwp.append(_v_nwp_) velocity_nwp = np.stack(velocity_nwp) ################################################################################ # The blended forecast # ~~~~~~~~~~~~~~~~~~~~ precip_forecast = blending.steps.forecast( precip=radar_precip, precip_models=nwp_precip, velocity=velocity_radar, velocity_models=velocity_nwp, timesteps=18, timestep=timestep, issuetime=date_radar, n_ens_members=1, precip_thr=radar_metadata["threshold"], kmperpixel=radar_metadata["xpixelsize"] / 1000.0, noise_stddev_adj="auto", vel_pert_method=None, ) # Transform the data back into mm/h precip_forecast, _ = converter(precip_forecast, radar_metadata) radar_precip_mmh, _ = converter(radar_precip, radar_metadata) nwp_precip_mmh, _ = converter(nwp_precip, nwp_metadata) ################################################################################ # Visualize the output # ~~~~~~~~~~~~~~~~~~~~ # # The NWP rainfall forecast has a lower weight than the radar-based extrapolation # forecast at the issue time of the forecast (+0 min). Therefore, the first time # steps consist mostly of the extrapolation. 
# However, near the end of the forecast (+180 min), the NWP share in the blended # forecast has become more important and the forecast starts to resemble the # NWP forecast more. fig = plt.figure(figsize=(5, 12)) leadtimes_min = [30, 60, 90, 120, 150, 180] n_leadtimes = len(leadtimes_min) for n, leadtime in enumerate(leadtimes_min): # Nowcast with blending into NWP ax1 = plt.subplot(n_leadtimes, 2, n * 2 + 1) plot_precip_field( precip_forecast[0, int(leadtime / timestep) - 1, :, :], geodata=radar_metadata, title=f"Nowcast +{leadtime} min", axis="off", colorscale="STEPS-NL", colorbar=False, ) ax1.axis("off") # Raw NWP forecast plt.subplot(n_leadtimes, 2, n * 2 + 2) ax2 = plot_precip_field( nwp_precip_mmh[0, int(leadtime / timestep) - 1, :, :], geodata=nwp_metadata, title=f"NWP +{leadtime} min", axis="off", colorscale="STEPS-NL", colorbar=False, ) ax2.axis("off") plt.tight_layout() plt.show() ############################################################################### # It is also possible to blend a deterministic or probabilistic external nowcast # (e.g. a pre-made nowcast or a deterministic AI-based nowcast) with NWP using # the STEPS algorithm. In that case, we add a `precip_nowcast` to # `blending.steps.forecast`. By providing an external nowcast, the STEPS blending # method will omit the autoregression and advection step for the extrapolation # cascade and use the existing external nowcast instead (which will thus be # decomposed into multiplicative cascades!). The weights determination and # possible post-processings steps will remain the same. # # Start with creating an external nowcast # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # We go for a simple advection-only nowcast for the example, but this setup can # be replaced with any external deterministic or probabilistic nowcast. 
extrapolate = nowcasts.get_method("extrapolation")
radar_precip_to_advect = radar_precip.copy()
radar_metadata_to_advect = radar_metadata.copy()

# Make sure the data has no nans
radar_precip_to_advect[~np.isfinite(radar_precip_to_advect)] = -15
radar_precip_to_advect = radar_precip_to_advect.data

# Create the extrapolation
fc_lagrangian_extrapolation = extrapolate(
    radar_precip_to_advect[-1, :, :], velocity_radar, 18
)

# Insert an additional timestep at the start, as t0, which is the same as the current first slice.
fc_lagrangian_extrapolation = np.insert(
    fc_lagrangian_extrapolation, 0, fc_lagrangian_extrapolation[0:1, :, :], axis=0
)
fc_lagrangian_extrapolation[~np.isfinite(fc_lagrangian_extrapolation)] = (
    radar_metadata_to_advect["zerovalue"]
)

################################################################################
# Blend the external nowcast with NWP - deterministic mode
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

precip_forecast = blending.steps.forecast(
    precip=radar_precip,
    precip_nowcast=np.array(
        [fc_lagrangian_extrapolation]
    ),  # Add an extra dimension, because precip_nowcast has to be 4-dimensional
    precip_models=nwp_precip,
    velocity=velocity_radar,
    velocity_models=velocity_nwp,
    timesteps=18,
    timestep=timestep,
    issuetime=date_radar,
    n_ens_members=1,
    precip_thr=radar_metadata["threshold"],
    kmperpixel=radar_metadata["xpixelsize"] / 1000.0,
    noise_stddev_adj="auto",
    vel_pert_method=None,
    nowcasting_method="external_nowcast",
    noise_method=None,
    probmatching_method=None,
    mask_method=None,
    weights_method="bps",
)

# Transform the data back into mm/h
precip_forecast, _ = converter(precip_forecast, radar_metadata)
radar_precip_mmh, _ = converter(radar_precip, radar_metadata)
fc_lagrangian_extrapolation_mmh, _ = converter(
    fc_lagrangian_extrapolation, radar_metadata_to_advect
)
# Fix: store the converted NWP field under the name the visualization code
# below reads (it was previously assigned to a garbled, unused variable name,
# leaving the plots to fall back on the earlier conversion result).
nwp_precip_mmh, _ = converter(nwp_precip, nwp_metadata)
################################################################################ # Visualize the output # ~~~~~~~~~~~~~~~~~~~~ # # The NWP rainfall forecast has a lower weight than the radar-based extrapolation # forecast at the issue time of the forecast (+0 min). Therefore, the first time # steps consist mostly of the extrapolation. # However, near the end of the forecast (+180 min), the NWP share in the blended # forecast has become more important and the forecast starts to resemble the # NWP forecast more. fig = plt.figure(figsize=(6, 12)) leadtimes_min = [30, 60, 90, 120, 150, 180] n_leadtimes = len(leadtimes_min) for n, leadtime in enumerate(leadtimes_min): idx = int(leadtime / timestep) - 1 # Blended nowcast ax1 = plt.subplot(n_leadtimes, 3, n * 3 + 1) plot_precip_field( precip_forecast[0, idx, :, :], geodata=radar_metadata, title=f"Blended +{leadtime} min", axis="off", colorscale="STEPS-NL", colorbar=False, ) ax1.axis("off") # Raw extrapolated nowcast ax2 = plt.subplot(n_leadtimes, 3, n * 3 + 2) plot_precip_field( fc_lagrangian_extrapolation_mmh[idx, :, :], geodata=radar_metadata, title=f"NWC +{leadtime} min", axis="off", colorscale="STEPS-NL", colorbar=False, ) ax2.axis("off") # Raw NWP forecast plt.subplot(n_leadtimes, 3, n * 3 + 3) ax3 = plot_precip_field( nwp_precip_mmh[0, idx, :, :], geodata=nwp_metadata, title=f"NWP +{leadtime} min", axis="off", colorscale="STEPS-NL", colorbar=False, ) ax3.axis("off") ################################################################################ # Blend the external nowcast with NWP - ensemble mode # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ precip_forecast = blending.steps.forecast( precip=radar_precip, precip_nowcast=np.array( [fc_lagrangian_extrapolation] ), # Add an extra dimension, becuase precip_nowcast has to be 4-dimensional precip_models=nwp_precip, velocity=velocity_radar, velocity_models=velocity_nwp, timesteps=18, timestep=timestep, issuetime=date_radar, n_ens_members=5, 
precip_thr=radar_metadata["threshold"], kmperpixel=radar_metadata["xpixelsize"] / 1000.0, noise_stddev_adj="auto", vel_pert_method=None, nowcasting_method="external_nowcast", noise_method="nonparametric", probmatching_method="cdf", mask_method="incremental", weights_method="bps", ) # Transform the data back into mm/h precip_forecast, _ = converter(precip_forecast, radar_metadata) radar_precip_mmh, _ = converter(radar_precip, radar_metadata) fc_lagrangian_extrapolation_mmh, _ = converter( fc_lagrangian_extrapolation, radar_metadata_to_advect ) nwp_precipfc_lagrangian_extrapolation_mmh_mmh, _ = converter(nwp_precip, nwp_metadata) ################################################################################ # Visualize the output # ~~~~~~~~~~~~~~~~~~~~ fig = plt.figure(figsize=(8, 12)) leadtimes_min = [30, 60, 90, 120, 150, 180] n_leadtimes = len(leadtimes_min) for n, leadtime in enumerate(leadtimes_min): idx = int(leadtime / timestep) - 1 # Blended nowcast member 1 ax1 = plt.subplot(n_leadtimes, 4, n * 4 + 1) plot_precip_field( precip_forecast[0, idx, :, :], geodata=radar_metadata, title="Blend Mem. 1", axis="off", colorscale="STEPS-NL", colorbar=False, ) ax1.axis("off") # Blended nowcast member 5 ax2 = plt.subplot(n_leadtimes, 4, n * 4 + 2) plot_precip_field( precip_forecast[4, idx, :, :], geodata=radar_metadata, title="Blend Mem. 
5", axis="off", colorscale="STEPS-NL", colorbar=False, ) ax2.axis("off") # Raw extrapolated nowcast ax3 = plt.subplot(n_leadtimes, 4, n * 4 + 3) plot_precip_field( fc_lagrangian_extrapolation_mmh[idx, :, :], geodata=radar_metadata, title=f"NWC + {leadtime} min", axis="off", colorscale="STEPS-NL", colorbar=False, ) ax3.axis("off") # Raw NWP forecast ax4 = plt.subplot(n_leadtimes, 4, n * 4 + 4) plot_precip_field( nwp_precip_mmh[0, idx, :, :], geodata=nwp_metadata, title=f"NWP + {leadtime} min", axis="off", colorscale="STEPS-NL", colorbar=False, ) ax4.axis("off") plt.show() print("Done.") ################################################################################ # References # ~~~~~~~~~~ # # Bowler, N. E., and C. E. Pierce, and A. W. Seed. 2004. "STEPS: A probabilistic # precipitation forecasting scheme which merges an extrapolation nowcast with # downscaled NWP." Forecasting Research Technical Report No. 433. Wallingford, UK. # # Bowler, N. E., and C. E. Pierce, and A. W. Seed. 2006. "STEPS: A probabilistic # precipitation forecasting scheme which merges an extrapolation nowcast with # downscaled NWP." Quarterly Journal of the Royal Meteorological Society 132(16): # 2127-2155. https://doi.org/10.1256/qj.04.100 # # Seed, A. W., and C. E. Pierce, and K. Norman. 2013. "Formulation and evaluation # of a scale decomposition-based stochastic precipitation nowcast scheme." Water # Resources Research 49(10): 6624-664. https://doi.org/10.1002/wrcr.20536 # # Imhoff, R.O., L. De Cruz, W. Dewettinck, C.C. Brauer, R. Uijlenhoet, K-J. van # Heeringen, C. Velasco-Forero, D. Nerini, M. Van Ginderachter, and A.H. Weerts. # 2023. "Scale-dependent blending of ensemble rainfall nowcasts and NWP in the # open-source pysteps library". Quarterly Journal of the Royal Meteorological # Society 149(753): 1-30. 
https://doi.org/10.1002/qj.4461 ================================================ FILE: examples/thunderstorm_detection_and_tracking.py ================================================ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Thunderstorm Detection and Tracking - T-DaTing ============================================ This example shows how to use the thunderstorm DaTing module. The example is based on MeteoSwiss radar data and uses the Cartesian composite of maximum reflectivity on a 1 km grid. All default values are tuned to this grid, but can be modified. The first section demonstrates thunderstorm cell detection and how to plot contours. The second section demonstrates detection and tracking in combination, as well as how to plot the resulting tracks. This module was implemented following the procedures used in the TRT Thunderstorms Radar Tracking algorithm (:cite:`TRT2004`) used operationally at MeteoSwiss. Modifications include advecting the identified thunderstorms with the optical flow obtained from pysteps, as well as additional options in the thresholding. A detailed description is published in Appendix A of :cite:`Feldmann2021`. References .......... :cite:`TRT2004` :cite:`Feldmann2021` @author: feldmann-m """ ################################################################################ # Import all required functions # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from datetime import datetime from pprint import pprint import matplotlib.pyplot as plt import numpy as np from pysteps import io, rcparams from pysteps.feature import tstorm as tstorm_detect from pysteps.tracking import tdating as tstorm_dating from pysteps.utils import to_reflectivity from pysteps.visualization import plot_precip_field, plot_track, plot_cart_contour ################################################################################ # Read the radar input images # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # A series of 20 files containing Swiss Cartesian gridded rain rates are imported. 
Since # the algorithm is tuned to Swiss max-reflectivity data, the rain rates are transformed # to reflectivity fields using the 'to_reflectivity' utility in pysteps.utils. # Select the input data date = datetime.strptime("201607112100", "%Y%m%d%H%M") data_source = rcparams.data_sources["mch"] # Extract corresponding settings root_path = data_source["root_path"] path_fmt = data_source["path_fmt"] fn_pattern = data_source["fn_pattern"] fn_ext = data_source["fn_ext"] importer_name = data_source["importer"] importer_kwargs = data_source["importer_kwargs"] timestep = data_source["timestep"] # Load the data from the archive fns = io.archive.find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep, num_next_files=20 ) importer = io.get_method(importer_name, "importer") R, _, metadata = io.read_timeseries(fns, importer, **importer_kwargs) # Convert to reflectivity (it is possible to give the a- and b- parameters of the # Marshall-Palmer relationship here: zr_a = and zr_b =). Z, metadata = to_reflectivity(R, metadata) # Extract the list of timestamps timelist = metadata["timestamps"] pprint(metadata) ############################################################################### # Example of thunderstorm identification in a single timestep # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # The function tstorm_detect.detection requires a 2-D input image, all further inputs are # optional. input_image = Z[2, :, :].copy() time = timelist[2] cells_id, labels = tstorm_detect.detection(input_image, time=time) ############################################################################### # Properties of one of the identified cells: print(cells_id.iloc[0]) ############################################################################### # Optionally, one can also ask to consider splits and merges of thunderstorm cells. # A cell at time t is considered to split if it will verlap more than 10% with more than # one cell at time t+1. 
Conversely, a cell is considered to be a merge, if more # than one cells fron time t will overlap more than 10% with it. cells_id, labels = tstorm_detect.detection( input_image, time=time, output_splits_merges=True ) print(cells_id.iloc[0]) ############################################################################### # Example of thunderstorm tracking over a timeseries # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # The tstorm-dating function requires the entire pre-loaded time series. # The first two timesteps are required to initialize the # flow prediction and are not used to compute tracks. track_list, cell_list, label_list = tstorm_dating.dating( input_video=Z, timelist=timelist ) ############################################################################### # Plotting the results # ~~~~~~~~~~~~~~~~~~~~ # Plot precipitation field plot_precip_field(Z[2, :, :], geodata=metadata, units=metadata["unit"]) plt.xlabel("Swiss easting [m]") plt.ylabel("Swiss northing [m]") # Add the identified cells plot_cart_contour(cells_id.cont, geodata=metadata) # Filter the tracks to only contain cells existing in this timestep IDs = cells_id.ID.values track_filt = [] for track in track_list: if np.unique(track.ID) in IDs: track_filt.append(track) # Add their tracks plot_track(track_filt, geodata=metadata) plt.show() ################################################################################ # Evaluating temporal behaviour of cell # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Maximum reflectivity of cells in time # Make an empty list tlen = [] # Get a list of colors that we will use for the plot color = iter(plt.cm.ocean(np.linspace(0, 0.8, len(track_filt)))) # Now, loop through all the tracks and plot the maximum reflectivity of the cell # in time. 
for track in track_filt: plt.plot(np.arange(len(track)), track.max_ref, c=next(color)) tlen.append(len(track)) plt.xticks(np.arange(max(tlen) + 1), labels=np.arange(max(tlen) + 1) * 5) plt.ylabel("Maximum reflectivity (dBZ)") plt.xlabel("Time since cell detection (min)") plt.legend(IDs, loc="lower right", ncol=3, title="Track number") plt.show() ############################################################################### # The size of the thunderstorm cells in time # Make an empty list tlen = [] # Get a list of colors that we will use for the plot color = iter(plt.cm.ocean(np.linspace(0, 0.8, len(track_filt)))) # Now, loop through all the tracks and plot the cell size of the thunderstorms # in time. for track in track_filt: size = [] for ID, t in track.iterrows(): size.append(len(t.x)) plt.plot(np.arange(len(track)), size, c=next(color)) tlen.append(len(track)) plt.xticks(np.arange(max(tlen) + 1), labels=np.arange(max(tlen) + 1) * 5) plt.ylabel("Thunderstorm cell size (pixels)") plt.xlabel("Time since cell detection (min)") plt.legend(IDs, loc="upper left", ncol=3, title="Track number") plt.show() ================================================ FILE: pyproject.toml ================================================ [build-system] requires = [ "wheel", "setuptools>=40.8.0", "Cython>=0.29.2", "numpy>=1.13" ] # setuptools 40.8.0 is the first version of setuptools that offers # a PEP 517 backend that closely mimics directly executing setup.py. 
build-backend = "setuptools.build_meta:__legacy__" #https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support # Define black parameters for the project # https://black.readthedocs.io/en/stable/pyproject_toml.html#configuration-format [tool.black] target-version = ['py36'] line-length = 88 exclude = ''' /( \.eggs | \.git | _build | build | dist )/ ''' ================================================ FILE: pysteps/__init__.py ================================================ import json import os import stat import sys import warnings from jsmin import jsmin from jsonschema import Draft4Validator # import subpackages from . import blending from . import cascade from . import datasets from . import decorators from . import downscaling from . import exceptions from . import extrapolation from . import io from . import motion from . import noise from . import nowcasts from . import postprocessing from . import timeseries from . import utils from . import verification as vf from . import visualization as plt def _get_config_file_schema(): """ Return the path to the parameters file json schema. """ module_file = _decode_filesystem_path(__file__) return os.path.join(os.path.dirname(module_file), "pystepsrc_schema.json") def _fconfig_candidates_generator(): """ Configuration files candidates generator. See :py:func:~config_fname for more details. 
""" yield os.path.join(os.getcwd(), "pystepsrc") try: pystepsrc = os.environ["PYSTEPSRC"] except KeyError: pass else: yield pystepsrc yield os.path.join(pystepsrc, "pystepsrc") if os.name == "nt": # Windows environment env_variable = "USERPROFILE" subdir = "pysteps" else: # UNIX like env_variable = "HOME" subdir = ".pysteps" try: pystepsrc = os.environ[env_variable] except KeyError: pass else: yield os.path.join(pystepsrc, subdir, "pystepsrc") module_file = _decode_filesystem_path(__file__) yield os.path.join(os.path.dirname(module_file), "pystepsrc") yield None # Function adapted from matplotlib's *matplotlib_fname* function. def config_fname(): """ Get the location of the config file. Looks for pystepsrc file in the following order: - $PWD/pystepsrc: Looks for the file in the current directory - $PYSTEPSRC: If the system variable $PYSTEPSRC is defined and it points to a file, it is used.. - $PYSTEPSRC/pystepsrc: If $PYSTEPSRC points to a directory, it looks for the pystepsrc file inside that directory. - $HOME/.pysteps/pystepsrc (unix and Mac OS X) : If the system variable $HOME is defined, it looks for the configuration file in this path. - $USERPROFILE/pysteps/pystepsrc (windows only): It looks for the configuration file in the pysteps directory located user's home directory. - Lastly, it looks inside the library in pysteps/pystepsrc for a system-defined copy. """ file_name = None for file_name in _fconfig_candidates_generator(): if file_name is not None: if os.path.exists(file_name): st_mode = os.stat(file_name).st_mode if stat.S_ISREG(st_mode) or stat.S_ISFIFO(st_mode): return file_name # Return first candidate that is a file, # or last candidate if none is valid # (in that case, a warning is raised at startup by `rc_params`). 
return file_name def _decode_filesystem_path(path): if not isinstance(path, str): return path.decode(sys.getfilesystemencoding()) else: return path class _DotDictify(dict): """ Class used to recursively access dict via attributes as well as index access. This is introduced to maintain backward compatibility with older pysteps configuration parameters implementations. Code adapted from: https://stackoverflow.com/questions/3031219/recursively-access-dict-via-attributes-as-well-as-index-access Credits: `Curt Hagenlocher`_ .. _`Curt Hagenlocher`: https://stackoverflow.com/users/533/curt-hagenlocher """ def __setitem__(self, key, value): if isinstance(value, dict) and not isinstance(value, _DotDictify): value = _DotDictify(value) super().__setitem__(key, value) def __getitem__(self, key): value = super().__getitem__(key) if isinstance(value, dict) and not isinstance(value, _DotDictify): value = _DotDictify(value) super().__setitem__(key, value) return value __setattr__, __getattr__ = __setitem__, __getitem__ rcparams = dict() def load_config_file(params_file=None, verbose=False, dryrun=False): """ Load the pysteps configuration file. The configuration parameters are available as a DotDictify instance in the `pysteps.rcparams` variable. Parameters ---------- params_file: str Path to the parameters file to load. If `params_file=None`, it looks for a configuration file in the default locations. verbose: bool Print debugging information. False by default. This flag is overwritten by the silent_import=False in the pysteps configuration file. dryrun: bool If False, perform a dry run that does not update the `pysteps.rcparams` attribute. Returns ------- rcparams: _DotDictify Configuration parameters loaded from file. """ global rcparams if params_file is None: # Load default configuration params_file = config_fname() if params_file is None: warnings.warn( "pystepsrc file not found." 
+ "The defaults parameters are left empty", category=ImportWarning, ) _rcparams = dict() return with open(params_file, "r") as f: _rcparams = json.loads(jsmin(f.read())) if (not _rcparams.get("silent_import", False)) or verbose: print("Pysteps configuration file found at: " + params_file + "\n") with open(_get_config_file_schema(), "r") as f: schema = json.loads(jsmin(f.read())) validator = Draft4Validator(schema) error_msg = "Error reading pystepsrc file." error_count = 0 for error in validator.iter_errors(_rcparams): error_msg += "\nError in " + "/".join(list(error.path)) error_msg += ": " + error.message error_count += 1 if error_count > 0: raise RuntimeError(error_msg) _rcparams = _DotDictify(_rcparams) if not dryrun: rcparams = _rcparams return _rcparams # Load default configuration load_config_file() # After the sub-modules are loaded, register the discovered importers plugin. io.interface.discover_importers() postprocessing.interface.discover_postprocessors() ================================================ FILE: pysteps/blending/__init__.py ================================================ # -*- coding: utf-8 -*- """Methods for blending NWP model(s) with nowcasts.""" from pysteps.blending.interface import get_method from .clim import * from .skill_scores import * from .utils import * ================================================ FILE: pysteps/blending/clim.py ================================================ """ pysteps.blending.clim ===================== Module with methods to read, write and compute past and climatological NWP model skill scores. The module stores the average daily skill score for the past t days and updates it every day. The resulting average climatological skill score is the skill the NWP model skill regresses to during the blended forecast. If no climatological values are present, the default skill from :cite:`BPS2006` is used. .. 
autosummary:: :toctree: ../generated/ get_default_skill save_skill calc_clim_skill """ import pickle from pathlib import Path import numpy as np def get_default_skill(n_cascade_levels=6, n_models=1): """ Get the default climatological skill values as given in :cite:`BPS2006`. Take subset of n_cascade_levels or add entries with small values (1e-4) if n_cascade_levels differs from 8. Parameters ---------- n_cascade_levels: int, optional Number of cascade levels. Defaults to 6, see issue #385 on GitHub. n_models: int, optional Number of NWP models. Defaults to 1. Returns ------- default_skill: array-like Array of shape [model, scale_level] containing the climatological skill values. """ default_skill = np.array( [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040] ) n_skill = default_skill.shape[0] if n_cascade_levels < n_skill: default_skill = default_skill[0:n_cascade_levels] elif n_cascade_levels > n_skill: default_skill = np.append( default_skill, np.repeat(1e-4, n_cascade_levels - n_skill) ) return np.resize(default_skill, (n_models, n_cascade_levels)) def save_skill( current_skill, validtime, outdir_path, window_length=30, **kwargs, ): """ Add the current NWP skill to update today's daily average skill. If the day is over, update the list of daily average skill covering a rolling window. Parameters ---------- current_skill: array-like Array of shape [model, scale_level, ...] containing the current skill of the different NWP models per cascade level. validtime: datetime Datetime object containing the date and time for which the current skill are valid. outdir_path: string Path to folder where the historical skill are stored. Defaults to path_workdir from rcparams. window_length: int, optional Length of window (in days) of daily skill that should be retained. Defaults to 30. 
    Returns
    -------
    None
    """
    n_cascade_levels = current_skill.shape[1]

    # Load skill_today, a dictionary containing {mean_skill, n, last_validtime}
    new_skill_today_file = False
    skill_today_file = Path(outdir_path) / "NWP_skill_today.pkl"
    if skill_today_file.is_file():
        with open(skill_today_file, "rb") as f:
            skill_today = pickle.load(f)
        # A shape mismatch means the model/cascade configuration has changed;
        # start a fresh daily-statistics file instead of mixing shapes.
        if skill_today["mean_skill"].shape != current_skill.shape:
            new_skill_today_file = True
    else:
        new_skill_today_file = True

    if new_skill_today_file:
        skill_today = {
            "mean_skill": np.copy(current_skill),
            "n": 0,
            "last_validtime": validtime,
        }

    # Load the past skill which is an array with dimensions day x model x scale_level
    past_skill_file = Path(outdir_path) / "NWP_skill_window.npy"
    past_skill = None
    if past_skill_file.is_file():
        past_skill = np.load(past_skill_file)
    # First check if we have started a new day wrt the last written skill, in which
    # case we should update the daily skill file and reset daily statistics.
    if skill_today["last_validtime"].date() < validtime.date():
        # Append skill to the list of the past X daily averages.
        # Only append when the stored window matches the current model count
        # and number of cascade levels; otherwise restart the window.
        if (
            past_skill is not None
            and past_skill.shape[2] == n_cascade_levels
            and past_skill.shape[1] == skill_today["mean_skill"].shape[0]
        ):
            past_skill = np.append(past_skill, [skill_today["mean_skill"]], axis=0)
        else:
            past_skill = np.array([skill_today["mean_skill"]])
        # Remove oldest if the number of entries exceeds the window length.
        if past_skill.shape[0] > window_length:
            past_skill = np.delete(past_skill, 0, axis=0)
        # FIXME also write out last_validtime.date() in this file?
        # In that case it will need pickling or netcdf.
        # Write out the past skill within the rolling window.
        np.save(past_skill_file, past_skill)

        # Reset statistics for today.
        skill_today["n"] = 0
        skill_today["mean_skill"] = 0

    # Reset today's skill if needed and/or compute online average from the
    # current skill using numerically stable algorithm
    skill_today["n"] += 1
    skill_today["mean_skill"] += (
        current_skill - skill_today["mean_skill"]
    ) / skill_today["n"]
    skill_today["last_validtime"] = validtime

    # Make path if path does not exist
    skill_today_file.parent.mkdir(exist_ok=True, parents=True)

    # Open and write to skill file
    with open(skill_today_file, "wb") as f:
        pickle.dump(skill_today, f)

    return None


def calc_clim_skill(
    outdir_path,
    n_cascade_levels=6,
    n_models=1,
    window_length=30,
):
    """
    Return the climatological skill based on the daily average skill in the
    rolling window. This is done using a geometric mean.

    Parameters
    ----------
    n_cascade_levels: int, optional
        Number of cascade levels. Defaults to 6, see issue #385 on GitHub.
    outdir_path: string
        Path to folder where the historical skill are stored. Defaults to
        path_workdir from rcparams.
    n_models: int, optional
        Number of NWP models. Defaults to 1.
    window_length: int, optional
        Length of window (in days) over which to compute the climatological
        skill. Defaults to 30.

    Returns
    -------
    climatological_mean_skill: array-like
        Array of shape [model, scale_level, ...] containing the climatological
        (geometric) mean skill.
    """
    past_skill_file = Path(outdir_path) / "NWP_skill_window.npy"
    # past_skill has dimensions date x model x scale_level x ....
    if past_skill_file.is_file():
        past_skill = np.load(past_skill_file)
    else:
        # np.array(None) is falsy under .any(), which triggers the default
        # skill fallback below.
        past_skill = np.array(None)
    # check if there is enough data to compute the climatological skill
    if not past_skill.any():
        return get_default_skill(n_cascade_levels, n_models)
    elif past_skill.shape[0] < window_length:
        return get_default_skill(n_cascade_levels, n_models)
    # reduce window if necessary
    else:
        past_skill = past_skill[-window_length:]

    # Make sure past_skill cannot be lower than 10e-5
    # NOTE(review): 10e-5 equals 1e-4 — confirm this floor is intentionally the
    # same magnitude as the 1e-4 padding used in get_default_skill.
    past_skill = np.where(past_skill < 10e-5, 10e-5, past_skill)

    # Calculate climatological skill from the past_skill using the
    # geometric mean.
    geomean_skill = np.exp(np.log(past_skill).mean(axis=0))

    # Make sure skill is always a positive value and a finite value
    geomean_skill = np.where(geomean_skill < 10e-5, 10e-5, geomean_skill)
    geomean_skill = np.nan_to_num(
        geomean_skill, copy=True, nan=10e-5, posinf=10e-5, neginf=10e-5
    )

    return geomean_skill



================================================
FILE: pysteps/blending/ens_kalman_filter_methods.py
================================================
# -*- coding: utf-8 -*-
"""
pysteps.blending.ens_kalman_filter_methods
=============================================

Methods to calculate the ensemble Kalman filter based correction methods for
blending between nowcast and NWP. The core of the method occurs in the
EnsembleKalmanFilter class. The specific method to use this core class can be
selected. Currently, only the implementation of the ensemble Kalman filter from
:cite:`Nerini2019MWR` is available.

Additional keyword arguments for the ensemble Kalman filter are:

n_tapering: int, default=0
    Tapering parameter controlling the number of covariance pairs
    (i, i ± n_tapering) retained in the covariance matrix. With n_tapering=0,
    only the variances (main diagonal) of the principal components are kept.
non_precip_mask: bool, (True)
    Flag to specify whether the computation should be truncated on grid boxes
    where at least a minimum number (configurable) of ensemble members forecast
    precipitation. Defaults to True.
n_ens_prec: int, (1)
    Minimum number of ensemble members that forecast precipitation for the
    above mentioned mask. Defaults to 1.
lien_criterion: bool, (True)
    Flag to specify whether Lien criterion (Lien et al., 2013) should be
    applied for the computation of the update step within the ensemble Kalman
    filter. Defaults to True.
n_lien: int, (n_ens_members/2)
    Minimum number of ensemble members that forecast precipitation for the
    Lien criterion. Defaults to half of the ensemble members.
prob_matching: str, {'iterative','post_forecast','none'}
    Specify the probability matching method that should be applied as an
    additional processing step of the forecast computation. Defaults to
    'iterative'.
inflation_factor_bg: float, (1.0)
    Inflation factor of the background (NWC) covariance matrix. This factor
    increases the covariances of the background ensemble and, thus, supports a
    faster convergence towards the observation ensemble (NWP). Defaults to 1.0.
inflation_factor_obs: float, (1.0)
    Inflation factor of the observation (NWP) covariance matrix. This factor
    increases the covariances of the observation ensemble (NWP) and, thus,
    supports a slower convergence towards the observation ensemble. Defaults
    to 1.0.
offset_bg: float, (0.0)
    Offset of the background (NWC) covariance matrix. This offset supports a
    faster convergence towards the observation ensemble (NWP) by linearly
    increasing all elements of the background covariance matrix. Defaults to
    0.0.
offset_obs: float, (0.0)
    Offset of the observation (NWP) covariance matrix. This offset supports a
    slower convergence towards the observation ensemble (NWP) by linearly
    increasing all elements of the observation covariance matrix. Defaults to
    0.0.
nwp_hres_eff: float
    Effective horizontal resolution of the utilized NWP model.
sampling_prob_source: str, {'ensemble','explained_var'}
    Computation method of the sampling probability for the probability
    matching. 'ensemble' computes this probability as the ratio between the
    ensemble differences of analysis_ensemble - background_ensemble and
    observation_ensemble - background_ensemble. 'explained_var' uses the sum
    of the Kalman gain weighted by the explained variance ratio.
use_accum_sampling_prob: bool, (False)
    Flag to specify whether the current sampling probability should be used
    for the probability matching or a probability integrated over the previous
    forecast time. Defaults to False.
ensure_full_nwp_weight: bool, (True)
    Flag to specify whether the end of the combination should represent the
    pure NWP forecast. Defaults to True.
"""

import numpy as np

from pysteps import utils
from pysteps.postprocessing import probmatching

try:
    import dask

    DASK_IMPORTED = True
except ImportError:
    DASK_IMPORTED = False


class EnsembleKalmanFilter:
    """
    Core of the reduced-space ensemble Kalman filter combination following
    :cite:`Nerini2019MWR`.
    """

    def __init__(self, config, params):
        # Forecast configuration (ensemble size, precipitation threshold, ...).
        self._config = config

        # Check for combination kwargs in params
        self.__n_tapering = params.combination_kwargs.get("n_tapering", 0)
        self.__non_precip_mask = params.combination_kwargs.get("non_precip_mask", True)
        self.__n_ens_prec = params.combination_kwargs.get("n_ens_prec", 1)
        self.__lien_criterion = params.combination_kwargs.get("lien_criterion", True)
        self.__n_lien = params.combination_kwargs.get(
            "n_lien", self._config.n_ens_members // 2
        )

        print("Initialize ensemble Kalman filter")
        print("=================================")
        print("")
        print(f"Non-tapered diagonals: {self.__n_tapering}")
        print(f"Non precip mask: {self.__non_precip_mask}")
        print(f"No. ens mems with precipitation: {self.__n_ens_prec}")
        print(f"Lien Criterion: {self.__lien_criterion}")
        print(f"No. ens mems with precip (Lien): {self.__n_lien}")
        print("")

    def update(
        self,
        background_ensemble: np.ndarray,
        observation_ensemble: np.ndarray,
        inflation_factor_bg: float,
        inflation_factor_obs: float,
        offset_bg: float,
        offset_obs: float,
        background_ensemble_valid_lien: np.ndarray | None = None,
        observation_ensemble_valid_lien: np.ndarray | None = None,
    ):
        """
        Compute the ensemble Kalman filter update step.

        Parameters
        ----------
        background_ensemble: np.ndarray
            Two-dimensional array of shape (n_ens, n_pc) containing the
            background ensemble that corresponds to the Nowcast ensemble
            forecast.
        observation_ensemble: np.ndarray
            Two-dimensional array of shape (n_ens, n_pc) containing the
            observations that correspond to the NWP ensemble forecast.
        inflation_factor_bg: float
            Inflation factor of the background ensemble covariance matrix.
        inflation_factor_obs: float
            Inflation factor of the observation covariance matrix.
        offset_bg: float
            Offset of the background ensemble covariance matrix.
        offset_obs: float
            Offset of the observation covariance matrix.

        Other Parameters
        ----------------
        background_ensemble_valid_lien: np.ndarray
            Two-dimensional array of shape (n_ens, n_pc) containing the
            background ensemble that consists only of grid boxes at which the
            Lien criterion is satisfied.
        observation_ensemble_valid_lien: np.ndarray
            Two-dimensional array of shape (n_ens, n_pc) containing the
            observations that consists only of grid boxes at which the Lien
            criterion is satisfied.

        Returns
        -------
        analysis_ensemble: np.ndarray
            Two-dimensional array of shape (n_ens, n_pc) containing the
            updated analysis matrix.
        """
        # If the masked background and observation arrays are given, compute the
        # covariance matrices P and R only on these values.
        if (
            background_ensemble_valid_lien is not None
            and observation_ensemble_valid_lien is not None
        ):
            # Equation 13 in Nerini et al. (2019)
            P = self.get_covariance_matrix(
                background_ensemble_valid_lien,
                inflation_factor=inflation_factor_bg,
                offset=offset_bg,
            )
            # Equation 14 in Nerini et al. (2019)
            R = self.get_covariance_matrix(
                observation_ensemble_valid_lien,
                inflation_factor=inflation_factor_obs,
                offset=offset_obs,
            )
        # Otherwise use the complete arrays.
        else:
            # Equation 13 in Nerini et al. (2019)
            P = self.get_covariance_matrix(
                background_ensemble,
                inflation_factor=inflation_factor_bg,
                offset=offset_bg,
            )
            # Equation 14 in Nerini et al. (2019)
            R = self.get_covariance_matrix(
                observation_ensemble,
                inflation_factor=inflation_factor_obs,
                offset=offset_obs,
            )

        # Estimate the Kalman gain (eq. 15 in Nerini et al., 2019)
        self.K = np.dot(P, np.linalg.inv(P + R))

        # Update the background ensemble (eq. 16 in Nerini et al., 2019)
        analysis_ensemble = background_ensemble.T + np.dot(
            self.K, (observation_ensemble - background_ensemble).T
        )

        return analysis_ensemble

    def get_covariance_matrix(
        self, forecast_array: np.ndarray, inflation_factor: float, offset: float
    ):
        """
        Compute the covariance matrix of a given ensemble forecast along the
        grid boxes or principal components as it is done by Eq. 13 and 14 in
        Nerini et al., 2019.

        Parameters
        ----------
        forecast_array: np.ndarray
            Two-dimensional array of shape (n_ens, n_pc) containing an ensemble
            forecast of one lead time.
        inflation_factor: float
            Factor to increase the covariance and therefore the ensemble
            spread.
        offset: float
            Offset to shift the covariance.

        Returns
        -------
        Cov: np.ndarray
            Two-dimensional array of shape (n_pc, n_pc) containing the
            covariance matrix of the given ensemble forecast.
        """
        # Compute the ensemble mean
        ensemble_mean = np.mean(forecast_array, axis=0)

        # Center the ensemble forecast and multiply with the given inflation factor
        centered_ensemble = (forecast_array - ensemble_mean) * inflation_factor

        # Compute the covariance matrix and add the respective offset and filter
        # unwanted diagonals, respectively.
        Cov = (
            1
            / (forecast_array.shape[0] - 1)
            * np.dot(centered_ensemble.T, centered_ensemble)
            + offset
        ) * self.get_tapering(forecast_array.shape[1])

        return Cov

    def get_tapering(self, n: int):
        """
        Create a window function to clip unwanted diagonals of the covariance
        matrix.

        Parameters
        ----------
        n: integer
            Number of grid boxes/principal components of the ensemble forecast
            for that the covariance matrix is computed.

        Returns
        -------
        window_function: np.ndarray
            Two-dimensional array of shape (n_pc, n_pc) containing the window
            function to filter unwanted diagonals of the covariance matrix.
        """
        # Create an n-dimensional I-matrix as basis of the window function
        window_function = np.eye(n)

        # Get the weightings of a hanning window function with respect to the number of
        # diagonals that on want to keep
        # NOTE(review): np.hanning's endpoints are zero, so the outermost
        # retained diagonal (d == n_tapering - 1) receives weight 0 — confirm
        # this slice start (n_tapering + 1) is intended and not an off-by-one.
        hanning_values = np.hanning(self.__n_tapering * 2 + 1)[
            (self.__n_tapering + 1) :
        ]

        # Add the respective values to I
        for d in range(self.__n_tapering):
            window_function += np.diag(np.ones(n - d - 1) * hanning_values[d], k=d + 1)
            window_function += np.diag(np.ones(n - d - 1) * hanning_values[d], k=-d - 1)

        return window_function

    def get_precipitation_mask(self, forecast_array: np.ndarray):
        """
        Create the set of grid boxes where at least a minimum number
        (configurable) of ensemble members forecast precipitation.

        Parameters
        ----------
        forecast_array: np.ndarray
            Two-dimensional array of shape (n_ens, n_grid) containing the
            ensemble forecast for one lead time.

        Returns
        -------
        idx_prec: np.ndarray
            One-dimensional array of shape (n_grid) that is set to True if the
            minimum number of ensemble members predict precipitation.
        """
        # Check the number of ensemble members forecast precipitation at each grid box.
        forecast_array_sum = np.sum(
            forecast_array >= self._config.precip_threshold, axis=0
        )

        # If the masking of areas without precipitation is requested, mask grid boxes
        # where less ensemble members predict precipitation than the set limit n_ens_prec.
if self.__non_precip_mask == True: idx_prec = forecast_array_sum >= self.__n_ens_prec # Else, set all to True. else: idx_prec = np.ones_like(forecast_array_sum).astype(bool) return idx_prec def get_lien_criterion(self, nwc_ensemble: np.ndarray, nwp_ensemble: np.ndarray): """ Create the set of grid boxes where the Lien criterion is satisfied (Lien et al., 2013) and thus, at least half (configurable) of the ensemble members of each forecast (Nowcast and NWP) predict precipitation. Parameters ---------- nwc_ensemble: np.ndarray Two-dimensional array (n_ens, n_grid) containing the nowcast ensemble forecast for one lead time. nwp_ensemble: np.ndarray Two-dimensional array (n_ens, n_grid) containg the NWP ensemble forecast for one lead time. Returns ------- idx_lien: np.ndarray One-dimensional array of shape (n_grid) that is set to True at grid boxes where the Lien criterion is satisfied. """ # Check the number of ensemble members forecast precipitation at each grid box. nwc_ensemble_sum = np.sum(nwc_ensemble >= self._config.precip_threshold, axis=0) nwp_ensemble_sum = np.sum(nwp_ensemble >= self._config.precip_threshold, axis=0) # If the masking of areas without precipitation is requested, mask grid boxes # where less ensemble members predict precipitation than the set limit of n_ens_fc_prec. if self.__lien_criterion: idx_lien = np.logical_and( nwc_ensemble_sum >= self.__n_lien, nwp_ensemble_sum >= self.__n_lien ) # Else, set all to True. else: idx_lien = np.ones_like(nwc_ensemble_sum).astype(bool) return idx_lien def get_weighting_for_probability_matching( self, background_ensemble: np.ndarray, analysis_ensemble: np.ndarray, observation_ensemble: np.ndarray, ): """ Compute the weighting between background (nowcast) and observation (NWP) ensemble that results to the updated analysis ensemble in physical space for an optional probability matching. See equation 17 in Nerini et al. (2019). 
        Parameters
        ----------
        background_ensemble: np.ndarray
            Two-dimensional array of shape (n_ens, n_grid) containing the
            background ensemble (Original nowcast).
        analysis_ensemble: np.ndarray
            Two-dimensional array of shape (n_ens, n_grid) containing the
            updated analysis ensemble.
        observation_ensemble: np.ndarray
            Two-dimensional array of shape (n_ens, n_grid) containing the
            observation ensemble (NWP).

        Returns
        -------
        prob_matching_weight: float
            A weighting of which elements of the input ensemble contributed to
            the updated analysis ensemble with respect to observation_ensemble.
            Therefore, 0 means that the contribution comes entirely from the
            background_ensemble (the original nowcast). 1 means that the
            contribution comes entirely from the observation_ensemble (the NWP
            forecast).
        """
        # Compute the sum of differences between analysis_ensemble and background_ensemble
        # as well as observation_ensemble and background_ensemble along the grid boxes.
        w1 = np.sum(analysis_ensemble - background_ensemble, axis=0)
        w2 = np.sum(observation_ensemble - background_ensemble, axis=0)

        # Check for infinitesimal differences between w1 and w2 as well as 0.
        w_close = np.isclose(w1, w2)
        w_zero = np.logical_and(w_close, np.isclose(w2, 0.0))

        # Compute the fraction of w1 and w2 and set values on grid boxes marked by
        # w_close or w_zero to 1 and 0, respectively.
        # NOTE(review): w2 can still be near zero where w_zero is False; the
        # resulting out-of-range ratios are discarded by the [0, 1] validity
        # filter applied below.
        prob_matching_weight = np.zeros_like(w1)
        prob_matching_weight[~w_zero] = w1[~w_zero] / w2[~w_zero]
        prob_matching_weight[w_close] = 1.0

        # Even now we have at some grid boxes weights outside the range between 0
        # and 1. Therefore, we leave them out in the calculation of the averaged
        # weighting.
        valid_values = np.logical_and(
            prob_matching_weight >= 0.0, prob_matching_weight <= 1.0
        )
        prob_matching_weight = np.nanmean(prob_matching_weight[valid_values])

        # If there is no finite prob_matching_weight, we are switching to the NWP
        if not np.isfinite(prob_matching_weight):
            prob_matching_weight = 1.0

        return prob_matching_weight


class MaskedEnKF(EnsembleKalmanFilter):
    """
    Ensemble Kalman filter applied only on precipitating grid boxes, with
    optional iterative probability matching.
    """

    def __init__(self, config, params):
        EnsembleKalmanFilter.__init__(self, config, params)
        self.__params = params

        # Read arguments from combination kwargs or set standard values if kwargs not
        # given
        self.__iterative_prob_matching = self.__params.combination_kwargs.get(
            "iterative_prob_matching", True
        )
        self.__inflation_factor_bg = self.__params.combination_kwargs.get(
            "inflation_factor_bg", 1.0
        )
        self.__inflation_factor_obs = self.__params.combination_kwargs.get(
            "inflation_factor_obs", 1.0
        )
        self.__offset_bg = self.__params.combination_kwargs.get("offset_bg", 0.0)
        self.__offset_obs = self.__params.combination_kwargs.get("offset_obs", 0.0)
        self.__sampling_prob_source = self.__params.combination_kwargs.get(
            "sampling_prob_source", "ensemble"
        )
        self.__use_accum_sampling_prob = self.__params.combination_kwargs.get(
            "use_accum_sampling_prob", False
        )
        self.__ensure_full_nwp_weight = self.__params.combination_kwargs.get(
            "ensure_full_nwp_weight", True
        )

        # Running state: probability-matching weight, its accumulation over
        # lead times, and the cosine-degradation bookkeeping for the
        # observation inflation factor.
        self.__sampling_probability = 0.0
        self.__accumulated_sampling_prob = 0.0
        self.__degradation_timestep = 0.2
        self.__inflation_factor_obs_tmp = 1.0

        print("Initialize masked ensemble Kalman filter")
        print("========================================")
        print("")
        print(f"Iterative probability matching: {self.__iterative_prob_matching}")
        print(f"Background inflation factor: {self.__inflation_factor_bg}")
        print(f"Observation inflation factor: {self.__inflation_factor_obs}")
        print(f"Background offset: {self.__offset_bg}")
        print(f"Observation offset: {self.__offset_obs}")
        print(f"Sampling probability source: {self.__sampling_prob_source}")
        print(f"Use accum. sampling probability: {self.__use_accum_sampling_prob}")
        print(f"Ensure full NWP weight: {self.__ensure_full_nwp_weight}")

        return

    def correct_step(
        self, background_ensemble, observation_ensemble, resampled_forecast
    ):
        """
        Prepare input ensembles of Nowcast and NWP for the ensemble Kalman
        filter update step.

        Parameters
        ----------
        background_ensemble: np.ndarray
            Three-dimensional array of shape (n_ens, m, n) containing the
            background (Nowcast) ensemble forecast for one timestep. This data
            is used as background ensemble in the ensemble Kalman filter.
        observation_ensemble: np.ndarray
            Three-dimensional array of shape (n_ens, m, n) containing the
            observation (NWP) ensemble forecast for one timestep. This data is
            used as observation ensemble in the ensemble Kalman filter.
        resampled_forecast: np.ndarray
            Three-dimensional array of shape (n_ens, m, n) containing the
            resampled (post- processed) ensemble forecast for one timestep.

        Returns
        -------
        analysis_ensemble: np.ndarray
            Three-dimensional array of shape (n_ens, m, n) containing the
            Nowcast ensemble forecast corrected by NWP ensemble data.
        resampled_forecast: np.ndarray
            Three-dimensional array of shape (n_ens, m, n) containing the
            resampled (post- processed) ensemble forecast for one timestep.
        """
        # Get indices with predicted precipitation.
        idx_prec = np.logical_or(
            self.get_precipitation_mask(background_ensemble),
            self.get_precipitation_mask(observation_ensemble),
        )

        # Get indices with satisfied Lien criterion and truncate the array onto the
        # precipitation area.
        idx_lien = self.get_lien_criterion(background_ensemble, observation_ensemble)[
            idx_prec
        ]

        # Stack both ensemble forecasts and truncate them onto the precipitation area.
forecast_ens_stacked = np.vstack((background_ensemble, observation_ensemble))[ :, idx_prec ] # Remove possible non-finite values forecast_ens_stacked[~np.isfinite(forecast_ens_stacked)] = ( self._config.norain_threshold ) # Check whether there are more rainy grid boxes as two times the ensemble # members if np.sum(idx_prec) <= forecast_ens_stacked.shape[0]: # If this is the case, the NWP ensemble forecast is returned return observation_ensemble # Transform both ensemble forecasts into the PC space. kwargs = {"n_components": forecast_ens_stacked.shape[0], "svd_solver": "full"} forecast_ens_stacked_pc, pca_params = utils.pca.pca_transform( forecast_ens=forecast_ens_stacked, get_params=True, **kwargs ) # And do that transformation also for the Lien criterion masked values. forecast_ens_lien_pc = utils.pca.pca_transform( forecast_ens=forecast_ens_stacked, mask=idx_lien, pca_params=pca_params, **kwargs, ) if not np.isclose(self.__accumulated_sampling_prob, 1.0, rtol=1e-2): self.__inflation_factor_obs_tmp = ( self.__inflation_factor_obs - self.__accumulated_sampling_prob * (self.__inflation_factor_obs - 1.0) ) else: self.__inflation_factor_obs_tmp = np.cos(self.__degradation_timestep) self.__degradation_timestep += 0.2 # Get the updated background ensemble (Nowcast ensemble) in PC space. analysis_ensemble_pc = self.update( background_ensemble=forecast_ens_stacked_pc[: background_ensemble.shape[0]], observation_ensemble=forecast_ens_stacked_pc[ background_ensemble.shape[0] : ], inflation_factor_bg=self.__inflation_factor_bg, inflation_factor_obs=self.__inflation_factor_obs_tmp, offset_bg=self.__offset_bg, offset_obs=self.__offset_obs, background_ensemble_valid_lien=forecast_ens_lien_pc[ : background_ensemble.shape[0] ], observation_ensemble_valid_lien=forecast_ens_lien_pc[ background_ensemble.shape[0] : ], ) # Transform the analysis ensemble back into physical space. 
analysis_ensemble = utils.pca.pca_backtransform( forecast_ens_pc=analysis_ensemble_pc.T, pca_params=pca_params ) # Get the sampling probability either based on the ensembles... if self.__sampling_prob_source == "ensemble": sampling_probability_single_step = ( self.get_weighting_for_probability_matching( background_ensemble=forecast_ens_stacked[ : background_ensemble.shape[0] ][:, idx_lien], analysis_ensemble=analysis_ensemble[:, idx_lien], observation_ensemble=forecast_ens_stacked[ background_ensemble.shape[0] : ][:, idx_lien], ) ) # ...or based on the explained variance weighted Kalman gain. elif self.__sampling_prob_source == "explained_var": sampling_probability_single_step = np.sum( np.diag(self.K) * pca_params["explained_variance"] ) else: raise ValueError( f"Sampling probability source should be either 'ensemble' or 'explained_var', but is {self.__sampling_prob_source}!" ) # Adjust sampling probability when the accumulation flag is set if self.__use_accum_sampling_prob == True: self.__sampling_probability = ( 1 - sampling_probability_single_step ) * self.__sampling_probability + sampling_probability_single_step else: self.__sampling_probability = sampling_probability_single_step # The accumulation is divided for cases one would not use the accumulated # sampling probability for the probability matching, but still wants to have # the pure NWP forecast at the end of a combined forecast. 
if self.__ensure_full_nwp_weight == True: self.__accumulated_sampling_prob = ( 1 - sampling_probability_single_step ) * self.__accumulated_sampling_prob + sampling_probability_single_step print(f"Sampling probability: {self.__sampling_probability:1.4f}") # Apply probability matching to the analysis ensemble if self.__iterative_prob_matching: def worker(j): # Get the combined distribution based on the input weight resampled_forecast[j] = probmatching.resample_distributions( first_array=background_ensemble[j], second_array=observation_ensemble[j], probability_first_array=1 - self.__sampling_probability, ).reshape(self.__params.len_y, self.__params.len_x) dask_worker_collection = [] if DASK_IMPORTED and self._config.n_ens_members > 1: for j in range(self._config.n_ens_members): dask_worker_collection.append(dask.delayed(worker)(j)) dask.compute( *dask_worker_collection, num_workers=self.__params.num_ensemble_workers, ) else: for j in range(self._config.n_ens_members): worker(j) dask_worker_collection = None # Set analysis ensemble into the Nowcast ensemble background_ensemble[:, idx_prec] = analysis_ensemble return background_ensemble, resampled_forecast def get_inflation_factor_obs(self): """ Helper function for ensuring the full NWP weight at the end of a combined forecast. If an accumulated sampling probability of 1 is reached, the observation inflation factor is reduced to 0 by a cosine function. """ return self.__inflation_factor_obs_tmp ================================================ FILE: pysteps/blending/interface.py ================================================ # -*- coding: utf-8 -*- """ pysteps.blending.interface ========================== Interface for the blending module. It returns a callable function for computing blended nowcasts with NWP models. .. 
autosummary:: :toctree: ../generated/ get_method """ from functools import partial from pysteps.blending import linear_blending from pysteps.blending import steps from pysteps.blending import pca_ens_kalman_filter _blending_methods = dict() _blending_methods["linear_blending"] = linear_blending.forecast _blending_methods["salient_blending"] = partial(linear_blending.forecast, saliency=True) _blending_methods["steps"] = steps.forecast _blending_methods["pca_enkf"] = pca_ens_kalman_filter.forecast def get_method(name): """ Return a callable function for computing nowcasts blending into an NWP forecast. Implemented methods: +------------------+------------------------------------------------------+ | Name | Description | +==================+======================================================+ | linear_blending | the linear blending of a nowcast method with other | | | data (e.g. NWP data). | +------------------+------------------------------------------------------+ | salient_blending | the salient blending of a nowcast method with other | | | data (e.g. NWP data) described in :cite:`Hwang2015`. | | | The blending is based on intensities and forecast | | | times. The blended product preserves pixel | | | intensities with time if they are strong enough based| | | on their ranked salience. | +------------------+------------------------------------------------------+ | steps | the STEPS stochastic nowcasting blending method | | | described in :cite:`Seed2003`, :cite:`BPS2006` and | | | :cite:`SPN2013`. The blending weights approach | | | currently follows :cite:`BPS2006`. | +------------------+------------------------------------------------------+ | pca_enkf | the reduced-space EnKF combination method described | | | in :cite:`Nerini2019MWR`. 
| +------------------+------------------------------------------------------+ """ if isinstance(name, str): name = name.lower() else: raise TypeError( "Only strings supported for the method's names.\n" + "Available names:" + str(list(_blending_methods.keys())) ) from None try: return _blending_methods[name] except KeyError: raise ValueError( f"Unknown blending method {name}." "The available methods are: " f"{*list(_blending_methods.keys()),}" ) from None ================================================ FILE: pysteps/blending/linear_blending.py ================================================ # -*- coding: utf-8 -*- """ pysteps.nowcasts.linear_blending ================================ Linear blending method to blend (ensemble) nowcasts and (ensemble) Numerical Weather Prediction (NWP) forecasts. The method uses a predefined start and end time for the blending. Before this start time, the resulting blended forecasts only consists of the nowcast(s). In between the start and end time, the nowcast(s) weight decreases and NWP forecasts weight increases linearly from 1(0) to 0(1). After the end time, the blended forecast entirely consists of the NWP forecasts. The saliency-based blending method also takes into account the pixel intensities and preserves them if they are strong enough based on their ranked salience. Implementation of the linear blending and saliency-based blending between nowcast and NWP data. .. 
def forecast(
    precip,
    precip_metadata,
    velocity,
    timesteps,
    timestep,
    nowcast_method,
    precip_nwp=None,
    precip_nwp_metadata=None,
    start_blending=120,
    end_blending=240,
    fill_nwp=True,
    saliency=False,
    nowcast_kwargs=None,
):
    """Generate a forecast by linearly or saliency-based blending of nowcasts
    with NWP data

    Parameters
    ----------
    precip: array_like
        Array containing the input precipitation field(s) ordered by timestamp
        from oldest to newest. The time steps between the inputs are assumed
        to be regular.
    precip_metadata: dict
        Metadata dictionary containing (at least) the transform, unit and
        threshold attributes as described in the documentation of
        :py:mod:`pysteps.io.importers`.
    velocity: array_like
        Array of shape (2, m, n) containing the x- and y-components of the
        advection field. The velocities are assumed to represent one time step
        between the inputs. All values are required to be finite.
    timesteps: int
        Number of time steps to forecast.
    timestep: int or float
        The time difference (in minutes) between consecutive forecast fields.
    nowcast_method: str
        Name of the nowcasting method. See :py:mod:`pysteps.nowcasts.interface`
        for the list of available methods.
    precip_nwp: array_like or NoneType, optional
        Array of shape (timesteps, m, n) in the case of no ensemble or of shape
        (n_ens_members, timesteps, m, n) in the case of an ensemble containing
        the NWP precipitation fields ordered by timestamp from oldest to
        newest. The time steps between the inputs are assumed to be regular
        (and identical to the time step between the nowcasts). If no NWP data
        is given the value of precip_nwp is None and no blending will be
        performed.
    precip_nwp_metadata: dict or NoneType, optional
        NWP metadata dictionary containing (at least) the transform, unit and
        threshold attributes as described in the documentation of
        :py:mod:`pysteps.io.importers`.
    start_blending: int, optional
        Time stamp (in minutes) after which the blending should start. Before
        this only the nowcast data is used.
    end_blending: int, optional
        Time stamp (in minutes) after which the blending should end. Between
        start_blending and end_blending the nowcasts and NWP data are linearly
        merged with each other. After end_blending only the NWP data is used.
    fill_nwp: bool, optional
        Standard value is True. If True, the NWP data will be used to fill in
        the no data mask of the nowcast.
    saliency: bool, optional
        Default value is False. If True, saliency will be used for blending.
        The blending is based on intensities and forecast times as described
        in :cite:`Hwang2015`. The blended product preserves pixel intensities
        with time if they are strong enough based on their ranked salience.
    nowcast_kwargs: dict, optional
        Dictionary containing keyword arguments for the nowcast method.

    Returns
    -------
    precip_blended: ndarray
        Array of shape (timesteps, m, n) in the case of no ensemble or of
        shape (n_ens_members, timesteps, m, n) in the case of an ensemble
        containing the precipitation forecast generated by linearly blending
        the nowcasts and the NWP data. n_ens_members equals the maximum no.
        of ensemble members in either the nowcast or nwp model(s).

    Raises
    ------
    ValueError
        If ``end_blending`` is not larger than ``start_blending``, or if the
        spatial dimensions of the nowcast and the NWP data do not match.
    """
    if nowcast_kwargs is None:
        nowcast_kwargs = dict()

    # Guard against a zero division in the linear weight computation below.
    if end_blending <= start_blending:
        raise ValueError(
            "end_blending must be larger than start_blending: got "
            f"start_blending={start_blending}, end_blending={end_blending}"
        )

    # Ensure that only the most recent precip timestep is used
    if precip.ndim == 3:
        precip = precip[-1, :, :]

    # First calculate the number of needed timesteps (up to end_blending) for
    # the nowcast to ensure that the nowcast calculation time is limited.
    timesteps_nowcast = int(end_blending / timestep)

    nowcast_method_func = nowcasts.get_method(nowcast_method)

    # Check if NWP data is given as input
    if precip_nwp is not None:
        # Calculate the nowcast
        precip_nowcast = nowcast_method_func(
            precip,
            velocity,
            timesteps_nowcast,
            **nowcast_kwargs,
        )

        # Make sure that precip_nowcast and precip_nwp are in mm/h
        precip_nowcast, _ = conversion.to_rainrate(
            precip_nowcast, metadata=precip_metadata
        )
        precip_nwp, _ = conversion.to_rainrate(precip_nwp, metadata=precip_nwp_metadata)

        if precip_nowcast.ndim == 4:
            n_ens_members_nowcast = precip_nowcast.shape[0]
            if n_ens_members_nowcast == 1:
                precip_nowcast = np.squeeze(precip_nowcast)
        else:
            n_ens_members_nowcast = 1

        if precip_nwp.ndim == 4:
            # Ensure precip_nwp has t = n_timesteps
            precip_nwp = precip_nwp[:, 0:timesteps, :, :]
            # Set the number of ensemble members
            n_ens_members_nwp = precip_nwp.shape[0]
            if n_ens_members_nwp == 1:
                precip_nwp = np.squeeze(precip_nwp)
        else:
            # Ensure precip_nwp has t = n_timesteps
            precip_nwp = precip_nwp[0:timesteps, :, :]
            # Set the number of ensemble members
            n_ens_members_nwp = 1

        # Now, repeat the nowcast ensemble members or the nwp models/members until
        # it has the same amount of members as n_ens_members_max. For instance, if
        # you have 10 ensemble nowcasts members and 3 NWP members, the output will
        # be an ensemble of 10 members. Hence, the three NWP members are blended
        # with the first three members of the nowcast (member one with member one,
        # two with two, etc.), subsequently, the same NWP members are blended with
        # the next three members (NWP member one with member 4, NWP member 2 with
        # member 5, etc.), until 10 is reached.
        n_ens_members_max = max(n_ens_members_nowcast, n_ens_members_nwp)
        n_ens_members_min = min(n_ens_members_nowcast, n_ens_members_nwp)

        if n_ens_members_min != n_ens_members_max:
            if n_ens_members_nwp == 1:
                precip_nwp = np.repeat(
                    precip_nwp[np.newaxis, :, :], n_ens_members_max, axis=0
                )
            elif n_ens_members_nowcast == 1:
                precip_nowcast = np.repeat(
                    precip_nowcast[np.newaxis, :, :], n_ens_members_max, axis=0
                )
            else:
                repeats = [
                    (n_ens_members_max + i) // n_ens_members_min
                    for i in range(n_ens_members_min)
                ]
                if n_ens_members_nwp == n_ens_members_min:
                    precip_nwp = np.repeat(precip_nwp, repeats, axis=0)
                elif n_ens_members_nowcast == n_ens_members_min:
                    precip_nowcast = np.repeat(precip_nowcast, repeats, axis=0)

        # Check if dimensions are correct. Raise an explicit error instead of
        # relying on assert, which is stripped when Python runs with -O.
        if precip_nwp.shape[-2:] != precip_nowcast.shape[-2:]:
            raise ValueError(
                "The x and y dimensions of precip_nowcast and precip_nwp need "
                "to be identical: dimension of precip_nwp = "
                f"{precip_nwp.shape[-2:]} and dimension of precip_nowcast = "
                f"{precip_nowcast.shape[-2:]}"
            )

        # Ensure we are not working with nans in the blending.
        # Check if the NWP data contains any nans. If so, fill them with 0.0.
        precip_nwp = np.nan_to_num(precip_nwp, nan=0.0)

        # Fill nans in precip_nowcast.
        # NOTE(review): this assumes the shapes are compatible, i.e. that
        # timesteps == timesteps_nowcast when the boolean mask is applied —
        # confirm for configurations where end_blending / timestep != timesteps.
        nan_mask = np.isnan(precip_nowcast)
        if fill_nwp:
            if precip_nwp.ndim == 4:
                precip_nowcast[nan_mask] = precip_nwp[:, 0:timesteps_nowcast, :, :][
                    nan_mask
                ]
            else:
                precip_nowcast[nan_mask] = precip_nwp[0:timesteps_nowcast, :, :][
                    nan_mask
                ]
        else:
            precip_nowcast[nan_mask] = 0.0

        # Initialise output
        precip_blended = np.zeros_like(precip_nwp)

        # The time axis is axis 0 for deterministic output and axis 1 for an
        # ensemble; this does not change within the loop, so compute it once.
        ref_dim = 0 if n_ens_members_max == 1 else 1

        # Calculate the weights
        for i in range(timesteps):
            # Calculate what time we are at
            t = (i + 1) * timestep

            # compute the slice indices for this lead time
            slc_id = _get_slice(precip_blended.ndim, ref_dim, i)

            # Calculate the weight with a linear relation
            # (weight_nwp at start_blending = 0.0) and
            # (weight_nwp at end_blending = 1.0)
            weight_nwp = (t - start_blending) / (end_blending - start_blending)

            # Clip to pure nowcast before start_blending and pure NWP after
            # end_blending.
            if weight_nwp <= 0.0:
                precip_blended[slc_id] = precip_nowcast[slc_id]
            elif weight_nwp >= 1.0:
                precip_blended[slc_id] = precip_nwp[slc_id]
            else:
                # Calculate weight_nowcast
                weight_nowcast = 1.0 - weight_nwp

                # Calculate output by combining precip_nwp and precip_nowcast,
                # while distinguishing between ensemble and non-ensemble methods
                if saliency:
                    ranked_salience = _get_ranked_salience(
                        precip_nowcast[slc_id], precip_nwp[slc_id]
                    )
                    ws = _get_ws(weight_nowcast, ranked_salience)
                    precip_blended[slc_id] = (
                        ws * precip_nowcast[slc_id] + (1 - ws) * precip_nwp[slc_id]
                    )
                else:
                    precip_blended[slc_id] = (
                        weight_nwp * precip_nwp[slc_id]
                        + weight_nowcast * precip_nowcast[slc_id]
                    )
    else:
        # Calculate the nowcast
        precip_nowcast = nowcast_method_func(
            precip,
            velocity,
            timesteps,
            **nowcast_kwargs,
        )

        # Make sure that precip_nowcast is in mm/h
        precip_nowcast, _ = conversion.to_rainrate(
            precip_nowcast, metadata=precip_metadata
        )

        # If no NWP data is given, the blended field is simply equal to the
        # nowcast field
        precip_blended = precip_nowcast

    return precip_blended
precip_blended = precip_nowcast return precip_blended def _get_slice(n_dims, ref_dim, ref_id): """source: https://stackoverflow.com/a/24399139/4222370""" slc = [slice(None)] * n_dims slc[ref_dim] = ref_id return tuple(slc) def _get_ranked_salience(precip_nowcast, precip_nwp): """Calculate ranked salience, which show how close the pixel is to the maximum intensity difference [r(x,y)=1] or the minimum intensity difference [r(x,y)=0] Parameters ---------- precip_nowcast: array_like Array of shape (m,n) containing the extrapolated precipitation field at a specified timestep precip_nwp: array_like Array of shape (m,n) containing the NWP fields at a specified timestep Returns ------- ranked_salience: Array of shape (m,n) containing ranked salience """ # calcutate normalized intensity if np.max(precip_nowcast) == 0: norm_nowcast = np.zeros_like(precip_nowcast) else: norm_nowcast = precip_nowcast / np.max(precip_nowcast) if np.max(precip_nwp) == 0: norm_nwp = np.zeros_like(precip_nwp) else: norm_nwp = precip_nwp / np.max(precip_nwp) diff = norm_nowcast - norm_nwp # Calculate ranked salience, based on dense ranking method, in which equally comparable values receive the same ranking number ranked_salience = rankdata(diff, method="dense").reshape(diff.shape).astype("float") ranked_salience /= ranked_salience.max() return ranked_salience def _get_ws(weight, ranked_salience): """Calculate salience weight based on linear weight and ranked salience as described in :cite:`Hwang2015`. Cells with higher intensities result in larger weights. Parameters ---------- weight: int Varying between 0 and 1 ranked_salience: array_like Array of shape (m,n) containing ranked salience Returns ------- ws: array_like Array of shape (m,n) containing salience weight, which preserves pixel intensties with time if they are strong enough based on the ranked salience. 
""" # Calculate salience weighte ws = 0.5 * ( (weight * ranked_salience) / (weight * ranked_salience + (1 - weight) * (1 - ranked_salience)) + ( np.sqrt(ranked_salience**2 + weight**2) / ( np.sqrt(ranked_salience**2 + weight**2) + np.sqrt((1 - ranked_salience) ** 2 + (1 - weight) ** 2) ) ) ) return ws ================================================ FILE: pysteps/blending/pca_ens_kalman_filter.py ================================================ # -*- coding: utf-8 -*- """ pysteps.blending.pca_ens_kalman_filter ====================================== Implementation of the reduced-space ensemble Kalman filter method described in :cite:`Nerini2019MWR`. The nowcast is iteratively corrected by NWP data using an ensemble Kalman filter in principal component (PC) space. The reduced-space ensemble Kalman filter method consists of the following main steps: Initialization step ------------------- 1. Set the radar rainfall fields in a Lagrangian space. 2. Perform the cascade decomposition for the input radar rainfall fields. 3. Estimate AR parameters for the extrapolation nowcast and noise cascade. 4. Initialize the noise method and precompute a set of noise fields. 5. Initialize forecast models equal to the number of ensemble members. 6. Initialize the ensemble Kalman filter method. 7. Start the forecasting loop. Forecast step ------------- 1. Decompose the rainfall forecast field of the previous timestep. 2. Update the common precipitation mask of nowcast and NWP fields for noise imprint. 3. Iterate the AR model. 4. Recompose the rainfall forecast field. 5. (Optional) Apply probability matching. 6. Extrapolate the recomposed rainfall field to the current timestep. Correction step --------------- 1. Identify grid boxes where rainfall is forecast. 2. Reduce nowcast and NWP ensembles onto these grid boxes and apply principal component analysis to further reduce the dimensionality. 3. Apply the update step of the ensemble Kalman filter. Finalization ------------ 1. 
@dataclass(frozen=True)
class EnKFCombinationConfig:
    """
    Read-only configuration for the reduced-space ensemble Kalman filter
    combination of nowcasts with NWP forecasts.

    Parameters
    ----------
    n_ens_members: int
        The number of ensemble members to generate. This number should always
        be equal to the number of NWP ensemble members / number of NWP models.
    n_cascade_levels: int
        The number of cascade levels to use. Defaults to 6, see issue #385 on
        GitHub.
    precip_threshold: float
        Specifies the threshold value for minimum observable precipitation
        intensity.
    norain_threshold: float
        Specifies the threshold value for the fraction of rainy (see above)
        pixels in the radar rainfall field below which we consider there to be
        no rain. Depends on the amount of clutter typically present.
    precip_mask_dilation: int
        Number of grid boxes by which the precipitation mask should be extended
        per timestep.
    smooth_radar_mask_range: int
        Default is 0. Method to smooth the transition between the
        radar-NWP-noise blend and the NWP-noise blend near the edge of the
        radar domain (radar mask), where the radar data is either not present
        anymore or is not reliable. If set to 0 (grid cells), this generates a
        normal forecast without smoothing. To create a smooth mask, this range
        should be a positive value, representing a buffer band of a number of
        pixels by which the mask is cropped and smoothed. The smooth radar mask
        removes the hard edges between NWP and radar in the final blended
        product. Typically, a value between 50 and 100 km can be used. 80 km
        generally gives good results.
    extrapolation_method: str
        Name of the extrapolation method to use. See the documentation of
        :py:mod:`pysteps.extrapolation.interface`.
    decomposition_method: str, {'fft'}
        Name of the cascade decomposition method to use. See the documentation
        of :py:mod:`pysteps.cascade.interface`.
    bandpass_filter_method: str, {'gaussian', 'uniform'}
        Name of the bandpass filter method to use with the cascade
        decomposition. See the documentation of
        :py:mod:`pysteps.cascade.interface`.
    noise_method: str, {'parametric','nonparametric','ssft','nested',None}
        Name of the noise generator to use for perturbating the precipitation
        field. See the documentation of :py:mod:`pysteps.noise.interface`. If
        set to None, no noise is generated.
    enkf_method: str, {'masked_enkf'}
        Name of the ensemble Kalman filter method to use for the correction
        step. Currently, only 'masked_enkf' is implemented. This method
        corresponds to the reduced-space ensemble Kalman filter method
        described by Nerini et al., 2019.
    enable_combination: bool
        Flag to specify whether the correction step or only the forecast steps
        should be processed.
    noise_stddev_adj: str, {'auto','fixed',None}
        Optional adjustment for the standard deviations of the noise fields
        added to each cascade level. This is done to compensate incorrect std.
        dev. estimates of cascade levels due to presence of no-rain areas.
        'auto'=use the method implemented in
        :py:func:`pysteps.noise.utils.compute_noise_stddev_adjs`.
        'fixed'=use the formula given in :cite:`BPS2006` (eq. 6),
        None=disable noise std. dev adjustment.
    ar_order: int
        The order of the autoregressive model to use. Currently, only AR(1) is
        implemented.
    seed: int
        Optional seed number for the random generators.
    num_workers: int
        The number of workers to use for parallel computation. Applicable if
        dask is enabled or pyFFTW is used for computing the FFT. When
        num_workers>1, it is advisable to disable OpenMP by setting the
        environment variable OMP_NUM_THREADS to 1. This avoids slowdown caused
        by too many simultaneous threads.
    fft_method: str
        A string defining the FFT method to use (see FFT methods in
        :py:func:`pysteps.utils.interface.get_method`). Defaults to 'numpy'
        for compatibility reasons. If pyFFTW is installed, the recommended
        method is 'pyfftw'.
    domain: str, {"spatial", "spectral"}
        If "spatial", all computations are done in the spatial domain (the
        classical STEPS model). If "spectral", the AR(2) models and stochastic
        perturbations are applied directly in the spectral domain to reduce
        memory footprint and improve performance :cite:`PCH2019b`.
    extrapolation_kwargs: dict
        Optional dictionary containing keyword arguments for the extrapolation
        method. See the documentation of
        :py:func:`pysteps.extrapolation.interface`.
    filter_kwargs: dict
        Optional dictionary containing keyword arguments for the filter method.
        See the documentation of :py:mod:`pysteps.cascade.bandpass_filters`.
    noise_kwargs: dict
        Optional dictionary containing keyword arguments for the initializer of
        the noise generator. See the documentation of
        :py:mod:`pysteps.noise.fftgenerators`.
    combination_kwargs: dict
        Optional dictionary containing keyword arguments for the initializer of
        the correction step. Options are: {nwp_hres_eff: float, the effective
        horizontal resolution of the utilized NWP model; prob_matching: str,
        specifies the probability matching method that should be applied}. See
        the documentation of
        :py:mod:`pysteps.blending.ens_kalman_filter_methods`.
    measure_time: bool
        If set to True, measure, print and return the computation time.
    verbose_output: bool
        If set to True, return additionally the background ensemble of the
        EnKF for further statistics.
    callback: function, optional
        Optional function that is called after computation of each time step of
        the nowcast. The function takes one argument: a three-dimensional array
        of shape (n_ens_members,h,w), where h and w are the height and width of
        the input field precip, respectively. This can be used, for instance,
        for writing the outputs into files.
    return_output: bool
        Set to False to disable returning the outputs as numpy arrays. This can
        save memory if the intermediate results are written to output files
        using the callback function. (Callback function is currently not
        implemented.)
    n_noise_fields: int
        Number of precomputed noise fields. A number of 30 is adequate to
        generate sufficient spread in the nowcast.
    """

    n_ens_members: int
    n_cascade_levels: int
    precip_threshold: float | None
    norain_threshold: float
    precip_mask_dilation: int
    smooth_radar_mask_range: int
    extrapolation_method: str
    decomposition_method: str
    bandpass_filter_method: str
    noise_method: str | None
    enkf_method: str | None
    enable_combination: bool
    noise_stddev_adj: str | None
    ar_order: int
    seed: int | None
    num_workers: int
    fft_method: str
    domain: str
    extrapolation_kwargs: dict[str, Any] = field(default_factory=dict)
    filter_kwargs: dict[str, Any] = field(default_factory=dict)
    noise_kwargs: dict[str, Any] = field(default_factory=dict)
    combination_kwargs: dict[str, Any] = field(default_factory=dict)
    measure_time: bool = False
    verbose_output: bool = False
    callback: Any | None = None
    return_output: bool = True
    n_noise_fields: int = 30


@dataclass
class EnKFCombinationParams:
    """
    Mutable container for derived parameters and method handles shared between
    the forecast components.

    All attributes default to None (or an empty list) and are populated during
    initialization (see ``ForecastInitialization``).
    """

    # Per-cascade-level std. dev. adjustment coefficients for the noise fields.
    noise_std_coeffs: np.ndarray | None = None
    # Bandpass filter object returned by the cascade filter method.
    bandpass_filter: Any | None = None
    # FFT method object used for decomposition/noise generation.
    fft: Any | None = None
    # Noise initializer output: generates perturbation fields.
    perturbation_generator: Callable[..., np.ndarray] | None = None
    noise_generator: Callable[..., np.ndarray] | None = None
    # AR(p) model parameters, shape (n_cascade_levels, ar_order + 1).
    PHI: np.ndarray | None = None
    extrapolation_method: Callable[..., Any] | None = None
    decomposition_method: Callable[..., dict] | None = None
    recomposition_method: Callable[..., np.ndarray] | None = None
    # One FFT object per precomputed noise field.
    fft_objs: list[Any] = field(default_factory=list)
    # Stacked (x, y) mesh grid coordinates of the spatial domain.
    xy_coordinates: np.ndarray | None = None
    # NOTE(review): presumably mirrors the config precip_threshold — confirm
    # where this is assigned.
    precip_threshold: float | None = None
    # Mask passed to the cascade decomposition for conditional statistics.
    mask_threshold: np.ndarray | None = None
    # min(n_ens_members, num_workers), used for per-member parallelism.
    num_ensemble_workers: int | None = None
    # True where the observations are non-finite (outside the radar domain).
    domain_mask: np.ndarray | None = None
    extrapolation_kwargs: dict | None = None
    filter_kwargs: dict | None = None
    noise_kwargs: dict | None = None
    combination_kwargs: dict | None = None
    # Spatial dimensions of the input fields.
    len_y: int | None = None
    len_x: int | None = None
    # NOTE(review): flag describing the no-rain situation; the value "obs" is
    # checked during initialization — confirm the full set of values.
    no_rain_case: str | None = None
class ForecastInitialization:
    """
    Class to bundle the steps necessary for the forecast initialization.

    These steps are:

    #. Set the radar rainfall fields in a Lagrangian space.
    #. Perform the cascade decomposition for the input radar rainfall fields.
    #. Estimate AR parameters for the extrapolation nowcast and noise cascade.
    #. Initialize the noise method and precompute a set of noise fields.
    """

    def __init__(
        self,
        enkf_combination_config: EnKFCombinationConfig,
        enkf_combination_params: EnKFCombinationParams,
        obs_precip: np.ndarray,
        obs_velocity: np.ndarray,
    ):
        """
        Run all initialization steps.

        Parameters
        ----------
        enkf_combination_config: EnKFCombinationConfig
            Read-only configuration of the combination method.
        enkf_combination_params: EnKFCombinationParams
            Mutable parameter container that is populated by this class.
        obs_precip: np.ndarray
            Observed radar precipitation fields, ordered oldest to newest;
            the first axis is time.
        obs_velocity: np.ndarray
            Advection field used to transform the earlier observations into
            Lagrangian coordinates.
        """
        self.__config = enkf_combination_config
        self.__params = enkf_combination_params
        self.__obs_precip = obs_precip
        self.__obs_velocity = obs_velocity

        # Measure time for initialization.
        if self.__config.measure_time:
            self.__start_time_init = time.time()

        self.__initialize_nowcast_components()
        self.__prepare_radar_data_and_ar_parameters()
        self.__initialize_noise()
        self.__initialize_noise_field_pool()

        if self.__config.measure_time:
            print(
                f"Elapsed time for initialization: {time.time() - self.__start_time_init}"
            )

    # Initialize FFT, bandpass filters, decomposition methods, and extrapolation
    # method.
    def __initialize_nowcast_components(self):
        """
        Resolve all method handles (FFT, bandpass filter, cascade decomposition,
        extrapolation) and derive the spatial grid information from the
        observations; results are stored in the shared params object.
        """
        # Initialize number of ensemble workers
        self.__params.num_ensemble_workers = min(
            self.__config.n_ens_members,
            self.__config.num_workers,
        )

        # Extract the spatial dimensions of the observed precipitation (x, y)
        self.__params.len_y, self.__params.len_x = self.__obs_precip.shape[1:]

        # Generate the mesh grid for spatial coordinates
        x_values, y_values = np.meshgrid(
            np.arange(self.__params.len_x),
            np.arange(self.__params.len_y),
        )
        self.__params.xy_coordinates = np.stack([x_values, y_values])

        # Initialize FFT method
        self.__params.fft = utils.get_method(
            self.__config.fft_method,
            shape=(
                self.__params.len_y,
                self.__params.len_x,
            ),
            n_threads=self.__config.num_workers,
        )

        # Initialize the band-pass filter for the cascade decomposition
        filter_method = cascade.get_method(self.__config.bandpass_filter_method)
        self.__params.bandpass_filter = filter_method(
            (self.__params.len_y, self.__params.len_x),
            self.__config.n_cascade_levels,
            **(self.__params.filter_kwargs or {}),
        )

        # Get the decomposition method (e.g., FFT)
        (
            self.__params.decomposition_method,
            self.__params.recomposition_method,
        ) = cascade.get_method(self.__config.decomposition_method)

        # Get the extrapolation method (e.g., semilagrangian)
        self.__params.extrapolation_method = extrapolation.get_method(
            self.__config.extrapolation_method
        )

        # Determine the domain mask from non-finite values in the precipitation
        # data: a pixel is masked if it is non-finite in any input field.
        self.__params.domain_mask = np.logical_or.reduce(
            [
                ~np.isfinite(self.__obs_precip[i, :])
                for i in range(self.__obs_precip.shape[0])
            ]
        )

        print("Nowcast components initialized successfully.")

    # Prepare radar precipitation fields for nowcasting and estimate the AR
    # parameters.
    def __prepare_radar_data_and_ar_parameters(self):
        """
        Prepare radar and NWP precipitation fields for nowcasting.

        This includes generating a threshold mask, transforming fields into
        Lagrangian coordinates, cascade decomposing/recomposing, and checking
        for zero-precip areas. The results are stored in class attributes.

        Estimate autoregressive (AR) parameters for the radar rainfall field.
        If precipitation exists, compute temporal auto-correlations; otherwise,
        use predefined climatological values. Adjust coefficients if necessary
        and estimate AR model parameters.
        """
        # Start with the radar rainfall fields. We want the fields in a Lagrangian
        # space. Advect the previous precipitation fields to the same position with
        # the most recent one (i.e. transform them into the Lagrangian coordinates).
        self.__params.extrapolation_kwargs["xy_coords"] = self.__params.xy_coordinates
        self.__params.extrapolation_kwargs["outval"] = (
            self.__config.precip_threshold - 2.0
        )
        res = []

        def transform_to_lagrangian(precip, i):
            # Advect field i forward by (ar_order - i) steps so that all input
            # fields are co-located with the most recent one.
            return self.__params.extrapolation_method(
                precip[i, :, :],
                self.__obs_velocity,
                self.__config.ar_order - i,
                allow_nonfinite_values=True,
                **self.__params.extrapolation_kwargs.copy(),
            )[-1]

        if not DASK_IMPORTED:
            # Process each earlier precipitation field directly
            for i in range(self.__config.ar_order):
                self.__obs_precip[i, :, :] = transform_to_lagrangian(
                    self.__obs_precip, i
                )
        else:
            # Use Dask delayed for parallelization if DASK_IMPORTED is True
            for i in range(self.__config.ar_order):
                res.append(dask.delayed(transform_to_lagrangian)(self.__obs_precip, i))

            num_workers_ = (
                len(res)
                if self.__config.num_workers > len(res)
                else self.__config.num_workers
            )
            self.__obs_precip = np.stack(
                list(dask.compute(*res, num_workers=num_workers_))
                + [self.__obs_precip[-1, :, :]]
            )

        # Mask the observations: non-finite pixels and pixels below the rain
        # threshold are set to a value safely below the threshold.
        obs_mask = np.logical_or(
            ~np.isfinite(self.__obs_precip),
            self.__obs_precip < self.__config.precip_threshold,
        )
        self.__obs_precip[obs_mask] = self.__config.precip_threshold - 2.0

        # Compute the cascade decompositions of the input precipitation fields
        precip_forecast_decomp = []
        for i in range(self.__config.ar_order + 1):
            precip_forecast = self.__params.decomposition_method(
                self.__obs_precip[i, :, :],
                self.__params.bandpass_filter,
                mask=self.__params.mask_threshold,
                fft_method=self.__params.fft,
                output_domain=self.__config.domain,
                normalize=True,
                compute_stats=True,
                compact_output=False,
            )
            precip_forecast_decomp.append(precip_forecast)

        # Rearrange the cascades into a four-dimensional array of shape
        # (n_cascade_levels,ar_order+1,m,n) for the autoregressive model
        self.precip_cascades = nowcast_utils.stack_cascades(
            precip_forecast_decomp, self.__config.n_cascade_levels
        )

        # Set the mean and standard deviations based on the most recent field.
        precip_forecast_decomp = precip_forecast_decomp[-1]
        self.mean_extrapolation = np.array(precip_forecast_decomp["means"])
        self.std_extrapolation = np.array(precip_forecast_decomp["stds"])

        if self.__params.no_rain_case == "obs":
            # No rain observed: fall back to perfect correlation on all levels.
            GAMMA = np.ones((self.__config.n_cascade_levels, self.__config.ar_order))
        else:
            # If there are values in the radar fields, compute the auto-correlations
            GAMMA = np.empty((self.__config.n_cascade_levels, self.__config.ar_order))

            # compute lag-l temporal auto-correlation coefficients for each
            # cascade level
            for i in range(self.__config.n_cascade_levels):
                GAMMA[i, :] = correlation.temporal_autocorrelation(
                    self.precip_cascades[i], mask=self.__params.mask_threshold
                )

        # Print the GAMMA value
        nowcast_utils.print_corrcoefs(GAMMA)

        if self.__config.ar_order == 2:
            # Adjust the lag-2 correlation coefficient to ensure that the AR(p)
            # process is stationary
            for i in range(self.__config.n_cascade_levels):
                GAMMA[i, 1] = autoregression.adjust_lag2_corrcoef2(
                    GAMMA[i, 0], GAMMA[i, 1]
                )

        # Estimate the parameters of the AR(p) model from the auto-correlation
        # coefficients
        self.__params.PHI = np.empty(
            (self.__config.n_cascade_levels, self.__config.ar_order + 1)
        )
        for i in range(self.__config.n_cascade_levels):
            self.__params.PHI[i, :] = autoregression.estimate_ar_params_yw(GAMMA[i, :])

        nowcast_utils.print_ar_params(self.__params.PHI)

    # Initialize the noise generation and get n_noise_fields.
    def __initialize_noise(self):
        """
        Initialize noise-based perturbations if configured, computing any required
        adjustment coefficients and setting up the perturbation generator.
        """
        if (
            self.__config.noise_method is not None
            and self.__params.no_rain_case != "obs"
        ):
            # get methods for perturbations
            init_noise, self.__params.noise_generator = noise.get_method(
                self.__config.noise_method
            )
            self.__precip_noise_input = self.__obs_precip.copy()

            # initialize the perturbation generator for the precipitation field
            self.__params.perturbation_generator = init_noise(
                self.__precip_noise_input,
                fft_method=self.__params.fft,
                **self.__params.noise_kwargs,
            )

            if self.__config.noise_stddev_adj == "auto":
                print("Computing noise adjustment coefficients... ", end="", flush=True)

                precip_forecast_min = np.min(self.__precip_noise_input)
                self.__params.noise_std_coeffs = noise.utils.compute_noise_stddev_adjs(
                    self.__precip_noise_input[-1, :, :],
                    self.__params.precip_threshold,
                    precip_forecast_min,
                    self.__params.bandpass_filter,
                    self.__params.decomposition_method,
                    self.__params.perturbation_generator,
                    self.__params.noise_generator,
                    20,
                    conditional=True,
                    num_workers=self.__config.num_workers,
                    seed=self.__config.seed,
                )
            elif self.__config.noise_stddev_adj == "fixed":
                # Fixed per-level coefficients following :cite:`BPS2006` (eq. 6).
                f = lambda k: 1.0 / (0.75 + 0.09 * k)
                self.__params.noise_std_coeffs = [
                    f(k) for k in range(1, self.__config.n_cascade_levels + 1)
                ]
            else:
                self.__params.noise_std_coeffs = np.ones(self.__config.n_cascade_levels)

            if self.__config.noise_stddev_adj is not None:
                print(f"noise std. dev. coeffs: {self.__params.noise_std_coeffs}")
        else:
            # Noise disabled (or no rain observed): no perturbations are applied.
            self.__params.perturbation_generator = None
            self.__params.noise_generator = None
            self.__params.noise_std_coeffs = None

    # Create a pool of n noise fields.
    def __initialize_noise_field_pool(self):
        """
        Initialize a pool of noise fields avoiding the separate generation of noise
        fields for each time step and ensemble member. A pool of 30 fields is
        sufficient to generate adequate spread in the nowcast for combination.
        """
        self.noise_field_pool = np.zeros(
            (
                self.__config.n_noise_fields,
                self.__config.n_cascade_levels,
                self.__params.len_y,
                self.__params.len_x,
            )
        )

        # Get a seed value for each ensemble member; each random state is
        # seeded from a value drawn by the previous one.
        seed = self.__config.seed
        if self.__config.noise_method is not None:
            self.__randgen_precip = []
            for j in range(self.__config.n_noise_fields):
                rs = np.random.RandomState(seed)
                self.__randgen_precip.append(rs)
                seed = rs.randint(0, high=1e9)

        # Get the decomposition method
        self.__params.fft_objs = []
        for _ in range(self.__config.n_noise_fields):
            self.__params.fft_objs.append(
                utils.get_method(
                    self.__config.fft_method,
                    shape=self.precip_cascades.shape[-2:],
                )
            )

        if self.__params.noise_generator is not None:
            # Determine the noise field for each ensemble member
            for j in range(self.__config.n_noise_fields):
                epsilon = self.__params.noise_generator(
                    self.__params.perturbation_generator,
                    randstate=self.__randgen_precip[j],
                    fft_method=self.__params.fft_objs[j],
                    domain=self.__config.domain,
                )

                # Decompose the noise field into a cascade
                self.noise_field_pool[j] = self.__params.decomposition_method(
                    epsilon,
                    self.__params.bandpass_filter,
                    fft_method=self.__params.fft_objs[j],
                    input_domain=self.__config.domain,
                    output_domain=self.__config.domain,
                    compute_stats=False,
                    normalize=True,
                    compact_output=True,
                )["cascade_levels"]
""" def __init__( self, enkf_combination_config: EnKFCombinationConfig, enkf_combination_params: EnKFCombinationParams, noise_field_pool: np.ndarray, latest_obs: np.ndarray, precip_mask: np.ndarray, ): self.config = enkf_combination_config self.params = enkf_combination_params self.noise_field_pool = noise_field_pool self.precip_mask = np.repeat( precip_mask[None, :], self.config.n_ens_members, axis=0 ) latest_obs[~np.isfinite(latest_obs)] = self.config.precip_threshold - 2.0 self.nwc_prediction = np.repeat( latest_obs[None, :, :], self.config.n_ens_members, axis=0 ) self.fc_resampled = np.repeat( latest_obs[None, :, :], self.config.n_ens_members, axis=0 ) self.nwc_prediction_btf = self.nwc_prediction.copy() self.final_combined_forecast = [] self.background_ensemble = {} return class ForecastModel: """ Class to manage the forecast step of each ensemble member. """ def __init__( self, forecast_state: ForecastState, precip_cascades: np.ndarray, velocity: np.ndarray, mu: np.ndarray, sigma: np.ndarray, ens_member: int, ): # Initialize instance variables self.__forecast_state = forecast_state self.__precip_cascades = precip_cascades self.__velocity = velocity self.__mu = mu self.__sigma = sigma self.__previous_displacement = np.zeros( (2, self.__forecast_state.params.len_y, self.__forecast_state.params.len_x) ) # Get NWP effective horizontal resolution and type of probability matching from # combination kwargs. # It's not the best practice to mix parameters. Maybe the cascade mask as well # as the probability matching should be implemented at another location. self.__nwp_hres_eff = self.__forecast_state.params.combination_kwargs.get( "nwp_hres_eff", 0.0 ) self.__prob_matching = self.__forecast_state.params.combination_kwargs.get( "prob_matching", "iterative" ) # Get spatial scales whose central wavelengths are above the effective # horizontal resolution of the NWP model. 
# Factor 3 on the effective resolution is similar to that factor of the # localization of AR parameters and scaling parameters. self.__resolution_mask = ( self.__forecast_state.params.len_y / self.__forecast_state.params.bandpass_filter["central_wavenumbers"] >= self.__nwp_hres_eff * 3.0 ) self.__ens_member = ens_member # Bundle single steps of the forecast. def run_forecast_step(self, nwp, is_correction_timestep=False): # Decompose precipitation field. self.__decompose(is_correction_timestep) # Update precipitation mask. self.__update_precip_mask(nwp=nwp) # Iterate through the AR process. self.__iterate() # Recompose the precipitation field for the correction step. self.__forecast_state.nwc_prediction[self.__ens_member] = ( blending.utils.recompose_cascade( combined_cascade=self.__precip_cascades[:, -1], combined_mean=self.__mu, combined_sigma=self.__sigma, ) ) # Apply probability matching if self.__prob_matching == "iterative": self.__probability_matching() # Extrapolate the precipitation field onto the position of the current timestep. # If smooth_radar_mask_range is not zero, ensure the extrapolation kwargs use # a constant value instead of "nearest" for the coordinate mapping, otherwise # there are possibly no nans in the domain. if self.__forecast_state.config.smooth_radar_mask_range != 0: self.__forecast_state.params.extrapolation_kwargs[ "map_coordinates_mode" ] = "constant" self.__advect() # The extrapolation components are NaN outside the advected # radar domain. This results in NaN values in the blended # forecast outside the radar domain. Therefore, fill these # areas with the defined minimum value, if requested. nan_mask = np.isnan(self.__forecast_state.nwc_prediction[self.__ens_member]) self.__forecast_state.nwc_prediction[self.__ens_member][nan_mask] = ( self.__forecast_state.config.precip_threshold - 2.0 ) # Create the resulting precipitation field and set no data area. 
In future, when # transformation between linear and logarithmic scale will be necessary, it will be # implemented in this function. # TODO: once this transformation is needed, adjust the smoothed transition between # radar mask and NWP as performed at the end of the run_forecast_step function. def backtransform(self): # Set the resulting field as shallow copy of the field that is used # continuously for forecast computation. if self.__forecast_state.config.smooth_radar_mask_range == 0: self.__forecast_state.nwc_prediction_btf[self.__ens_member] = ( self.__forecast_state.nwc_prediction[self.__ens_member] ) # Set no data area self.__set_no_data() # Call spatial decomposition function and compute an adjusted standard deviation of # each spatial scale at timesteps where NWP information is incorporated. def __decompose(self, is_correction_timestep): # Call spatial decomposition method. precip_extrap_decomp = self.__forecast_state.params.decomposition_method( self.__forecast_state.nwc_prediction[self.__ens_member], self.__forecast_state.params.bandpass_filter, fft_method=self.__forecast_state.params.fft_objs[self.__ens_member], input_domain=self.__forecast_state.config.domain, output_domain=self.__forecast_state.config.domain, compute_stats=False, normalize=True, compact_output=False, ) # Set decomposed field onto the latest precipitation cascade. self.__precip_cascades[:, -1] = precip_extrap_decomp["cascade_levels"] # If NWP information is incorporated, use the current mean of the decomposed # field and adjust standard deviation on spatial scales that have a central # wavelength below the effective horizontal resolution of the NWP model. if is_correction_timestep: # Set the mean of the spatial scales onto the mean values of the currently # decomposed field. self.__mu = np.array(precip_extrap_decomp["means"]) # Compute the standard deviation evolved by an AR(1)-process. 
self.__sigma = np.sqrt( self.__forecast_state.params.PHI[:, 0] ** 2.0 * self.__sigma**2.0 + self.__forecast_state.params.PHI[:, 1] ** 2.0 * self.__forecast_state.params.noise_std_coeffs**2.0 ) # Use the standard deviations of the currently decomposed field for spatial # scales above the effective horizontal resolution of the NWP model. self.__sigma[self.__resolution_mask] = np.array( precip_extrap_decomp["stds"] )[self.__resolution_mask] # Else, keep mean and standard deviation constant for pure nowcasting forecast steps. # It's not necessary but describes better the handling of the scaling # parameters. else: self.__mu = self.__mu self.__sigma = self.__sigma # Call extrapolation function to extrapolate the precipitation field onto the # position of the current timestep. def __advect(self): # Since previous displacement is the sum of displacement over all previous # timesteps, we have to compute the differences between the displacements to # get the motion vector field for one time step. displacement_tmp = self.__previous_displacement.copy() # Call the extrapolation method ( self.__forecast_state.nwc_prediction[self.__ens_member], self.__previous_displacement, ) = self.__forecast_state.params.extrapolation_method( self.__forecast_state.nwc_prediction[self.__ens_member], self.__velocity, [1], allow_nonfinite_values=True, displacement_previous=self.__previous_displacement, **self.__forecast_state.params.extrapolation_kwargs, ) if ( self.__forecast_state.config.smooth_radar_mask_range > 0 and self.__ens_member == 0 ): self.__forecast_state.params.domain_mask = ( self.__forecast_state.params.extrapolation_method( self.__forecast_state.params.domain_mask, self.__velocity, [1], interp_order=1, outval=True, )[0] ) # Get the difference of the previous displacement field. self.__previous_displacement -= displacement_tmp # Get a noise field out of the respective pool and iterate through the AR(1) # process. 
    def __iterate(self):
        """
        Perturb the cascades with pooled noise and advance the AR(1) process.
        """
        # Get a noise field out of the noise field pool and multiply it with
        # precipitation mask and the standard deviation coefficients.
        # NOTE(review): uses the global np.random state — reproducibility
        # depends on seeding done elsewhere; confirm seed handling.
        epsilon = (
            self.__forecast_state.noise_field_pool[
                np.random.randint(self.__forecast_state.config.n_noise_fields)
            ]
            * self.__forecast_state.precip_mask[self.__ens_member][None, :, :]
            * self.__forecast_state.params.noise_std_coeffs[:, None, None]
        )

        # Iterate through the AR(1) process for each cascade level.
        for i in range(self.__forecast_state.config.n_cascade_levels):
            self.__precip_cascades[i] = autoregression.iterate_ar_model(
                self.__precip_cascades[i],
                self.__forecast_state.params.PHI[i],
                epsilon[i],
            )

    # Update the precipitation mask for the forecast step by incorporating areas
    # where the NWP model forecast precipitation.
    def __update_precip_mask(self, nwp):
        # Get the area where the NWP ensemble member forecast precipitation above
        # precipitation threshold and dilate it by a configurable range.
        precip_mask = (
            binary_dilation(
                nwp > self.__forecast_state.config.precip_threshold,
                structure=np.ones(
                    (
                        self.__forecast_state.config.precip_mask_dilation,
                        self.__forecast_state.config.precip_mask_dilation,
                    ),
                    dtype=int,
                ),
            )
            * 1.0
        )

        # Get the area where the combined member forecast precipitation above the
        # precipitation threshold and dilate it by a configurable range.
        precip_mask += (
            binary_dilation(
                self.__forecast_state.nwc_prediction[self.__ens_member]
                > self.__forecast_state.config.precip_threshold,
                structure=np.ones(
                    (
                        self.__forecast_state.config.precip_mask_dilation,
                        self.__forecast_state.config.precip_mask_dilation,
                    ),
                    dtype=int,
                ),
            )
            * 1.0
        )

        # Set values above 1 to 1 for conversion into bool.
        precip_mask[precip_mask >= 1.0] = 1.0

        # Some additional dilation of the precipitation mask.
        # (The Gaussian smoothing spreads nonzero values slightly beyond the
        # dilated edges; any nonzero pixel becomes True below.)
        precip_mask = gaussian_filter(precip_mask, (1, 1))

        # Set the mask outside the radar domain to 0.
        precip_mask[self.__forecast_state.params.domain_mask] = 0.0

        # Convert mask into bool.
        self.__forecast_state.precip_mask[self.__ens_member] = np.array(
            precip_mask, dtype=bool
        )

    # Apply probability matching
    def __probability_matching(self):
        # Match the empirical CDF of the member's prediction to that of the
        # member's resampled forecast field.
        self.__forecast_state.nwc_prediction[self.__ens_member] = (
            probmatching.nonparam_match_empirical_cdf(
                self.__forecast_state.nwc_prediction[self.__ens_member],
                self.__forecast_state.fc_resampled[self.__ens_member],
            )
        )

    # Set no data area in the resulting precipitation field.
    def __set_no_data(self):
        self.__forecast_state.nwc_prediction_btf[self.__ens_member][
            self.__forecast_state.params.domain_mask
        ] = np.nan

    # Fill edge zones of the domain with NWP data if smooth_radar_mask_range is > 0
    def fill_backtransform(self, nwp):
        """
        Blend this member's nowcast with its NWP field near the radar-domain
        edge using a smooth, dilated weighting mask.
        """
        # For a smoother transition at the edge, we can slowly dilute the nowcast
        # component into NWP at the edges

        # Compute the smooth dilated mask
        new_mask = blending.utils.compute_smooth_dilated_mask(
            self.__forecast_state.params.domain_mask,
            max_padding_size_in_px=self.__forecast_state.config.smooth_radar_mask_range,
        )
        new_mask = np.nan_to_num(new_mask, nan=0)

        # Ensure mask values are between 0 and 1
        mask_model = np.clip(new_mask, 0, 1)
        mask_radar = np.clip(1 - new_mask, 0, 1)

        # Handle NaNs in the NWP and nowcast fields by setting NaNs to 0 in the
        # blending step
        nwp_temp = np.nan_to_num(nwp, nan=0)
        nwc_temp = np.nan_to_num(
            self.__forecast_state.nwc_prediction[self.__ens_member], nan=0
        )

        # Perform the blending of radar and model inside the radar domain using
        # a weighted combination
        self.__forecast_state.nwc_prediction_btf[self.__ens_member] = np.nansum(
            [
                mask_model * nwp_temp,
                mask_radar * nwc_temp,
            ],
            axis=0,
        )


class EnKFCombinationNowcaster:
    def __init__(
        self,
        obs_precip: np.ndarray,
        obs_timestamps: np.ndarray,
        nwp_precip: np.ndarray,
        nwp_timestamps: np.ndarray,
        obs_velocity: np.ndarray,
        fc_period: int,
        fc_init: datetime.datetime,
        enkf_combination_config: EnKFCombinationConfig,
    ):
        """
        Initialize EnKFCombinationNowcaster with inputs and
        configurations.
        """
        # Store inputs
        self.__obs_precip = obs_precip
        self.__nwp_precip = nwp_precip
        self.__obs_velocity = obs_velocity
        self.__fc_period = fc_period
        self.__fc_init = fc_init

        # Store config
        self.__config = enkf_combination_config

        # Initialize Params
        self.__params = EnKFCombinationParams()

        # Store input timestamps
        self.__obs_timestamps = obs_timestamps
        self.__nwp_timestamps = nwp_timestamps

    def compute_forecast(self):
        """
        Generate a combined nowcast ensemble by using the reduced-space
        ensemble Kalman filter method.

        Parameters
        ----------
        obs_precip: np.ndarray
            Array of shape (ar_order+1,m,n) containing the observed input
            precipitation fields ordered by timestamp from oldest to newest.
            The time steps between the inputs are assumed to be regular.
        obs_timestamps: np.ndarray
            Array of shape (ar_order+1) containing the corresponding time
            stamps of observed input precipitation fields as datetime objects.
        nwp_precip: np.ndarray
            Array of shape (n_ens,n_times,m,n) containing the (NWP) ensemble
            model forecast.
        nwp_timestamps: np.ndarray
            Array of shape (n_times) containing the corresponding time stamps
            of the (NWP) ensemble model forecast as datetime objects.
        obs_velocity: np.ndarray
            Array of shape (2,m,n) containing the x- and y-components of the
            advection field. The velocities are based on the observed input
            precipitation fields and are assumed to represent one time step
            between the inputs. All values are required to be finite.
        fc_period: int
            Forecast range in minutes.
        fc_init: datetime object
            Issuetime of the combined forecast to compute.
        enkf_combination_config: EnKFCombinationConfig
            Provides a set of configuration parameters for the nowcast
            ensemble generation.

        Returns
        -------
        out: np.ndarray
            If return_output is True, a four-dimensional array of shape
            (n_ens_members,num_timesteps,m,n) containing a time series of
            forecast precipitation fields for each ensemble member.
            Otherwise, a None value is returned. The time series starts from t0.
            The timestep is taken from the input precipitation fields precip.

        See also
        --------
        :py:mod:`pysteps.extrapolation.interface`,
        :py:mod:`pysteps.cascade.interface`,
        :py:mod:`pysteps.noise.interface`,
        :py:func:`pysteps.noise.utils.compute_noise_stddev_adjs`

        References
        ----------
        :cite:`Nerini2019MWR`

        Notes
        -----
        1. The combination method currently supports only an AR(1) process for
           the forecast step.
        """
        # Check for the inputs.
        self.__check_inputs()

        # Check timestamps of radar and nwp input and determine forecast and
        # correction timesteps as well as the temporal resolution
        self.__check_input_timestamps()

        # Check whether there is no precipitation in observation, but in NWP or
        # the other way around
        self.__check_no_rain_case()

        # Print forecast information.
        self.__print_forecast_info()

        # Initialize and compute the forecast initialization.
        self.FI = ForecastInitialization(
            self.__config, self.__params, self.__obs_precip, self.__obs_velocity
        )

        # NWP: Set values below precip thr and nonfinite values to norain thr.
        # The fill value lies safely below the precipitation threshold.
        nwp_mask = np.logical_or(
            ~np.isfinite(self.__nwp_precip),
            self.__nwp_precip < self.__config.precip_threshold,
        )
        self.__nwp_precip[nwp_mask] = self.__config.precip_threshold - 2.0

        # Set an initial precipitation mask for the NWC models.
        precip_mask = binary_dilation(
            self.__obs_precip[-1] > self.__config.precip_threshold,
            structure=np.ones(
                (self.__config.precip_mask_dilation, self.__config.precip_mask_dilation)
            ),
        )

        # Initialize an instance of NWC forecast model class for each ensemble member.
        self.FS = ForecastState(
            enkf_combination_config=self.__config,
            enkf_combination_params=self.__params,
            noise_field_pool=self.FI.noise_field_pool,
            latest_obs=self.__obs_precip[-1, :, :],
            precip_mask=precip_mask.copy(),
        )
        self.FC_Models = {}
        for j in range(self.__config.n_ens_members):
            FC = ForecastModel(
                forecast_state=self.FS,
                precip_cascades=deepcopy(self.FI.precip_cascades),
                velocity=self.__obs_velocity,
                mu=deepcopy(self.FI.mean_extrapolation),
                sigma=deepcopy(self.FI.std_extrapolation),
                ens_member=j,
            )
            self.FC_Models[j] = FC

        # Initialize the combination model.
        if self.__config.enkf_method == "masked_enkf":
            kalman_filter_model = MaskedEnKF
        else:
            # NOTE(review): the adjacent string literals below concatenate
            # without a space ("ensembleKalman") — message text kept unchanged.
            raise ValueError(
                "Currently, only 'masked_enkf' is implemented as ensemble"
                "Kalman filter method!"
            )
        self.KalmanFilterModel = kalman_filter_model(self.__config, self.__params)

        # Start the main forecast loop.
        self.__integrated_nowcast_main_loop()

        # Stack and return the forecast output.
        if self.__config.return_output:
            # Reorder from (time, member, m, n) to (member, time, m, n).
            self.FS.final_combined_forecast = np.array(
                self.FS.final_combined_forecast
            ).swapaxes(0, 1)

            if self.__config.measure_time:
                return (
                    self.FS.final_combined_forecast,
                    self.__fc_init,
                    self.__mainloop_time,
                )
            if self.__config.verbose_output:
                return self.FS.final_combined_forecast, self.FS.background_ensemble
            return self.FS.final_combined_forecast

        # Else, return None
        return None

    def __check_inputs(self):
        """
        Validates user's input.
        """
        # Check dimensions of obs precip
        if self.__obs_precip.ndim != 3:
            raise ValueError(
                "Precipitation observation must be a three-dimensional "
                "array of shape (ar_order + 1, m, n)"
            )
        if self.__obs_precip.shape[0] < self.__config.ar_order + 1:
            # NOTE(review): "first" + "dimension" concatenate without a space
            # in this message — text kept unchanged.
            raise ValueError(
                f"Precipitation observation must have at least "
                f"{self.__config.ar_order + 1} time steps in the first"
                f"dimension to match the autoregressive order "
                f"(ar_order={self.__config.ar_order})"
            )
        # If it is necessary, slice the precipitation field to only use the last
        # ar_order +1 time steps.
        if self.__obs_precip.shape[0] > self.__config.ar_order + 1:
            self.__obs_precip = np.delete(
                self.__obs_precip,
                np.arange(
                    0, self.__obs_precip.shape[0] - (self.__config.ar_order + 1), 1
                ),
                axis=0,
            )

        # Check NWP data dimensions
        NWP_shape = self.__nwp_precip.shape
        NWP_timestamps_len = len(self.__nwp_timestamps)
        if not NWP_timestamps_len in NWP_shape:
            raise ValueError(
                f"nwp_timestamps has not the same length as NWP data!"
                f"nwp_timestamps length: {NWP_timestamps_len}"
                f"nwp_precip shape: {NWP_shape}"
            )
        # Ensure that model has shape: [n_ens_members, t, y, x]
        # n_ens_members and t can sometimes be swapped when using grib datasets.
        # Check for temporal resolution of NWP data
        if NWP_shape[0] == NWP_timestamps_len:
            self.__nwp_precip = self.__nwp_precip.swapaxes(0, 1)

        # Check dimensions of obs velocity
        if self.__obs_velocity.ndim != 3:
            raise ValueError(
                "The velocity field must be a three-dimensional array of shape (2, m, n)"
            )

        # Check whether the spatial dimensions match between obs precip and
        # obs velocity
        if self.__obs_precip.shape[1:3] != self.__obs_velocity.shape[1:3]:
            # NOTE(review): "the" + "velocity" concatenate without a space in
            # this message — text kept unchanged.
            raise ValueError(
                f"Spatial dimension of Precipitation observation and the"
                "velocity field do not match: "
                f"{self.__obs_precip.shape[1:3]} vs. {self.__obs_velocity.shape[1:3]}"
            )

        # Check velocity field for non-finite values
        if np.any(~np.isfinite(self.__obs_velocity)):
            raise ValueError("Velocity contains non-finite values")

        # Check whether there are extrapolation kwargs
        if self.__config.extrapolation_kwargs is None:
            self.__params.extrapolation_kwargs = dict()
        else:
            self.__params.extrapolation_kwargs = deepcopy(
                self.__config.extrapolation_kwargs
            )

        # Check whether there are filter kwargs
        if self.__config.filter_kwargs is None:
            self.__params.filter_kwargs = dict()
        else:
            self.__params.filter_kwargs = deepcopy(self.__config.filter_kwargs)

        # Check for noise kwargs
        if self.__config.noise_kwargs is None:
            self.__params.noise_kwargs = {"win_fun": "tukey"}
        else:
            self.__params.noise_kwargs = deepcopy(self.__config.noise_kwargs)

        # Check for combination kwargs
        if self.__config.combination_kwargs is None:
            self.__params.combination_kwargs = dict()
        else:
            self.__params.combination_kwargs = deepcopy(
                self.__config.combination_kwargs
            )

        # Set the precipitation threshold also in params
        self.__params.precip_threshold = self.__config.precip_threshold

        # Check for the standard deviation adjustment of the noise fields
        if self.__config.noise_stddev_adj not in ["auto", "fixed", None]:
            raise ValueError(
                f"Unknown noise_std_dev_adj method {self.__config.noise_stddev_adj}. "
                "Must be 'auto', 'fixed', or None"
            )

    def __check_input_timestamps(self):
        """
        Check for timestamps of radar data and NWP data, determine forecasts and
        correction timesteps as well as the temporal resolution of the combined
        forecast
        """
        # Check for temporal resolution of radar data
        obs_time_diff = np.unique(np.diff(self.__obs_timestamps))
        if obs_time_diff.size > 1:
            raise ValueError(
                "Observation data has a different temporal resolution or "
                "observations are missing!"
            )
        self.__temporal_res = int(obs_time_diff[0].total_seconds() / 60)

        # Check for temporal resolution of NWP data
        nwp_time_diff = np.unique(np.diff(self.__nwp_timestamps))
        if nwp_time_diff.size > 1:
            raise ValueError(
                "NWP data has a different temporal resolution or some time steps are missing!"
            )
        nwp_temporal_res = int(nwp_time_diff[0].total_seconds() / 60)

        # Check whether all necessary timesteps are included in the observation
        if self.__obs_timestamps[-1] != self.__fc_init:
            raise ValueError(
                "The last observation timestamp differs from forecast issue time!"
            )
        if self.__obs_timestamps.size < self.__config.ar_order + 1:
            raise ValueError(
                f"Precipitation observation must have at least "
                f"{self.__config.ar_order + 1} time steps in the first"
                f"dimension to match the autoregressive order "
                f"(ar_order={self.__config.ar_order})"
            )

        # Check whether the NWP forecast includes the combined forecast range
        if np.logical_or(
            self.__fc_init < self.__nwp_timestamps[0],
            self.__fc_init > self.__nwp_timestamps[-1],
        ):
            raise ValueError("Forecast issue time is not included in the NWP forecast!")
        max_nwp_fc_period = (
            self.__nwp_timestamps.size
            - np.where(self.__nwp_timestamps == self.__fc_init)[0][0]
            - 1
        ) * nwp_temporal_res
        if max_nwp_fc_period < self.__fc_period - nwp_temporal_res:
            raise ValueError(
                "The remaining NWP forecast is not sufficient for the combined forecast period"
            )

        # Truncate the NWP dataset to the timesteps inside the forecast period
        self.__nwp_precip = np.delete(
            self.__nwp_precip,
            np.logical_or(
                self.__nwp_timestamps < self.__fc_init,
                self.__nwp_timestamps
                > self.__fc_init + datetime.timedelta(minutes=self.__fc_period),
            ),
            axis=1,
        )

        # Define forecast and correction timesteps assuming that temporal
        # resolution of the combined forecast is equal to that of the radar data
        self.__forecast_leadtimes = np.arange(
            0, self.__fc_period + 1, self.__temporal_res
        )
        trunc_nwp_timestamps = (
            self.__nwp_timestamps[
                np.logical_and(
                    self.__nwp_timestamps >= self.__fc_init,
                    self.__nwp_timestamps
                    <= self.__fc_init + datetime.timedelta(minutes=self.__fc_period),
                )
            ]
            - self.__fc_init
        )
        # Correction lead times in minutes relative to the issue time.
        self.__correction_leadtimes = np.array(
            [int(timestamp.total_seconds() / 60) for timestamp in trunc_nwp_timestamps]
        )

    def __check_no_rain_case(self):
        """
        Determine whether observations and/or the NWP forecast contain rain and
        set ``params.no_rain_case`` to "obs", "nwp" or "none" accordingly.
        """
        print("Test for no rain cases")
        print("======================")
        print("")

        # Check for zero input fields in the radar and NWP data.
        zero_precip_radar = check_norain(
            self.__obs_precip,
            self.__config.precip_threshold,
            self.__config.norain_threshold,
            self.__params.noise_kwargs["win_fun"],
        )
        # The norain fraction threshold used for nwp is the default value of 0.0,
        # since nwp does not suffer from clutter.
        zero_precip_nwp_forecast = check_norain(
            self.__nwp_precip,
            self.__config.precip_threshold,
            self.__config.norain_threshold,
            self.__params.noise_kwargs["win_fun"],
        )

        # If there is no precipitation in the observation, set no_rain_case to
        # "obs" and use only the NWP ensemble forecast
        if zero_precip_radar:
            self.__params.no_rain_case = "obs"
        # If there is no precipitation at the first usable NWP forecast
        # timestep, but in the observation, compute an extrapolation forecast
        elif zero_precip_nwp_forecast:
            self.__params.no_rain_case = "nwp"
        # Otherwise, set no_rain_case to 'none' and compute combined forecast as
        # usual
        else:
            self.__params.no_rain_case = "none"

        return

    def __print_forecast_info(self):
        """
        Print information about the forecast configuration, including inputs,
        methods, and parameters.
        """
        print("Reduced-space ensemble Kalman filter")
        print("====================================")
        print("")

        print("Inputs")
        print("------")
        print(f"Forecast issue time: {self.__fc_init.isoformat()}")
        print(
            f"Input dimensions: {self.__obs_precip.shape[1]}x{self.__obs_precip.shape[2]}"
        )
        print(f"Temporal resolution: {self.__temporal_res} minutes")
        print("")

        print("NWP and blending inputs")
        print("-----------------------")
        print(f"Number of (NWP) models: {self.__nwp_precip.shape[0]}")
        print("")

        print("Methods")
        print("-------")
        print(f"Extrapolation: {self.__config.extrapolation_method}")
        print(f"Bandpass filter: {self.__config.bandpass_filter_method}")
        print(f"Decomposition: {self.__config.decomposition_method}")
        print(f"Noise generator: {self.__config.noise_method}")
        print(f"Noise adjustment: {'yes' if self.__config.noise_stddev_adj else 'no'}")
        print(f"EnKF implementation: {self.__config.enkf_method}")
        print(f"FFT method: {self.__config.fft_method}")
        print(f"Domain: {self.__config.domain}")
        print("")

        print("Parameters")
        print("----------")
        print(f"Forecast length in min: {self.__fc_period}")
        print(f"Ensemble size: {self.__config.n_ens_members}")
        print(f"Parallel threads: {self.__config.num_workers}")
        print(f"Number of cascade levels: {self.__config.n_cascade_levels}")
        print(f"Order of the AR(p) model: {self.__config.ar_order}")
        print("")
        print(f"No rain forecast: {self.__params.no_rain_case}")

    def __integrated_nowcast_main_loop(self):
        # Main time loop: one iteration per combined-forecast lead time.
        if self.__config.measure_time:
            starttime_mainloop = time.time()

        self.__params.extrapolation_kwargs["return_displacement"] = True
        is_correction_timestep = False

        for t, fc_leadtime in enumerate(self.__forecast_leadtimes):
            if self.__config.measure_time:
                starttime = time.time()

            # Check whether forecast time step is also a correction time step.
            is_correction_timestep = (
                self.__forecast_leadtimes[t - 1] in self.__correction_leadtimes
                and t > 1
                and np.logical_and(
                    self.__config.enable_combination,
                    self.__params.no_rain_case != "nwp",
                )
            )

            # Check whether forecast time step is a nowcasting time step.
            is_nowcasting_timestep = t > 0

            # Check whether full NWP weight is reached.
            is_full_nwp_weight = (
                self.KalmanFilterModel.get_inflation_factor_obs() <= 0.02
                or self.__params.no_rain_case == "obs"
            )

            # If full NWP weight is reached, set pure NWP ensemble forecast in
            # combined forecast output
            if is_full_nwp_weight:
                # Set t_corr to the first available NWP data timestep and that is 0.
                # (UnboundLocalError is a subclass of NameError, so this detects
                # whether t_corr was already set in a previous iteration.)
                try:
                    t_corr
                except NameError:
                    t_corr = 0
                print(f"Full NWP weight is reached for lead time + {fc_leadtime} min")
                if is_correction_timestep:
                    t_corr = np.where(
                        self.__correction_leadtimes == self.__forecast_leadtimes[t]
                    )[0][0]
                self.FS.nwc_prediction = self.__nwp_precip[:, t_corr]
            # Otherwise compute the combined forecast.
            else:
                print(f"Computing combination for lead time + {fc_leadtime} min")
                self.__forecast_loop(t, is_correction_timestep, is_nowcasting_timestep)

            # Apply back transformation
            if self.__config.smooth_radar_mask_range == 0:
                for j, FC_Model in enumerate(self.FC_Models.values()):
                    FC_Model.backtransform()
            else:
                # Same sentinel idiom as for t_corr above.
                try:
                    t_fill_nwp
                except NameError:
                    t_fill_nwp = 0
                if self.__forecast_leadtimes[t] in self.__correction_leadtimes:
                    t_fill_nwp = np.where(
                        self.__correction_leadtimes == self.__forecast_leadtimes[t]
                    )[0][0]

                def worker(j):
                    # Blend member j's nowcast with its NWP field at the radar
                    # domain edge.
                    self.FC_Models[j].fill_backtransform(
                        self.__nwp_precip[j, t_fill_nwp]
                    )

                dask_worker_collection = []

                if DASK_IMPORTED and self.__config.n_ens_members > 1:
                    for j in range(self.__config.n_ens_members):
                        dask_worker_collection.append(dask.delayed(worker)(j))
                    dask.compute(
                        *dask_worker_collection,
                        num_workers=self.__params.num_ensemble_workers,
                    )
                else:
                    for j in range(self.__config.n_ens_members):
                        worker(j)

                dask_worker_collection = None

            self.__write_output()

            if self.__config.measure_time:
                _ = self.__measure_time("timestep", starttime)
            else:
                print("...done.")

        if self.__config.measure_time:
            self.__mainloop_time = time.time() - starttime_mainloop
            print(
                f"Elapsed time for computing forecast: {(self.__mainloop_time / 60.0):.4} min"
            )

        return

    def __forecast_loop(self, t, is_correction_timestep, is_nowcasting_timestep):
        """
        Run one combined time step: optional EnKF correction followed by the
        nowcasting forecast step of every ensemble member.
        """
        # If the temporal resolution of the NWP data is equal to those of the
        # observation, the correction step can be applied after the forecast
        # step for the current forecast leadtime.
        # However, if the temporal resolution is different, the correction step
        # has to be applied before the forecast step to avoid smoothing effects
        # in the resulting precipitation fields.
        if is_correction_timestep:
            t_corr = np.where(
                self.__correction_leadtimes == self.__forecast_leadtimes[t - 1]
            )[0][0]
            # Optionally store the background ensemble for diagnostics.
            if self.__config.verbose_output:
                self.FS.background_ensemble[self.__correction_leadtimes[t_corr]] = (
                    self.FS.nwc_prediction.copy()
                )
            self.FS.nwc_prediction, self.FS.fc_resampled = (
                self.KalmanFilterModel.correct_step(
                    self.FS.nwc_prediction,
                    self.__nwp_precip[:, t_corr],
                    self.FS.fc_resampled,
                )
            )

        # Run nowcasting time step
        if is_nowcasting_timestep:
            # Set t_corr to the first available NWP data timestep and that is 0.
            # (UnboundLocalError subclasses NameError — this checks whether the
            # correction branch above already assigned t_corr.)
            try:
                t_corr
            except NameError:
                t_corr = 0

            def worker(j):
                # Advance ensemble member j by one forecast step.
                self.FC_Models[j].run_forecast_step(
                    nwp=self.__nwp_precip[j, t_corr],
                    is_correction_timestep=is_correction_timestep,
                )

            dask_worker_collection = []

            if DASK_IMPORTED and self.__config.n_ens_members > 1:
                for j in range(self.__config.n_ens_members):
                    dask_worker_collection.append(dask.delayed(worker)(j))
                dask.compute(
                    *dask_worker_collection,
                    num_workers=self.__params.num_ensemble_workers,
                )
            else:
                for j in range(self.__config.n_ens_members):
                    worker(j)

            dask_worker_collection = None

    def __write_output(self):
        """
        Pass the current back-transformed ensemble to the callback (if any) and
        append it to the output collection (if requested).
        """
        if (
            self.__config.callback is not None
            and self.FS.nwc_prediction_btf.shape[1] > 0
        ):
            self.__config.callback(self.FS.nwc_prediction_btf)

        if self.__config.return_output:
self.FS.final_combined_forecast.append(self.FS.nwc_prediction_btf.copy()) def __measure_time(self, label, start_time): """ Measure and print the time taken for a specific part of the process. Parameters: - label: A description of the part of the process being measured. - start_time: The timestamp when the process started (from time.time()). """ if self.__config.measure_time: elapsed_time = time.time() - start_time print(f"{label} took {elapsed_time:.2f} seconds.") return elapsed_time return None def forecast( obs_precip, obs_timestamps, nwp_precip, nwp_timestamps, velocity, forecast_horizon, issuetime, n_ens_members, precip_mask_dilation=1, smooth_radar_mask_range=0, n_cascade_levels=6, precip_thr=-10.0, norain_thr=0.01, extrap_method="semilagrangian", decomp_method="fft", bandpass_filter_method="gaussian", noise_method="nonparametric", enkf_method="masked_enkf", enable_combination=True, noise_stddev_adj=None, ar_order=1, callback=None, return_output=True, seed=None, num_workers=1, fft_method="numpy", domain="spatial", extrap_kwargs=None, filter_kwargs=None, noise_kwargs=None, combination_kwargs=None, measure_time=False, verbose_output=False, ): """ Generate a combined nowcast ensemble by using the reduced-space ensemble Kalman filter method described in Nerini et al. 2019. Parameters ---------- obs_precip: np.ndarray Array of shape (ar_order+1,m,n) containing the observed input precipitation fields ordered by timestamp from oldest to newst. The time steps between the inputs are assumed to be regular. obs_timestamps: np.ndarray Array of shape (ar_order+1) containing the corresponding time stamps of observed input precipitation fields as datetime objects. nwp_precip: np.ndarray Array of shape (n_ens,n_times,m,n) containing the (NWP) ensemble model forecast. nwp_timestamps: np.ndarray Array of shape (n_times) containing the corresponding time stamps of the (NWP) ensemble model forecast as datetime objects. 
velocity: np.ndarray Array of shape (2,m,n) containing the x- and y-components of the advection field. The velocities are based on the observed input precipitation fields and are assumed to represent one time step between the inputs. All values are required to be finite. forecast_horizon: int The length of the forecast horizon (the length of the forecast) in minutes. issuetime: datetime object Issuetime of the combined forecast to compute. n_ens_members: int The number of ensemble members to generate. This number should always be equal to or larger than the number of NWP ensemble members / number of NWP models. precip_mask_dilation: int Range by which the precipitation mask within the forecast step should be extended per time step. Defaults to 1. smooth_radar_mask_range: int, Default is 0. Method to smooth the transition between the radar-NWP-noise blend and the NWP-noise blend near the edge of the radar domain (radar mask), where the radar data is either not present anymore or is not reliable. If set to 0 (grid cells), this generates a normal forecast without smoothing. To create a smooth mask, this range should be a positive value, representing a buffer band of a number of pixels by which the mask is cropped and smoothed. The smooth radar mask removes the hard edges between NWP and radar in the final blended product. Typically, a value between 50 and 100 km can be used. 80 km generally gives good results. n_cascade_levels: int, optional The number of cascade levels to use. Defaults to 6, see issue #385 on GitHub. precip_thr: float, optional pecifies the threshold value for minimum observable precipitation intensity. Required if mask_method is not None or conditional is True. Defaults to -10.0. norain_thr: float Specifies the threshold value for the fraction of rainy (see above) pixels in the radar rainfall field below which we consider there to be no rain. Depends on the amount of clutter typically present. Defaults to -15.0. 
extrap_method: str, optional Name of the extrapolation method to use. See the documentation of :py:mod:`pysteps.extrapolation.interface`. Defaults to 'semilagrangian'. decomp_method: {'fft'}, optional Name of the cascade decomposition method to use. See the documentation of :py:mod:`pysteps.cascade.interface`. Defaults to 'fft'. bandpass_filter_method: {'gaussian', 'uniform'}, optional Name of the bandpass filter method to use with the cascade decomposition. See the documentation of :py:mod:`pysteps.cascade.interface`. Defaults to 'guassian'. noise_method: {'parametric','nonparametric','ssft','nested',None}, optional Name of the noise generator to use for perturbating the precipitation field. See the documentation of :py:mod:`pysteps.noise.interface`. If set to None, no noise is generated. Defaults to 'nonparametric'. enkf_method: {'masked_enkf}, optional Name of the ensemble Kalman filter method to use for the correction step. Currently, only 'masked_enkf' method is implemented that corresponds to the reduced-space ensemble Kalman filter technique described in Nerini et al. 2019. Defaults to 'masked_enkf'. enable_combination: bool, optional Flag to specify whether the correction step should be applied or a pure nowcasting ensemble should be computed. Defaults to True. noise_stddev_adj: {'auto','fixed',None}, optional Optional adjustment for the standard deviations of the noise fields added to each cascade level. This is done to compensate incorrect std. dev. estimates of casace levels due to presence of no-rain areas. 'auto'=use the method implemented in :py:func:`pysteps.noise.utils. compute_noise_stddev_adjs`. 'fixed'= use the formula given in :cite:`BPS2006` (eq. 6), None=disable noise std. dev adjustment. ar_order: int, optional The order of the autoregressive model to use. Must be 1, since only this order is currently implemented. callback: function, optional Optional function that is called after computation of each time step of the nowcast. 
The function takes one argument: a three-dimensional array of shape (n_ens_members,h,w), where h and w are the height and width of the input field precip, respectively. This can be used, for instance, writing the outputs into files. return_output: bool, optional Set to False to disable returning the outputs as numpy arrays. This can save memory if the intermediate results are written to output files using the callback function. num_workers: int, optional The number of workers to use for parallel computation. Applicable if dask is enabled or pyFFTW is used for computing the FFT. When num_workers>1, it is advisable to disable OpenMP by setting the environment variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous threads. fft_method: str, optional A string defining the FFT method to use (see FFT methods in :py:func:`pysteps.utils.interface.get_method`). Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed, the recommended method is 'pyfftw'. domain: {"spatial", "spectral"} If "spatial", all computations are done in the spatial domain (the classical STEPS model). If "spectral", the AR(2) models and stochastic perturbations are applied directly in the spectral domain to reduce memory footprint and improve performance :cite:`PCH2019b`. extrap_kwargs: dict, optional Optional dictionary containing keyword arguments for the extrapolation method. See the documentation of :py:func:`pysteps.extrapolation.interface`. filter_kwargs: dict, optional Optional dictionary containing keyword arguments for the filter method. See the documentation of :py:mod:`pysteps.cascade.bandpass_filters`. noise_kwargs: dict, optional Optional dictionary containing keyword arguments for the initializer of the noise generator. See the documentation of :py:mod:`pysteps.noise. fftgenerators`. combination_kwargs: dict, optional Optional dictionary containing keyword arguments for the initializer of the ensemble Kalman filter method. 
See the documentation of :py:mod:`pysteps.blending.ens_kalman_filter_methods`. measure_time: bool If set to True, measure, print and return the computation time. verbose_output: bool If set to True, return additionally the background ensemble of the EnKF for further statistics. Returns ------- out: np.ndarray If return_output is True, a four-dimensional array of shape (n_ens_members,num_timesteps,m,n) containing a time series of forecast precipitation fields for each ensemble member. Otherwise, a None value is returned. The time series starts from t0. The timestep is taken from the input precipitation fields precip. See also -------- :py:mod:`pysteps.extrapolation.interface`, :py:mod:`pysteps.cascade.interface`, :py:mod:`pysteps.noise.interface`, :py:func:`pysteps.noise.utils. compute_noise_stddev_adjs` References ---------- :cite:`Nerini2019MWR` Notes ----- 1. The combination method currently supports only an AR(1) process for the forecast step. """ combination_config = EnKFCombinationConfig( n_ens_members=n_ens_members, n_cascade_levels=n_cascade_levels, precip_threshold=precip_thr, norain_threshold=norain_thr, precip_mask_dilation=precip_mask_dilation, smooth_radar_mask_range=smooth_radar_mask_range, extrapolation_method=extrap_method, decomposition_method=decomp_method, bandpass_filter_method=bandpass_filter_method, noise_method=noise_method, enkf_method=enkf_method, enable_combination=enable_combination, noise_stddev_adj=noise_stddev_adj, ar_order=ar_order, seed=seed, num_workers=num_workers, fft_method=fft_method, domain=domain, extrapolation_kwargs=extrap_kwargs, filter_kwargs=filter_kwargs, noise_kwargs=noise_kwargs, combination_kwargs=combination_kwargs, measure_time=measure_time, verbose_output=verbose_output, callback=callback, return_output=return_output, n_noise_fields=30, ) combination_nowcaster = EnKFCombinationNowcaster( obs_precip=obs_precip, obs_timestamps=obs_timestamps, nwp_precip=nwp_precip, nwp_timestamps=nwp_timestamps, obs_velocity=velocity, 
fc_period=forecast_horizon, fc_init=issuetime, enkf_combination_config=combination_config, ) forecast_enkf_combination = combination_nowcaster.compute_forecast() return forecast_enkf_combination ================================================ FILE: pysteps/blending/skill_scores.py ================================================ # -*- coding: utf-8 -*- """ pysteps.blending.skill_scores ============================== Methods for computing skill scores, needed for the blending weights, of two- dimensional model fields with the latest observation field. .. autosummary:: :toctree: ../generated/ spatial_correlation lt_dependent_cor_nwp lt_dependent_cor_extrapolation clim_regr_values """ import numpy as np from pysteps.blending import clim def spatial_correlation(obs, mod, domain_mask): """Determine the spatial correlation between the cascade of the latest available observed (radar) rainfall field and a time-synchronous cascade derived from a model (generally NWP) field. Both fields are assumed to use the same grid. Parameters ---------- obs : array-like Array of shape [cascade_level, y, x] with per cascade_level the normalized cascade of the observed (radar) rainfall field. mod : array-like Array of shape [cascade_level, y, x] with per cascade_level the normalized cascade of the model field. domain_mask : array-like Boolean array of shape [y, x] indicating which cells fall outside the radar domain. Returns ------- rho : array-like Array of shape [n_cascade_levels] containing per cascade_level the correlation between the normalized cascade of the observed (radar) rainfall field and the normalized cascade of the model field. 
References ---------- :cite:`BPS2006` :cite:`SPN2013` """ rho = [] # Fill rho per cascade level, so loop through the cascade levels for cascade_level in range(0, obs.shape[0]): # Only calculate the skill for the pixels within the radar domain # (as that is where there are observations) obs_cascade_level = obs[cascade_level, :, :] mod_cascade_level = mod[cascade_level, :, :] obs_cascade_level[domain_mask] = np.nan mod_cascade_level[domain_mask] = np.nan # Flatten both arrays obs_1d = obs_cascade_level.flatten() mod_1d = mod_cascade_level.flatten() # Calculate the correlation between the two cov = np.nansum( (mod_1d - np.nanmean(mod_1d)) * (obs_1d - np.nanmean(obs_1d)) ) # Without 1/n, as this cancels out (same for stdx and -y) std_obs = np.sqrt(np.nansum((obs_1d - np.nanmean(obs_1d)) ** 2.0)) std_mod = np.sqrt(np.nansum((mod_1d - np.nanmean(mod_1d)) ** 2.0)) rho.append(cov / (std_mod * std_obs)) # Make sure rho is always a (finite) number rho = np.nan_to_num(rho, copy=True, nan=10e-5, posinf=10e-5, neginf=10e-5) return rho def lt_dependent_cor_nwp(lt, correlations, outdir_path, n_model=0, skill_kwargs=None): """Determine the correlation of a model field for lead time lt and cascade k, by assuming that the correlation determined at t=0 regresses towards the climatological values. Parameters ---------- lt : int The lead time of the forecast in minutes. correlations : array-like Array of shape [n_cascade_levels] containing per cascade_level the correlation between the normalized cascade of the observed (radar) rainfall field and the normalized cascade of the model field. outdir_path: string Path to folder where the historical skill are stored. Defaults to path_workdir from rcparams. n_model: int, optional The index number of the (NWP) model when the climatological skill of multiple (NWP) models is stored. For calculations with one model, or when n_model is not provided, n_model = 0. skill_kwargs : dict, optional Dictionary containing e.g. 
the outdir_path, nmodels and window_length parameters. Returns ------- rho : array-like Array of shape [n_cascade_levels] containing, for lead time lt, per cascade_level the correlation between the normalized cascade of the observed (radar) rainfall field and the normalized cascade of the model field. References ---------- :cite:`BPS2004` :cite:`BPS2006` """ if skill_kwargs is None: skill_kwargs = dict() # Obtain the climatological values towards which the correlations will # regress clim_cor_values, regr_pars = clim_regr_values( n_cascade_levels=len(correlations), outdir_path=outdir_path, n_model=n_model, skill_kwargs=skill_kwargs, ) # Determine the speed of the regression (eq. 24 in BPS2004) qm = np.exp(-lt / regr_pars[0, :]) * (2 - np.exp(-lt / regr_pars[1, :])) # Determine the correlation for lead time lt rho = qm * correlations + (1 - qm) * clim_cor_values return rho def lt_dependent_cor_extrapolation(PHI, correlations=None, correlations_prev=None): """Determine the correlation of the extrapolation (nowcast) component for lead time lt and cascade k, by assuming that the correlation determined at t=0 regresses towards the climatological values. Parameters ---------- PHI : array-like Array of shape [n_cascade_levels, ar_order + 1] containing per cascade level the autoregression parameters. correlations : array-like, optional Array of shape [n_cascade_levels] containing per cascade_level the latest available correlation from the extrapolation component that can be found from the AR-2 model. correlations_prev : array-like, optional Similar to correlations, but from the timestep before that. Returns ------- rho : array-like Array of shape [n_cascade_levels] containing, for lead time lt, per cascade_level the correlation of the extrapolation component. 
References ---------- :cite:`BPS2004` :cite:`BPS2006` """ # Check if correlations_prev exists, if not, we set it to 1.0 if correlations_prev is None: correlations_prev = np.repeat(1.0, PHI.shape[0]) # Same for correlations at first time step, we set it to # phi1 / (1 - phi2), see BPS2004 if correlations is None: correlations = PHI[:, 0] / (1.0 - PHI[:, 1]) # Calculate the correlation for lead time lt rho = PHI[:, 0] * correlations + PHI[:, 1] * correlations_prev # Finally, set the current correlations array as the previous one for the # next time step rho_prev = correlations return rho, rho_prev def clim_regr_values(n_cascade_levels, outdir_path, n_model=0, skill_kwargs=None): """Obtains the climatological correlation values and regression parameters from a file called NWP_weights_window.bin in the outdir_path. If this file is not present yet, the values from :cite:`BPS2004` are used. Parameters ---------- n_cascade_levels : int The number of cascade levels to use. outdir_path: string Path to folder where the historical skill are stored. Defaults to path_workdir from rcparams. n_model: int, optional The index number of the (NWP) model when the climatological skill of multiple (NWP) models is stored. For calculations with one model, or when n_model is not provided, n_model = 0. skill_kwargs : dict, optional Dictionary containing e.g. the outdir_path, nmodels and window_length parameters. Returns ------- clim_cor_values : array-like Array of shape [n_cascade_levels] containing the climatological values of the lag 1 and lag 2 auto-correlation coefficients, obtained by calling a method implemented in pysteps.blending.skill_scores.get_clim_skill_scores. regr_pars : array-like Array of shape [2, n_cascade_levels] containing the regression parameters. These are fixed values that should be hard-coded in this function. Unless changed by the user, the standard values from `BPS2004` are used. Notes ----- The literature climatological values assume 8 cascade levels. 
In case less or more cascade levels are used, the script will handle this by taking the first n values or extending the array with a small value. This is not ideal, but will be fixed once the clim_regr_file is made. Hence, this requires a warm-up period of the model. In addition, the regression parameter values (eq. 24 in BPS2004) are hard- coded and can only be optimized by the user after (re)fitting of the equation. """ if skill_kwargs is None: skill_kwargs = {"n_models": 1} # First, obtain climatological skill values try: clim_cor_values = clim.calc_clim_skill( outdir_path=outdir_path, n_cascade_levels=n_cascade_levels, **skill_kwargs ) except FileNotFoundError: # The climatological skill values file does not exist yet, so we'll # use the default values from BPS2004. clim_cor_values = clim.get_default_skill( n_cascade_levels=n_cascade_levels, n_models=skill_kwargs["n_models"] ) clim_cor_values = clim_cor_values[n_model, :] # Check if clim_cor_values has only one model, otherwise it has # returned the skill values for multiple models if clim_cor_values.ndim != 1: raise IndexError( "The climatological cor. values of multiple models were returned, but only one model should be specified. Make sure to pass the argument nmodels in the function" ) # Also check whether the number of cascade_levels is correct if clim_cor_values.shape[0] > n_cascade_levels: clim_cor_values = clim_cor_values[0:n_cascade_levels] elif clim_cor_values.shape[0] < n_cascade_levels: # Get the number of cascade levels that is missing n_extra_lev = n_cascade_levels - clim_cor_values.shape[0] # Append the array with correlation values of 10e-4 clim_cor_values = np.append(clim_cor_values, np.repeat(1e-4, n_extra_lev)) # Get the regression parameters (L in eq. 24 in BPS2004) - Hard coded, # change to own values when present. 
regr_pars = np.array( [ [130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0], [155.0, 220.0, 200.0, 75.0, 10e4, 10e4, 10e4, 10e4], ] ) # Check whether the number of cascade_levels is correct if regr_pars.shape[1] > n_cascade_levels: regr_pars = regr_pars[:, 0:n_cascade_levels] elif regr_pars.shape[1] < n_cascade_levels: # Get the number of cascade levels that is missing n_extra_lev = n_cascade_levels - regr_pars.shape[1] # Append the array with correlation values of 10e-4 regr_pars = np.append( regr_pars, [np.repeat(10.0, n_extra_lev), np.repeat(10e4, n_extra_lev)], axis=1, ) return clim_cor_values, regr_pars ================================================ FILE: pysteps/blending/steps.py ================================================ # -*- coding: utf-8 -*- """ pysteps.blending.steps ====================== Implementation of the STEPS stochastic blending method as described in :cite:`BPS2004`, :cite:`BPS2006`, :cite:`SPN2013` and :cite:`Imhoff2023`. The STEPS blending method consists of the following main steps: #. Set the radar rainfall fields in a Lagrangian space. #. Perform the cascade decomposition for the input radar rainfall fields. The method assumes that the cascade decomposition of the NWP model fields is already done prior to calling the function, as the NWP model fields are generally not updated with the same frequency (which is more efficient). A method to decompose and store the NWP model fields whenever a new NWP model field is present, is present in pysteps.blending.utils.decompose_NWP. #. Initialize the noise method (this will be bypassed if a deterministic nowcast is provided and n_ens_members is 1). #. Estimate AR parameters for the extrapolation nowcast and noise cascade. #. Initialize all the random generators. #. Calculate the initial skill of the NWP model forecasts at t=0. #. Start the forecasting loop: #. Determine which NWP models will be combined with which nowcast ensemble member. 
The number of output ensemble members equals the maximum number of (ensemble) members in the input, which can be either the defined number of (nowcast) ensemble members or the number of NWP models/members. #. Determine the skill and weights of the forecasting components (extrapolation, NWP and noise) for that lead time. #. Regress the extrapolation and noise cascades separately to the subsequent time step. #. Extrapolate the extrapolation and noise cascades to the current time step. #. Blend the cascades. #. Recompose the cascade to a rainfall field. #. Post-processing steps (masking and probability matching, which are different from the original blended STEPS implementation). .. autosummary:: :toctree: ../generated/ forecast calculate_ratios calculate_weights_bps calculate_weights_spn blend_means_sigmas """ import math import time from copy import copy, deepcopy from functools import partial from multiprocessing.pool import ThreadPool import numpy as np from scipy.linalg import inv from scipy.ndimage import binary_dilation, generate_binary_structure, iterate_structure from pysteps import blending, cascade, extrapolation, noise, utils from pysteps.nowcasts import utils as nowcast_utils from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation from pysteps.utils.check_norain import check_norain try: import dask DASK_IMPORTED = True except ImportError: DASK_IMPORTED = False from dataclasses import dataclass, field from typing import Any, Callable @dataclass(frozen=True) class StepsBlendingConfig: """ Parameters ---------- precip_threshold: float, optional Specifies the threshold value for minimum observable precipitation intensity. Required if mask_method is not None or conditional is True. norain_threshold: float Specifies the threshold value for the fraction of rainy (see above) pixels in the radar rainfall field below which we consider there to be no rain. Depends on the amount of clutter typically present. 
Standard set to 0.0 kmperpixel: float, optional Spatial resolution of the input data (kilometers/pixel). Required if vel_pert_method is not None or mask_method is 'incremental'. timestep: float Time step of the motion vectors (minutes). Required if vel_pert_method is not None or mask_method is 'incremental'. n_ens_members: int The number of ensemble members to generate. This number should always be equal to or larger than the number of NWP ensemble members / number of NWP models. n_cascade_levels: int, optional The number of cascade levels to use. Defaults to 6, see issue #385 on GitHub. blend_nwp_members: bool Check if NWP models/members should be used individually, or if all of them are blended together per nowcast ensemble member. Standard set to false. extrapolation_method: str, optional Name of the extrapolation method to use. See the documentation of :py:mod:`pysteps.extrapolation.interface`. decomposition_method: {'fft'}, optional Name of the cascade decomposition method to use. See the documentation of :py:mod:`pysteps.cascade.interface`. bandpass_filter_method: {'gaussian', 'uniform'}, optional Name of the bandpass filter method to use with the cascade decomposition. See the documentation of :py:mod:`pysteps.cascade.interface`. nowcasting_method: {'steps', 'external_nowcast'}, Name of the nowcasting method used to generate the nowcasts. If an external nowcast is provided, the script will use this as input and bypass the autoregression and advection of the extrapolation cascade. Defaults to 'steps', which follows the method described in :cite:`Imhoff2023`. Note, if nowcasting_method is 'external_nowcast', precip_nowcast cannot be None. noise_method: {'parametric','nonparametric','ssft','nested',None}, optional Name of the noise generator to use for perturbating the precipitation field. See the documentation of :py:mod:`pysteps.noise.interface`. If set to None, no noise is generated. 
noise_stddev_adj: {'auto','fixed',None}, optional Optional adjustment for the standard deviations of the noise fields added to each cascade level. This is done to compensate incorrect std. dev. estimates of cascade levels due to presence of no-rain areas. 'auto'=use the method implemented in :py:func:`pysteps.noise.utils.compute_noise_stddev_adjs`. 'fixed'= use the formula given in :cite:`BPS2006` (eq. 6), None=disable noise std. dev adjustment. ar_order: int, optional The order of the autoregressive model to use. Must be >= 1. velocity_perturbation_method: {'bps',None}, optional Name of the noise generator to use for perturbing the advection field. See the documentation of :py:mod:`pysteps.noise.interface`. If set to None, the advection field is not perturbed. weights_method: {'bps','spn'}, optional The calculation method of the blending weights. Options are the method by :cite:`BPS2006` and the covariance-based method by :cite:`SPN2013`. Defaults to bps. timestep_start_full_nwp_weight: int, optional. The timestep, which should be smaller than timesteps, at which a linear transition takes place from the calculated weights to full NWP weight (and zero extrapolation and noise weight) to ensure the blending procedure becomes equal to the NWP forecast(s) at the last timestep of the blending procedure. If not provided, the blending sticks to the theoretical weights provided by the chosen weights_method for a given lead time and skill of each blending component. conditional: bool, optional If set to True, compute the statistics of the precipitation field conditionally by excluding pixels where the values are below the threshold precip_thr. probmatching_method: {'cdf','mean',None}, optional Method for matching the statistics of the forecast field with those of the most recently observed one. 'cdf'=map the forecast CDF to the observed one, 'mean'=adjust only the conditional mean value of the forecast field in precipitation areas, None=no matching applied. 
Using 'mean' requires that mask_method is not None. mask_method: {'obs','incremental',None}, optional The method to use for masking no precipitation areas in the forecast field. The masked pixels are set to the minimum value of the observations. 'obs' = apply precip_thr to the most recently observed precipitation intensity field, 'incremental' = iteratively buffer the mask with a certain rate (currently it is 1 km/min), None=no masking. resample_distribution: bool, optional Method to resample the distribution from the extrapolation and NWP cascade as input for the probability matching. Not resampling these distributions may lead to losing some extremes when the weight of both the extrapolation and NWP cascade is similar. Defaults to True. smooth_radar_mask_range: int, Default is 0. Method to smooth the transition between the radar-NWP-noise blend and the NWP-noise blend near the edge of the radar domain (radar mask), where the radar data is either not present anymore or is not reliable. If set to 0 (grid cells), this generates a normal forecast without smoothing. To create a smooth mask, this range should be a positive value, representing a buffer band of a number of pixels by which the mask is cropped and smoothed. The smooth radar mask removes the hard edges between NWP and radar in the final blended product. Typically, a value between 50 and 100 km can be used. 80 km generally gives good results. seed: int, optional Optional seed number for the random generators. num_workers: int, optional The number of workers to use for parallel computation. Applicable if dask is enabled or pyFFTW is used for computing the FFT. When num_workers>1, it is advisable to disable OpenMP by setting the environment variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous threads. fft_method: str, optional A string defining the FFT method to use (see FFT methods in :py:func:`pysteps.utils.interface.get_method`). Defaults to 'numpy' for compatibility reasons. 
If pyFFTW is installed, the recommended method is 'pyfftw'. domain: {"spatial", "spectral"} If "spatial", all computations are done in the spatial domain (the classical STEPS model). If "spectral", the AR(2) models and stochastic perturbations are applied directly in the spectral domain to reduce memory footprint and improve performance :cite:`PCH2019b`. outdir_path_skill: string, optional Path to folder where the historical skill are stored. Defaults to path_workdir from rcparams. If no path is given, './tmp' will be used. extrapolation_kwargs: dict, optional Optional dictionary containing keyword arguments for the extrapolation method. See the documentation of :py:func:`pysteps.extrapolation.interface`. filter_kwargs: dict, optional Optional dictionary containing keyword arguments for the filter method. See the documentation of :py:mod:`pysteps.cascade.bandpass_filters`. noise_kwargs: dict, optional Optional dictionary containing keyword arguments for the initializer of the noise generator. See the documentation of :py:mod:`pysteps.noise.fftgenerators`. velocity_perturbation_kwargs: dict, optional Optional dictionary containing keyword arguments 'p_par' and 'p_perp' for the initializer of the velocity perturbator. The choice of the optimal parameters depends on the domain and the used optical flow method. 
Default parameters from :cite:`BPS2006`: p_par = [10.88, 0.23, -7.68] p_perp = [5.76, 0.31, -2.72] Parameters fitted to the data (optical flow/domain): darts/fmi: p_par = [13.71259667, 0.15658963, -16.24368207] p_perp = [8.26550355, 0.17820458, -9.54107834] darts/mch: p_par = [24.27562298, 0.11297186, -27.30087471] p_perp = [-7.80797846e+01, -3.38641048e-02, 7.56715304e+01] darts/fmi+mch: p_par = [16.55447057, 0.14160448, -19.24613059] p_perp = [14.75343395, 0.11785398, -16.26151612] lucaskanade/fmi: p_par = [2.20837526, 0.33887032, -2.48995355] p_perp = [2.21722634, 0.32359621, -2.57402761] lucaskanade/mch: p_par = [2.56338484, 0.3330941, -2.99714349] p_perp = [1.31204508, 0.3578426, -1.02499891] lucaskanade/fmi+mch: p_par = [2.31970635, 0.33734287, -2.64972861] p_perp = [1.90769947, 0.33446594, -2.06603662] vet/fmi: p_par = [0.25337388, 0.67542291, 11.04895538] p_perp = [0.02432118, 0.99613295, 7.40146505] vet/mch: p_par = [0.5075159, 0.53895212, 7.90331791] p_perp = [0.68025501, 0.41761289, 4.73793581] vet/fmi+mch: p_par = [0.29495222, 0.62429207, 8.6804131 ] p_perp = [0.23127377, 0.59010281, 5.98180004] fmi=Finland, mch=Switzerland, fmi+mch=both pooled into the same data set The above parameters have been fitted by using run_vel_pert_analysis.py and fit_vel_pert_params.py located in the scripts directory. See :py:mod:`pysteps.noise.motion` for additional documentation. climatology_kwargs: dict, optional Optional dictionary containing keyword arguments for the climatological skill file. Arguments can consist of: 'outdir_path', 'n_models' (the number of NWP models) and 'window_length' (the minimum number of days the clim file should have, otherwise the default is used). mask_kwargs: dict Optional dictionary containing mask keyword arguments 'mask_f', 'mask_rim' and 'max_mask_rim', the factor defining the the mask increment and the (maximum) rim size, respectively. The mask increment is defined as mask_f*timestep/kmperpixel. 
measure_time: bool If set to True, measure, print and return the computation time. callback: function, optional Optional function that is called after computation of each time step of the nowcast. The function takes one argument: a three-dimensional array of shape (n_ens_members,h,w), where h and w are the height and width of the input field precip, respectively. This can be used, for instance, writing the outputs into files. return_output: bool, optional Set to False to disable returning the outputs as numpy arrays. This can save memory if the intermediate results are written to output files using the callback function. """ precip_threshold: float | None norain_threshold: float kmperpixel: float timestep: float n_ens_members: int n_cascade_levels: int blend_nwp_members: bool extrapolation_method: str decomposition_method: str bandpass_filter_method: str nowcasting_method: str noise_method: str | None noise_stddev_adj: str | None ar_order: int velocity_perturbation_method: str | None weights_method: str timestep_start_full_nwp_weight: int | None conditional: bool probmatching_method: str | None mask_method: str | None resample_distribution: bool smooth_radar_mask_range: int seed: int | None num_workers: int fft_method: str domain: str outdir_path_skill: str extrapolation_kwargs: dict[str, Any] = field(default_factory=dict) filter_kwargs: dict[str, Any] = field(default_factory=dict) noise_kwargs: dict[str, Any] = field(default_factory=dict) velocity_perturbation_kwargs: dict[str, Any] = field(default_factory=dict) climatology_kwargs: dict[str, Any] = field(default_factory=dict) mask_kwargs: dict[str, Any] = field(default_factory=dict) measure_time: bool = False callback: Any | None = None return_output: bool = True @dataclass class StepsBlendingParams: noise_std_coeffs: np.ndarray | None = None bandpass_filter: Any | None = None fft: Any | None = None perturbation_generator: Callable[..., np.ndarray] | None = None noise_generator: Callable[..., np.ndarray] | None = 
None PHI: np.ndarray | None = None extrapolation_method: Callable[..., Any] | None = None decomposition_method: Callable[..., dict] | None = None recomposition_method: Callable[..., np.ndarray] | None = None velocity_perturbations: Any | None = None generate_velocity_noise: Callable[[Any, float], np.ndarray] | None = None velocity_perturbations_parallel: np.ndarray | None = None velocity_perturbations_perpendicular: np.ndarray | None = None fft_objs: list[Any] = field(default_factory=list) mask_rim: int | None = None struct: np.ndarray | None = None time_steps_is_list: bool = False precip_models_provided_is_cascade: bool = False xy_coordinates: np.ndarray | None = None precip_zerovalue: float | None = None precip_threshold: float | None = None mask_threshold: np.ndarray | None = None zero_precip_radar: bool = False zero_precip_model_fields: bool = False original_timesteps: list | np.ndarray | None = None num_ensemble_workers: int | None = None rho_nwp_models: np.ndarray | None = None domain_mask: np.ndarray | None = None filter_kwargs: dict | None = None noise_kwargs: dict | None = None velocity_perturbation_kwargs: dict | None = None climatology_kwargs: dict | None = None mask_kwargs: dict | None = None @dataclass class StepsBlendingState: # Radar and noise states precip_cascades: np.ndarray | None = None precip_nowcast_cascades: np.ndarray | None = None precip_noise_input: np.ndarray | None = None precip_noise_cascades: np.ndarray | None = None precip_mean_noise: np.ndarray | None = None precip_std_noise: np.ndarray | None = None # Extrapolation states mean_extrapolation: np.ndarray | None = None std_extrapolation: np.ndarray | None = None mean_nowcast: np.ndarray | None = None std_nowcast: np.ndarray | None = None mean_nowcast_timestep: np.ndarray | None = None std_nowcast_timestep: np.ndarray | None = None rho_extrap_cascade_prev: np.ndarray | None = None rho_extrap_cascade: np.ndarray | None = None precip_cascades_prev_subtimestep: np.ndarray | None = None 
cascade_noise_prev_subtimestep: np.ndarray | None = None precip_extrapolated_after_decomp: np.ndarray | None = None noise_extrapolated_after_decomp: np.ndarray | None = None precip_extrapolated_probability_matching: np.ndarray | None = None # NWP model states precip_models_cascades: np.ndarray | None = None precip_models_cascades_timestep: np.ndarray | None = None precip_models_timestep: np.ndarray | None = None mean_models_timestep: np.ndarray | None = None std_models_timestep: np.ndarray | None = None velocity_models_timestep: np.ndarray | None = None # Mapping from NWP members to ensemble members mapping_list_NWP_member_to_ensemble_member: np.ndarray | None = None # Random states for precipitation, motion and probmatching randgen_precip: list[np.random.RandomState] | None = None randgen_motion: list[np.random.RandomState] | None = None randgen_probmatching: list[np.random.RandomState] | None = None # Variables for final forecast computation previous_displacement: list[Any] | None = None previous_displacement_noise_cascade: list[Any] | None = None previous_displacement_prob_matching: list[Any] | None = None rho_final_blended_forecast: np.ndarray | None = None final_blended_forecast_means: np.ndarray | None = None final_blended_forecast_stds: np.ndarray | None = None final_blended_forecast_means_mod_only: np.ndarray | None = None final_blended_forecast_stds_mod_only: np.ndarray | None = None final_blended_forecast_cascades: np.ndarray | None = None final_blended_forecast_cascades_mod_only: np.ndarray | None = None final_blended_forecast_recomposed: np.ndarray | None = None final_blended_forecast_recomposed_mod_only: np.ndarray | None = None # Final outputs final_blended_forecast: np.ndarray | None = None final_blended_forecast_non_perturbed: np.ndarray | None = None weights: np.ndarray | None = None weights_model_only: np.ndarray | None = None # Timing and indexing time_prev_timestep: list[float] | None = None leadtime_since_start_forecast: list[float] | None = 
None subtimesteps: list[float] | None = None is_nowcast_time_step: bool | None = None subtimestep_index: int | None = None # Weights used for blending weights: np.ndarray | None = None weights_model_only: np.ndarray | None = None # This is stores here as well because this is changed during the forecast loop and thus no longer part of the config extrapolation_kwargs: dict[str, Any] = field(default_factory=dict) class StepsBlendingNowcaster: def __init__( self, precip, precip_nowcast, precip_models, velocity, velocity_models, time_steps, issue_time, steps_blending_config: StepsBlendingConfig, ): """Initializes the StepsBlendingNowcaster with inputs and configurations.""" # Store inputs self.__precip = precip self.__precip_nowcast = precip_nowcast self.__precip_models = precip_models self.__velocity = velocity self.__velocity_models = velocity_models self.__timesteps = time_steps self.__issuetime = issue_time self.__config = steps_blending_config # Initialize Params and State self.__params = StepsBlendingParams() self.__state = StepsBlendingState() # Additional variables for time measurement self.__start_time_init = None self.__init_time = None self.__mainloop_time = None def compute_forecast(self): """ Generate a blended nowcast ensemble by using the Short-Term Ensemble Prediction System (STEPS) method. Parameters ---------- precip: array-like Array of shape (ar_order+1,m,n) containing the input precipitation fields ordered by timestamp from oldest to newest. The time steps between the inputs are assumed to be regular. precip_models: array-like Either raw (NWP) model forecast data or decomposed (NWP) model forecast data. If you supply decomposed data, it needs to be an array of shape (n_models,timesteps+1) containing, per timestep (t=0 to lead time here) and per (NWP) model or model ensemble member, a dictionary with a list of cascades obtained by calling a method implemented in :py:mod:`pysteps.cascade.decomposition`. 
            If you supply the original (NWP) model forecast data, it needs to
            be an array of shape (n_models,timestep+1,m,n) containing
            precipitation (or other) fields, which will then be decomposed in
            this function.
            Depending on your use case it can be advantageous to decompose the
            model forecasts outside beforehand, as this slightly reduces
            calculation times. This is possible with
            :py:func:`pysteps.blending.utils.decompose_NWP`,
            :py:func:`pysteps.blending.utils.compute_store_nwp_motion`, and
            :py:func:`pysteps.blending.utils.load_NWP`. However, if you have
            a lot of (NWP) model members (e.g. 1 model member per nowcast
            member), this can lead to excessive memory usage.
            To further reduce memory usage, both this array and the
            ``velocity_models`` array can be given as float32. They will then
            be converted to float64 before computations to minimize loss in
            precision.
            In case of one (deterministic) model as input, add an extra
            dimension to make sure precip_models is four dimensional prior to
            calling this function.
        velocity: array-like
            Array of shape (2,m,n) containing the x- and y-components of the
            advection field. The velocities are assumed to represent one time
            step between the inputs. All values are required to be finite.
        velocity_models: array-like
            Array of shape (n_models,timestep,2,m,n) containing the x- and
            y-components of the advection field for the (NWP) model field per
            forecast lead time. All values are required to be finite. To
            reduce memory usage, this array can be given as float32. They
            will then be converted to float64 before computations to minimize
            loss in precision.
        time_steps: int or list of floats
            Number of time steps to forecast or a list of time steps for
            which the forecasts are computed (relative to the input time
            step). The elements of the list are required to be in ascending
            order.
        issue_time: datetime
            Datetime object containing the date and time for which the
            forecast is issued.
        config: StepsBlendingConfig
            Provides a set of configuration parameters for the nowcast
            ensemble generation.

        Returns
        -------
        out: ndarray
            If return_output is True, a four-dimensional array of shape
            (n_ens_members,num_timesteps,m,n) containing a time series of
            forecast precipitation fields for each ensemble member.
            Otherwise, a None value is returned. The time series starts from
            t0+timestep, where timestep is taken from the input precipitation
            fields precip. If measure_time is True, the return value is a
            three-element tuple containing the nowcast array, the
            initialization time of the nowcast generator and the time used in
            the main loop (seconds).

        See also
        --------
        :py:mod:`pysteps.extrapolation.interface`,
        :py:mod:`pysteps.cascade.interface`,
        :py:mod:`pysteps.noise.interface`,
        :py:func:`pysteps.noise.utils.compute_noise_stddev_adjs`

        References
        ----------
        :cite:`Seed2003`, :cite:`BPS2004`, :cite:`BPS2006`, :cite:`SPN2013`,
        :cite:`PCH2019b`

        Notes
        -----
        1. The blending currently does not blend the beta-parameters in the
        parametric noise method. It is recommended to use the non-parameteric
        noise method.

        2. If blend_nwp_members is True, the BPS2006 method for the weights
        is suboptimal. It is recommended to use the SPN2013 method instead.

        3. Not yet implemented (and neither in the steps nowcasting module):
        The regression of the lag-1 and lag-2 parameters to their
        climatological values. See also eq. 12 - 19 in :cite:`BPS2004`. By
        doing so, the Phi parameters change over time, which enhances the AR
        process. This can become a future development if this turns out to be
        a warranted functionality.
""" self.__check_inputs() self.__print_forecast_info() # Measure time for initialization if self.__config.measure_time: self.__start_time_init = time.time() # Slice the precipitation field to only use the last ar_order + 1 fields self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy() self.__initialize_nowcast_components() self.__prepare_radar_and_NWP_fields() # Determine if rain is present in both radar and NWP fields if self.__params.zero_precip_radar and self.__params.zero_precip_model_fields: return self.__zero_precipitation_forecast() else: # Prepare the data for the zero precipitation radar case and initialize the noise correctly if self.__params.zero_precip_radar: self.__prepare_nowcast_for_zero_radar() else: self.__state.precip_noise_input = self.__precip.copy() self.__initialize_noise() self.__estimate_ar_parameters_radar() self.__multiply_precip_cascade_to_match_ensemble_members() self.__initialize_random_generators() self.__prepare_forecast_loop() self.__initialize_noise_cascades() if self.__config.measure_time: self.__init_time = self.__measure_time( "initialization", self.__start_time_init ) self.__blended_nowcast_main_loop() # Stack and return the forecast output if self.__config.return_output: self.__state.final_blended_forecast = np.stack( [ np.stack(self.__state.final_blended_forecast[j]) for j in range(self.__config.n_ens_members) ] ) if self.__config.measure_time: return ( self.__state.final_blended_forecast, self.__init_time, self.__mainloop_time, ) else: return self.__state.final_blended_forecast else: return None def __blended_nowcast_main_loop(self): """ Main nowcast loop that iterates through the ensemble members and time steps to generate forecasts. 
        """
        # Isolate the last time slice of observed precipitation
        self.__precip = self.__precip[-1, :, :]
        print("Starting blended nowcast computation.")

        if self.__config.measure_time:
            starttime_mainloop = time.time()

        self.__state.extrapolation_kwargs["return_displacement"] = True

        # Snapshot the cascades so each timestep can advect from the previous
        # sub-timestep state
        self.__state.precip_cascades_prev_subtimestep = deepcopy(
            self.__state.precip_cascades
        )
        self.__state.cascade_noise_prev_subtimestep = deepcopy(
            self.__state.precip_noise_cascades
        )

        self.__state.time_prev_timestep = [
            0.0 for j in range(self.__config.n_ens_members)
        ]
        self.__state.leadtime_since_start_forecast = [
            0.0 for j in range(self.__config.n_ens_members)
        ]

        # iterate each time step
        for t, subtimestep_idx in enumerate(self.__timesteps):
            self.__determine_subtimesteps_and_nowcast_time_step(t, subtimestep_idx)
            if self.__config.measure_time:
                starttime = time.time()
            self.__decompose_nwp_if_needed_and_fill_nans_in_nwp(t)
            self.__find_nowcast_NWP_combination(t)
            self.__determine_skill_for_current_timestep(t)

            # the nowcast iteration for each ensemble member
            final_blended_forecast_all_members_one_timestep = [
                None for _ in range(self.__config.n_ens_members)
            ]
            if self.__config.nowcasting_method == "external_nowcast":
                self.__state.mean_nowcast_timestep = self.__state.mean_nowcast[:, :, t]
                self.__state.std_nowcast_timestep = self.__state.std_nowcast[:, :, t]

            # Per-member worker; closes over t and writes its result into the
            # shared list by index, so members can run concurrently via dask.
            def worker(j):
                # Shallow copy: nested arrays are still shared with self.__state
                worker_state = copy(self.__state)
                self.__determine_NWP_skill_for_next_timestep(t, j, worker_state)
                self.__determine_weights_per_component(t, worker_state)
                self.__regress_extrapolation_and_noise_cascades(j, worker_state, t)
                self.__perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep(
                    t, j, worker_state
                )
                # 8.5 Blend the cascades
                final_blended_forecast_single_member = []
                for t_sub in self.__state.subtimesteps:
                    # TODO: does it make sense to use sub time steps - check if it works?
                    if t_sub > 0:
                        self.__blend_cascades(t_sub, j, worker_state)
                        self.__recompose_cascade_to_rainfall_field(j, worker_state)
                        final_blended_forecast_single_member = (
                            self.__post_process_output(
                                j,
                                t_sub,
                                final_blended_forecast_single_member,
                                worker_state,
                            )
                        )
                final_blended_forecast_all_members_one_timestep[j] = (
                    final_blended_forecast_single_member
                )

            # Run the members in parallel via dask when available, otherwise
            # sequentially.
            dask_worker_collection = []
            if DASK_IMPORTED and self.__config.n_ens_members > 1:
                for j in range(self.__config.n_ens_members):
                    dask_worker_collection.append(dask.delayed(worker)(j))
                dask.compute(
                    *dask_worker_collection,
                    num_workers=self.__params.num_ensemble_workers,
                )
            else:
                for j in range(self.__config.n_ens_members):
                    worker(j)

            dask_worker_collection = None

            if self.__state.is_nowcast_time_step:
                if self.__config.measure_time:
                    _ = self.__measure_time("subtimestep", starttime)
                else:
                    print("done.")

            if self.__config.callback is not None:
                precip_forecast_final = np.stack(
                    final_blended_forecast_all_members_one_timestep
                )
                if precip_forecast_final.shape[1] > 0:
                    self.__config.callback(precip_forecast_final.squeeze())

            if self.__config.return_output:
                for j in range(self.__config.n_ens_members):
                    self.__state.final_blended_forecast[j].extend(
                        final_blended_forecast_all_members_one_timestep[j]
                    )

            # Release per-timestep output to keep memory bounded
            final_blended_forecast_all_members_one_timestep = None

        if self.__config.measure_time:
            self.__mainloop_time = time.time() - starttime_mainloop

    def __check_inputs(self):
        """
        Validates the inputs and determines if the user provided raw forecasts
        or decomposed forecasts.
        """
        # Check dimensions of precip
        if self.__precip.ndim != 3:
            raise ValueError(
                "precip must be a three-dimensional array of shape (ar_order + 1, m, n)"
            )
        if self.__precip.shape[0] < self.__config.ar_order + 1:
            raise ValueError(
                f"precip must have at least {self.__config.ar_order + 1} time steps in the first dimension "
                f"to match the autoregressive order (ar_order={self.__config.ar_order})"
            )
        # Check when precip_nowcast is provided that nowcasting_method is set to 'external_nowcast'
        # and the other way around.
        if (
            self.__precip_nowcast is not None
            and self.__config.nowcasting_method != "external_nowcast"
        ):
            raise KeyError(
                "if precip_nowcast is not None, nowcasting_method should be set to 'external_nowcast' "
            )
        if (
            self.__config.nowcasting_method == "external_nowcast"
            and self.__precip_nowcast is None
        ):
            raise KeyError(
                "if nowcasting_method is set to 'external_nowcast', an external precip_nowcast should be provided as variable."
            )
        # Check dimensions of velocity
        if self.__velocity.ndim != 3:
            raise ValueError(
                "velocity must be a three-dimensional array of shape (2, m, n)"
            )
        if self.__velocity_models.ndim != 5:
            raise ValueError(
                "velocity_models must be a five-dimensional array of shape (n_models, timestep, 2, m, n)"
            )
        if self.__velocity.shape[0] != 2 or self.__velocity_models.shape[2] != 2:
            raise ValueError(
                "velocity and velocity_models must have an x- and y-component, check the shape"
            )
        # Check that spatial dimensions match between precip and velocity
        if self.__precip.shape[1:3] != self.__velocity.shape[1:3]:
            raise ValueError(
                f"Spatial dimensions of precip and velocity do not match: "
                f"{self.__precip.shape[1:3]} vs {self.__velocity.shape[1:3]}"
            )
        # Check if the number of members in the precipitation models and velocity models match
        if self.__precip_models.shape[0] != self.__velocity_models.shape[0]:
            raise ValueError(
                "The number of members in the precipitation models and velocity models must match"
            )

        # Normalize self.__timesteps: a list is binned into sub-timesteps; an
        # int becomes the full range [0..n].
        if isinstance(self.__timesteps, list):
            self.__params.time_steps_is_list = True
            if not sorted(self.__timesteps) == self.__timesteps:
                raise ValueError(
                    "timesteps is not in ascending order", self.__timesteps
                )
            if self.__precip_models.shape[1] != math.ceil(self.__timesteps[-1]) + 1:
                raise ValueError(
                    "precip_models does not contain sufficient lead times for this forecast"
                )
            self.__params.original_timesteps = [0] + list(self.__timesteps)
            self.__timesteps = nowcast_utils.binned_timesteps(
                self.__params.original_timesteps
            )
        else:
            self.__params.time_steps_is_list = False
            if self.__precip_models.shape[1] != self.__timesteps + 1:
                raise ValueError(
                    "precip_models does not contain sufficient lead times for this forecast"
                )
            self.__timesteps = list(range(self.__timesteps + 1))

        precip_nwp_dim = self.__precip_models.ndim
        if precip_nwp_dim == 2:
            if isinstance(self.__precip_models[0][0], dict):
                # It's a 2D array of dictionaries with decomposed cascades
                self.__params.precip_models_provided_is_cascade = True
            else:
                raise ValueError(
                    "When precip_models has ndim == 2, it must contain dictionaries with decomposed cascades."
                )
        elif precip_nwp_dim == 4:
            self.__params.precip_models_provided_is_cascade = False
        else:
            # NOTE(review): the two adjacent string literals below concatenate
            # without a separating space, producing "...model fieldsor a..." in
            # the error message — likely a missing trailing space.
            raise ValueError(
                "precip_models must be either a two-dimensional array containing dictionaries with decomposed model fields"
                "or a four-dimensional array containing the original (NWP) model forecasts"
            )
        if self.__precip_nowcast is not None:
            precip_nowcast_dim = self.__precip_nowcast.ndim
            if precip_nowcast_dim != 4:
                raise ValueError(
                    "precip_nowcast must be a four-dimensional array containing the externally calculated nowcast"
                )

        # Resolve all optional kwargs dicts; deepcopy so the config object is
        # never mutated during the forecast.
        if self.__config.extrapolation_kwargs is None:
            self.__state.extrapolation_kwargs = dict()
        else:
            self.__state.extrapolation_kwargs = deepcopy(
                self.__config.extrapolation_kwargs
            )

        if self.__config.filter_kwargs is None:
            self.__params.filter_kwargs = dict()
        else:
            self.__params.filter_kwargs = deepcopy(self.__config.filter_kwargs)

        if self.__config.noise_kwargs is None:
            self.__params.noise_kwargs = {"win_fun": "tukey"}
        else:
            self.__params.noise_kwargs = deepcopy(self.__config.noise_kwargs)

        if self.__config.velocity_perturbation_kwargs is None:
            self.__params.velocity_perturbation_kwargs = dict()
        else:
            self.__params.velocity_perturbation_kwargs = deepcopy(
                self.__config.velocity_perturbation_kwargs
            )

        if self.__config.climatology_kwargs is None:
            # Make sure clim_kwargs at least contains the number of models
            self.__params.climatology_kwargs = dict(
                {"n_models": self.__precip_models.shape[0]}
            )
        else:
            self.__params.climatology_kwargs = deepcopy(
                self.__config.climatology_kwargs
            )

        if self.__config.mask_kwargs is None:
            self.__params.mask_kwargs = dict()
        else:
            self.__params.mask_kwargs = deepcopy(self.__config.mask_kwargs)

        self.__params.precip_threshold = self.__config.precip_threshold

        if np.any(~np.isfinite(self.__velocity)):
            raise ValueError("velocity contains non-finite values")

        if self.__config.mask_method not in ["obs", "incremental", None]:
            raise ValueError(
                "unknown mask method %s: must be 'obs', 'incremental' or None"
                % self.__config.mask_method
            )

        if self.__config.conditional and self.__params.precip_threshold is None:
            raise ValueError("conditional=True but precip_thr is not set")

        if (
            self.__config.mask_method is not None
            and self.__params.precip_threshold is None
        ):
            raise ValueError("mask_method!=None but precip_thr=None")

        if self.__config.noise_stddev_adj not in ["auto", "fixed", None]:
            raise ValueError(
                "unknown noise_std_dev_adj method %s: must be 'auto', 'fixed', or None"
                % self.__config.noise_stddev_adj
            )

        if self.__config.kmperpixel is None:
            if self.__config.velocity_perturbation_method is not None:
                raise ValueError(
                    "velocity_perturbation_method is set but kmperpixel=None"
                )
            if self.__config.mask_method == "incremental":
                raise ValueError("mask_method='incremental' but kmperpixel=None")

        if self.__config.timestep is None:
            if self.__config.velocity_perturbation_method is not None:
                raise ValueError(
                    "velocity_perturbation_method is set but timestep=None"
                )
            if self.__config.mask_method == "incremental":
                raise ValueError("mask_method='incremental' but timestep=None")

        if self.__config.timestep_start_full_nwp_weight is not None:
            if self.__config.timestep_start_full_nwp_weight < 0:
                raise ValueError(
                    "timestep_start_full_nwp_weight cannot be smaller than zero"
                )
        if self.__config.timestep_start_full_nwp_weight is not None:
            if self.__config.timestep_start_full_nwp_weight >= self.__timesteps[-1]:
                raise ValueError(
                    "timestep_start_full_nwp_weight cannot be the same or larger than the total number of timesteps in this forecast"
                )

    def __print_forecast_info(self):
        """
        Print information about the forecast setup, including inputs, methods,
        and parameters.
        """
        print("STEPS blending")
        print("==============")
        print("")

        print("Inputs")
        print("------")
        print(f"forecast issue time: {self.__issuetime.isoformat()}")
        print(
            f"input dimensions: {self.__precip.shape[1]}x{self.__precip.shape[2]}"
        )
        if self.__precip_nowcast is not None:
            print(
                f"input dimensions pre-computed nowcast: {self.__precip_nowcast.shape[2]}x{self.__precip_nowcast.shape[3]}"
            )
        if self.__config.kmperpixel is not None:
            print(f"km/pixel: {self.__config.kmperpixel}")
        if self.__config.timestep is not None:
            print(f"time step: {self.__config.timestep} minutes")
        print("")

        print("NWP and blending inputs")
        print("-----------------------")
        print(f"number of (NWP) models: {self.__precip_models.shape[0]}")
        print(f"blend (NWP) model members: {self.__config.blend_nwp_members}")
        print(
            f"decompose (NWP) models: {'yes' if self.__precip_models.ndim == 4 else 'no'}"
        )
        print("")

        print("Methods")
        print("-------")
        print(f"extrapolation: {self.__config.extrapolation_method}")
        print(f"bandpass filter: {self.__config.bandpass_filter_method}")
        print(f"decomposition: {self.__config.decomposition_method}")
        print(f"nowcasting algorithm: {self.__config.nowcasting_method}")
        print(f"noise generator: {self.__config.noise_method}")
        print(
            f"noise adjustment: {'yes' if self.__config.noise_stddev_adj else 'no'}"
        )
        print(
            f"velocity perturbator: {self.__config.velocity_perturbation_method}"
        )
        print(f"blending weights method: {self.__config.weights_method}")
        print(
            f"conditional statistics: {'yes' if self.__config.conditional else 'no'}"
        )
        print(f"precip. mask method: {self.__config.mask_method}")
        print(f"probability matching: {self.__config.probmatching_method}")
        print(f"FFT method: {self.__config.fft_method}")
        print(f"domain: {self.__config.domain}")
        print("")

        print("Parameters")
        print("----------")
        if isinstance(self.__timesteps, int):
            print(f"number of time steps: {self.__timesteps}")
        else:
            print(f"time steps: {self.__timesteps}")
        print(f"ensemble size: {self.__config.n_ens_members}")
        print(f"parallel threads: {self.__config.num_workers}")
        print(f"number of cascade levels: {self.__config.n_cascade_levels}")
        print(f"order of the AR(p) model: {self.__config.ar_order}")

        # Resolve the BPS velocity-perturbation parameters here (side effect of
        # printing them), falling back to the published defaults when the user
        # did not supply "p_par"/"p_perp".
        if self.__config.velocity_perturbation_method == "bps":
            self.__params.velocity_perturbations_parallel = (
                self.__params.velocity_perturbation_kwargs.get(
                    "p_par", noise.motion.get_default_params_bps_par()
                )
            )
            self.__params.velocity_perturbations_perpendicular = (
                self.__params.velocity_perturbation_kwargs.get(
                    "p_perp", noise.motion.get_default_params_bps_perp()
                )
            )
            print(
                f"vel. pert. parallel: {self.__params.velocity_perturbations_parallel[0]},{self.__params.velocity_perturbations_parallel[1]},{self.__params.velocity_perturbations_parallel[2]}"
            )
            print(
                f"vel. pert. perpendicular: {self.__params.velocity_perturbations_perpendicular[0]},{self.__params.velocity_perturbations_perpendicular[1]},{self.__params.velocity_perturbations_perpendicular[2]}"
            )
        else:
            (
                self.__params.velocity_perturbations_parallel,
                self.__params.velocity_perturbations_perpendicular,
            ) = (None, None)

        if self.__config.conditional or self.__config.mask_method is not None:
            print(f"precip. intensity threshold: {self.__params.precip_threshold}")
        print(f"no-rain fraction threshold for radar: {self.__config.norain_threshold}")
        print("")

    def __initialize_nowcast_components(self):
        """
        Initialize the FFT, bandpass filters, decomposition methods, and
        extrapolation method.
""" # Initialize number of ensemble workers self.__params.num_ensemble_workers = min( self.__config.n_ens_members, self.__config.num_workers ) M, N = self.__precip.shape[1:] # Extract the spatial dimensions (height, width) # Initialize FFT method self.__params.fft = utils.get_method( self.__config.fft_method, shape=(M, N), n_threads=self.__config.num_workers ) # Initialize the band-pass filter for the cascade decomposition filter_method = cascade.get_method(self.__config.bandpass_filter_method) self.__params.bandpass_filter = filter_method( (M, N), self.__config.n_cascade_levels, **(self.__params.filter_kwargs or {}), ) # Get the decomposition method (e.g., FFT) ( self.__params.decomposition_method, self.__params.recomposition_method, ) = cascade.get_method(self.__config.decomposition_method) # Get the extrapolation method (e.g., semilagrangian) self.__params.extrapolation_method = extrapolation.get_method( self.__config.extrapolation_method ) # Generate the mesh grid for spatial coordinates x_values, y_values = np.meshgrid(np.arange(N), np.arange(M)) self.__params.xy_coordinates = np.stack([x_values, y_values]) self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy() # Determine the domain mask from non-finite values in the precipitation data self.__params.domain_mask = np.logical_or.reduce( [~np.isfinite(self.__precip[i, :]) for i in range(self.__precip.shape[0])] ) print("Blended nowcast components initialized successfully.") def __prepare_radar_and_NWP_fields(self): """ Prepare radar and NWP precipitation fields for nowcasting. This includes generating a threshold mask, transforming fields into Lagrangian coordinates, cascade decomposing/recomposing, and checking for zero-precip areas. The results are stored in class attributes. """ # determine the precipitation threshold mask if self.__config.conditional: # TODO: is this logical_and correct here? Now only those places where precip is in all images is saved? 
            self.__params.mask_threshold = np.logical_and.reduce(
                [
                    self.__precip[i, :, :] >= self.__params.precip_threshold
                    for i in range(self.__precip.shape[0])
                ]
            )
        else:
            self.__params.mask_threshold = None

        # we need to know the zerovalue of precip to replace the mask when decomposing after
        # extrapolation
        # NOTE(review): np.nanmin(self.__precip_nowcast) raises when
        # precip_nowcast is None (allowed when nowcasting_method is not
        # "external_nowcast") — a guard may be needed here; TODO confirm.
        self.__params.nowcast_zerovalue = np.nanmin(self.__precip_nowcast)
        self.__params.precip_zerovalue = np.nanmin(self.__precip)

        # 1. Start with the radar rainfall fields. We want the fields in a Lagrangian
        # space. Advect the previous precipitation fields to the same position with
        # the most recent one (i.e. transform them into the Lagrangian coordinates).
        self.__state.extrapolation_kwargs["xy_coords"] = self.__params.xy_coordinates
        res = []

        def transform_to_lagrangian(precip, i):
            # Advect field i forward by (ar_order - i) steps so that all input
            # fields line up with the most recent observation.
            return self.__params.extrapolation_method(
                precip[i, :, :],
                self.__velocity,
                self.__config.ar_order - i,
                "min",
                allow_nonfinite_values=True,
                **self.__state.extrapolation_kwargs.copy(),
            )[-1]

        if not DASK_IMPORTED:
            # Process each earlier precipitation field directly
            for i in range(self.__config.ar_order):
                self.__precip[i, :, :] = transform_to_lagrangian(self.__precip, i)
        else:
            # Use Dask delayed for parallelization if DASK_IMPORTED is True
            for i in range(self.__config.ar_order):
                res.append(dask.delayed(transform_to_lagrangian)(self.__precip, i))
            num_workers_ = (
                len(res)
                if self.__config.num_workers > len(res)
                else self.__config.num_workers
            )
            self.__precip = np.stack(
                list(dask.compute(*res, num_workers=num_workers_))
                + [self.__precip[-1, :, :]]
            )

        # Replace non-finite values with the minimum value for each field
        self.__precip = self.__precip.copy()
        for i in range(self.__precip.shape[0]):
            self.__precip[i, ~np.isfinite(self.__precip[i, :])] = np.nanmin(
                self.__precip[i, :]
            )
        if self.__precip_nowcast is not None:
            self.__precip_nowcast = self.__precip_nowcast.copy()
            for ens_mem in range(self.__precip_nowcast.shape[0]):
                for t in range(self.__precip_nowcast.shape[1]):
                    self.__precip_nowcast[
                        ens_mem,
                        t,
                        ~np.isfinite(self.__precip_nowcast[ens_mem, t, :, :]),
                    ] = np.nanmin(self.__precip_nowcast[ens_mem, t, :, :])

        # Perform the cascade decomposition for the input precip fields and,
        # if necessary, for the (NWP) model fields
        # Compute the cascade decompositions of the input precipitation fields
        precip_forecast_decomp = []
        for i in range(self.__config.ar_order + 1):
            precip_forecast = self.__params.decomposition_method(
                self.__precip[i, :, :],
                self.__params.bandpass_filter,
                mask=self.__params.mask_threshold,
                fft_method=self.__params.fft,
                output_domain=self.__config.domain,
                normalize=True,
                compute_stats=True,
                compact_output=True,
            )
            precip_forecast_decomp.append(precip_forecast)

        # Rearrange the cascaded into a four-dimensional array of shape
        # (n_cascade_levels,ar_order+1,m,n) for the autoregressive model
        self.__state.precip_cascades = nowcast_utils.stack_cascades(
            precip_forecast_decomp, self.__config.n_cascade_levels
        )

        precip_forecast_decomp = precip_forecast_decomp[-1]
        self.__state.mean_extrapolation = np.array(precip_forecast_decomp["means"])
        self.__state.std_extrapolation = np.array(precip_forecast_decomp["stds"])

        # Decompose precomputed nowcasts and rearange them again into the required components
        if self.__precip_nowcast is not None:
            if self.__precip_nowcast.shape[0] == 1:
                # Single member: no need for a thread pool
                results = [self.__decompose_member(self.__precip_nowcast[0])]
            else:
                with ThreadPool(self.__config.num_workers) as pool:
                    results = pool.map(
                        partial(self.__decompose_member),
                        list(self.__precip_nowcast),
                    )
            self.__state.precip_nowcast_cascades = np.array(
                [result["precip_nowcast_decomp"] for result in results]
            ).swapaxes(1, 2)
            self.__state.mean_nowcast = np.array(
                [result["precip_nowcast_means"] for result in results]
            ).swapaxes(1, 2)
            self.__state.std_nowcast = np.array(
                [result["precip_nowcast_stds"] for result in results]
            ).swapaxes(1, 2)

        # If necessary, recompose (NWP) model forecasts
        self.__state.precip_models_cascades = None
        if self.__params.precip_models_provided_is_cascade:
            self.__state.precip_models_cascades = self.__precip_models
            # Inline logic of _compute_cascade_recomposition_nwp
            temp_precip_models = []
            for i in range(self.__precip_models.shape[0]):
                precip_model = []
                for time_step in range(self.__precip_models.shape[1]):
                    # Use the recomposition method to rebuild the rainfall fields
                    recomposed = self.__params.recomposition_method(
                        self.__precip_models[i, time_step]
                    )
                    precip_model.append(recomposed)
                temp_precip_models.append(precip_model)
            self.__precip_models = np.stack(temp_precip_models)

        # Check for zero input fields in the radar, nowcast and NWP data.
        self.__params.zero_precip_radar = check_norain(
            self.__precip,
            self.__params.precip_threshold,
            self.__config.norain_threshold,
            self.__params.noise_kwargs["win_fun"],
        )
        # The norain fraction threshold used for nwp is the default value of 0.0,
        # since nwp does not suffer from clutter.
        self.__params.zero_precip_model_fields = check_norain(
            self.__precip_models,
            self.__params.precip_threshold,
            self.__config.norain_threshold,
            self.__params.noise_kwargs["win_fun"],
        )

    def __decompose_member(self, member_field):
        """Loop over timesteps for a single ensemble member."""
        results_decomp = []
        means = []
        stds = []
        for t in range(member_field.shape[0]):  # loop over timesteps
            res = self.__params.decomposition_method(
                field=member_field[t, :, :],
                bp_filter=self.__params.bandpass_filter,
                n_levels=self.__config.n_cascade_levels,
                mask=self.__params.mask_threshold,
                method="fft",
                fft_method=self.__params.fft,
                output_domain=self.__config.domain,
                compute_stats=True,
                normalize=True,
                compact_output=True,
            )
            results_decomp.append(res["cascade_levels"])
            means.append(res["means"])
            stds.append(res["stds"])
        # Keyed results so the caller can stack per-member lists into arrays
        results = {
            "precip_nowcast_decomp": results_decomp,
            "precip_nowcast_means": means,
            "precip_nowcast_stds": stds,
        }
        return results

    def __zero_precipitation_forecast(self):
        """
        Generate a zero-precipitation forecast (filled with the minimum precip
        value) when no precipitation above the threshold is detected.
        The forecast is optionally returned or passed to a callback.
        """
        print(
            "No precipitation above the threshold found in both the radar and NWP fields"
        )
        print("The resulting forecast will contain only zeros")
        # Create the output list
        precip_forecast = [[] for j in range(self.__config.n_ens_members)]

        # Save per time step to ensure the array does not become too large if
        # no return_output is requested and callback is not None.
        for t, subtimestep_idx in enumerate(self.__timesteps):
            # If the timestep is not the first one, we need to provide the zero forecast
            if t > 0:
                # Create an empty np array with shape [n_ens_members, rows, cols]
                # and fill it with the minimum value from precip (corresponding to
                # zero precipitation)
                N, M = self.__precip.shape[1:]
                precip_forecast_workers = np.full(
                    (self.__config.n_ens_members, N, M), self.__params.precip_zerovalue
                )
                # subtimestep_idx is truthy only for output timesteps
                # (presumably the binned sub-timestep entry — TODO confirm
                # against __determine_subtimesteps_and_nowcast_time_step)
                if subtimestep_idx:
                    if self.__config.callback is not None:
                        if precip_forecast_workers.shape[1] > 0:
                            self.__config.callback(precip_forecast_workers.squeeze())
                    if self.__config.return_output:
                        for j in range(self.__config.n_ens_members):
                            precip_forecast[j].append(precip_forecast_workers[j])
                precip_forecast_workers = None

        if self.__config.measure_time:
            zero_precip_time = time.time() - self.__start_time_init

        if self.__config.return_output:
            precip_forecast_all_members_all_times = np.stack(
                [
                    np.stack(precip_forecast[j])
                    for j in range(self.__config.n_ens_members)
                ]
            )
            if self.__config.measure_time:
                return (
                    precip_forecast_all_members_all_times,
                    zero_precip_time,
                    zero_precip_time,
                )
            else:
                return precip_forecast_all_members_all_times
        else:
            return None

    def __prepare_nowcast_for_zero_radar(self):
        """
        Handle the case when radar fields indicate zero precipitation. This
        method updates the cascade with NWP data, uses the NWP velocity field,
        and initializes the noise model based on the time step with the most
        rain.
        """
        # If zero_precip_radar is True, only use the velocity field of the NWP
        # forecast. I.e., velocity (radar) equals velocity_model at the first
        # time step.
        # Use the velocity from velocity_models at time step 0
        self.__velocity = self.__velocity_models[:, 0, :, :, :].astype(
            np.float64, copy=False
        )
        # Take the average over the first axis, which corresponds to n_models
        # (hence, the model average)
        self.__velocity = np.mean(self.__velocity, axis=0)

        # Initialize the noise method.
        # If zero_precip_radar is True, initialize noise based on the NWP field time
        # step where the fraction of rainy cells is highest (because other lead times
        # might be zero as well). Else, initialize the noise with the radar
        # rainfall data
        # Initialize noise based on the NWP field time step where the fraction of rainy cells is highest
        if self.__params.precip_threshold is None:
            self.__params.precip_threshold = np.nanmin(self.__precip_models)

        # Scan all models and lead times for the field with the most rainy
        # pixels above the threshold.
        max_rain_pixels = -1
        max_rain_pixels_j = -1
        max_rain_pixels_t = -1
        for j in range(self.__precip_models.shape[0]):
            for t in self.__timesteps:
                rain_pixels = self.__precip_models[j][t][
                    self.__precip_models[j][t] > self.__params.precip_threshold
                ].size
                if rain_pixels > max_rain_pixels:
                    max_rain_pixels = rain_pixels
                    max_rain_pixels_j = j
                    max_rain_pixels_t = t
        self.__state.precip_noise_input = self.__precip_models[max_rain_pixels_j][
            max_rain_pixels_t
        ]
        self.__state.precip_noise_input = self.__state.precip_noise_input.astype(
            np.float64, copy=False
        )

        # If zero_precip_radar, make sure that precip_cascade does not contain
        # only nans or infs. If so, fill it with the zero value.
        if self.__state.precip_models_cascades is not None:
            # Decomposed NWP cascades were supplied: use their minimum as fill
            self.__state.precip_cascades[~np.isfinite(self.__state.precip_cascades)] = (
                np.nanmin(
                    self.__state.precip_models_cascades[
                        max_rain_pixels_j, max_rain_pixels_t
                    ]["cascade_levels"]
                )
            )
        else:
            # Raw NWP fields: decompose the rainiest field first, then fill
            precip_models_cascade_timestep = self.__params.decomposition_method(
                self.__precip_models[max_rain_pixels_j, max_rain_pixels_t, :, :],
                bp_filter=self.__params.bandpass_filter,
                fft_method=self.__params.fft,
                output_domain=self.__config.domain,
                normalize=True,
                compute_stats=True,
                compact_output=True,
            )["cascade_levels"]
            self.__state.precip_cascades[~np.isfinite(self.__state.precip_cascades)] = (
                np.nanmin(precip_models_cascade_timestep)
            )

        # Make sure precip_noise_input is three-dimensional
        if len(self.__state.precip_noise_input.shape) != 3:
            self.__state.precip_noise_input = self.__state.precip_noise_input[
                np.newaxis, :, :
            ]

    def __initialize_noise(self):
        """
        Initialize noise-based perturbations if configured, computing any
        required adjustment coefficients and setting up the perturbation
        generator.
        """
        if self.__config.noise_method is not None:
            # get methods for perturbations
            init_noise, self.__params.noise_generator = noise.get_method(
                self.__config.noise_method
            )

            # initialize the perturbation generator for the precipitation field
            self.__params.perturbation_generator = init_noise(
                self.__state.precip_noise_input,
                fft_method=self.__params.fft,
                **self.__params.noise_kwargs,
            )

            if self.__config.noise_stddev_adj == "auto":
                print("Computing noise adjustment coefficients... ", end="", flush=True)
                if self.__config.measure_time:
                    starttime = time.time()

                precip_forecast_min = np.min(self.__state.precip_noise_input)
                self.__params.noise_std_coeffs = noise.utils.compute_noise_stddev_adjs(
                    self.__state.precip_noise_input[-1, :, :],
                    self.__params.precip_threshold,
                    precip_forecast_min,
                    self.__params.bandpass_filter,
                    self.__params.decomposition_method,
                    self.__params.perturbation_generator,
                    self.__params.noise_generator,
                    20,
                    conditional=True,
                    num_workers=self.__config.num_workers,
                    seed=self.__config.seed,
                )

                if self.__config.measure_time:
                    _ = self.__measure_time("Initialize noise", starttime)
                else:
                    print("done.")
            elif self.__config.noise_stddev_adj == "fixed":
                # Fixed per-level coefficients 1 / (0.75 + 0.09 k)
                f = lambda k: 1.0 / (0.75 + 0.09 * k)
                self.__params.noise_std_coeffs = [
                    f(k) for k in range(1, self.__config.n_cascade_levels + 1)
                ]
            else:
                self.__params.noise_std_coeffs = np.ones(self.__config.n_cascade_levels)

            if self.__config.noise_stddev_adj is not None:
                print(f"noise std. dev. coeffs: {self.__params.noise_std_coeffs}")
        else:
            self.__params.perturbation_generator = None
            self.__params.noise_generator = None
            self.__params.noise_std_coeffs = None

    def __estimate_ar_parameters_radar(self):
        """
        Estimate autoregressive (AR) parameters for the radar rainfall field.
        If precipitation exists, compute temporal auto-correlations;
        otherwise, use predefined climatological values. Adjust coefficients
        if necessary and estimate AR model parameters.
""" # If there are values in the radar fields, compute the auto-correlations GAMMA = np.empty((self.__config.n_cascade_levels, self.__config.ar_order)) if not self.__params.zero_precip_radar: # compute lag-l temporal auto-correlation coefficients for each cascade level for i in range(self.__config.n_cascade_levels): GAMMA[i, :] = correlation.temporal_autocorrelation( self.__state.precip_cascades[i], mask=self.__params.mask_threshold ) # Else, use standard values for the auto-correlations else: # Get the climatological lag-1 and lag-2 auto-correlation values from Table 2 # in `BPS2004`. # Hard coded, change to own (climatological) values when present. # TODO: add user warning here so users can be aware of this without reading the code? GAMMA = np.array( [ [0.99805, 0.9925, 0.9776, 0.9297, 0.796, 0.482, 0.079, 0.0006], [0.9933, 0.9752, 0.923, 0.750, 0.367, 0.069, 0.0018, 0.0014], ] ) # Check whether the number of cascade_levels is correct if GAMMA.shape[1] > self.__config.n_cascade_levels: GAMMA = GAMMA[:, 0 : self.__config.n_cascade_levels] elif GAMMA.shape[1] < self.__config.n_cascade_levels: # Get the number of cascade levels that is missing n_extra_lev = self.__config.n_cascade_levels - GAMMA.shape[1] # Append the array with correlation values of 10e-4 GAMMA = np.append( GAMMA, [np.repeat(0.0006, n_extra_lev), np.repeat(0.0014, n_extra_lev)], axis=1, ) # Finally base GAMMA.shape[0] on the AR-level if self.__config.ar_order == 1: GAMMA = GAMMA[0, :] if self.__config.ar_order > 2: for _ in range(self.__config.ar_order - 2): GAMMA = np.vstack((GAMMA, GAMMA[1, :])) # Finally, transpose GAMMA to ensure that the shape is the same as np.empty((n_cascade_levels, ar_order)) GAMMA = GAMMA.transpose() assert GAMMA.shape == ( self.__config.n_cascade_levels, self.__config.ar_order, ) # Print the GAMMA value nowcast_utils.print_corrcoefs(GAMMA) if self.__config.ar_order == 2: # adjust the lag-2 correlation coefficient to ensure that the AR(p) # process is stationary for i in 
range(self.__config.n_cascade_levels): GAMMA[i, 1] = autoregression.adjust_lag2_corrcoef2( GAMMA[i, 0], GAMMA[i, 1] ) # estimate the parameters of the AR(p) model from the auto-correlation # coefficients self.__params.PHI = np.empty( (self.__config.n_cascade_levels, self.__config.ar_order + 1) ) for i in range(self.__config.n_cascade_levels): self.__params.PHI[i, :] = autoregression.estimate_ar_params_yw(GAMMA[i, :]) nowcast_utils.print_ar_params(self.__params.PHI) def __multiply_precip_cascade_to_match_ensemble_members(self): """ Duplicate the last p-1 precipitation cascades across all ensemble members for the AR(p) model, ensuring each member has the required input structure. """ self.__state.precip_cascades = np.stack( [ [ self.__state.precip_cascades[i][-self.__config.ar_order :].copy() for i in range(self.__config.n_cascade_levels) ] ] * self.__config.n_ens_members ) def __initialize_random_generators(self): """ Initialize random generators for precipitation noise, probability matching, and velocity perturbations. Each ensemble member gets a separate generator, ensuring reproducibility and controlled randomness in forecasts. 
""" seed = self.__config.seed if self.__config.noise_method is not None: self.__state.randgen_precip = [] for j in range(self.__config.n_ens_members): rs = np.random.RandomState(seed) self.__state.randgen_precip.append(rs) seed = rs.randint(0, high=1e9) if self.__config.probmatching_method is not None: self.__state.randgen_probmatching = [] for j in range(self.__config.n_ens_members): rs = np.random.RandomState(seed) self.__state.randgen_probmatching.append(rs) seed = rs.randint(0, high=1e9) if self.__config.velocity_perturbation_method is not None: self.__state.randgen_motion = [] for j in range(self.__config.n_ens_members): rs = np.random.RandomState(seed) self.__state.randgen_motion.append(rs) seed = rs.randint(0, high=1e9) ( init_velocity_noise, self.__params.generate_velocity_noise, ) = noise.get_method(self.__config.velocity_perturbation_method) # initialize the perturbation generators for the motion field self.__params.velocity_perturbations = [] for j in range(self.__config.n_ens_members): kwargs = { "randstate": self.__state.randgen_motion[j], "p_par": self.__params.velocity_perturbations_parallel, "p_perp": self.__params.velocity_perturbations_perpendicular, } vp_ = init_velocity_noise( self.__velocity, 1.0 / self.__config.kmperpixel, self.__config.timestep, **kwargs, ) self.__params.velocity_perturbations.append(vp_) else: ( self.__params.velocity_perturbations, self.__params.generate_velocity_noise, ) = (None, None) def __prepare_forecast_loop(self): """ Initialize variables and structures needed for the forecast loop, including displacement tracking, mask parameters, noise handling, FFT objects, and extrapolation scaling for nowcasting. 
""" # Empty arrays for the previous displacements and the forecast cascade self.__state.previous_displacement = np.stack( [None for j in range(self.__config.n_ens_members)] ) self.__state.previous_displacement_noise_cascade = np.stack( [None for j in range(self.__config.n_ens_members)] ) self.__state.previous_displacement_prob_matching = np.stack( [None for j in range(self.__config.n_ens_members)] ) self.__state.final_blended_forecast = [ [] for j in range(self.__config.n_ens_members) ] if self.__config.mask_method == "incremental": # get mask parameters self.__params.mask_rim = self.__params.mask_kwargs.get("mask_rim", 10) self.__params.max_mask_rim = self.__params.mask_kwargs.get( "max_mask_rim", 10 ) mask_f = self.__params.mask_kwargs.get("mask_f", 1.0) # initialize the structuring element struct = generate_binary_structure(2, 1) # iterate it to expand it nxn n = mask_f * self.__config.timestep / self.__config.kmperpixel self.__params.struct = iterate_structure(struct, int((n - 1) / 2.0)) else: self.__params.mask_rim, self.__params.struct = None, None if self.__config.noise_method is None: self.__state.final_blended_forecast_non_perturbed = [ self.__state.precip_cascades[0][i].copy() for i in range(self.__config.n_cascade_levels) ] else: self.__state.final_blended_forecast_non_perturbed = None self.__params.fft_objs = [] for i in range(self.__config.n_ens_members): self.__params.fft_objs.append( utils.get_method( self.__config.fft_method, shape=self.__state.precip_cascades.shape[-2:], ) ) # initizalize the current and previous extrapolation forecast scale for the nowcasting component # phi1 / (1 - phi2), see BPS2004 self.__state.rho_extrap_cascade_prev = np.repeat( 1.0, self.__params.PHI.shape[0] ) self.__state.rho_extrap_cascade = self.__params.PHI[:, 0] / ( 1.0 - self.__params.PHI[:, 1] ) def __initialize_noise_cascades(self): """ Initialize the noise cascade with identical noise for all AR(n) steps We also need to return the mean and standard deviations of 
        the noise for the recombination of the noise before advecting it.
        """
        self.__state.precip_noise_cascades = np.zeros(
            self.__state.precip_cascades.shape
        )
        self.__state.precip_mean_noise = np.zeros(
            (self.__config.n_ens_members, self.__config.n_cascade_levels)
        )
        self.__state.precip_std_noise = np.zeros(
            (self.__config.n_ens_members, self.__config.n_cascade_levels)
        )
        if self.__config.noise_method:
            for j in range(self.__config.n_ens_members):
                # Generate one spatially correlated noise field for member j...
                epsilon = self.__params.noise_generator(
                    self.__params.perturbation_generator,
                    randstate=self.__state.randgen_precip[j],
                    fft_method=self.__params.fft_objs[j],
                    domain=self.__config.domain,
                )
                # ...and decompose it into cascade levels.
                epsilon_decomposed = self.__params.decomposition_method(
                    epsilon,
                    self.__params.bandpass_filter,
                    fft_method=self.__params.fft_objs[j],
                    input_domain=self.__config.domain,
                    output_domain=self.__config.domain,
                    compute_stats=True,
                    normalize=True,
                    compact_output=True,
                )
                self.__state.precip_mean_noise[j] = epsilon_decomposed["means"]
                self.__state.precip_std_noise[j] = epsilon_decomposed["stds"]
                for i in range(self.__config.n_cascade_levels):
                    epsilon_temp = epsilon_decomposed["cascade_levels"][i]
                    epsilon_temp *= self.__params.noise_std_coeffs[i]
                    # Identical noise for every AR(n) slot, as the docstring states.
                    for n in range(self.__config.ar_order):
                        self.__state.precip_noise_cascades[j][i][n] = epsilon_temp
                # Drop references so the large temporaries can be collected.
                epsilon_decomposed = None
                epsilon_temp = None

    def __determine_subtimesteps_and_nowcast_time_step(self, t, subtimestep_idx):
        """
        Determine the current sub-timesteps and check if the current time step
        requires nowcasting. Updates the `is_nowcast_time_step` flag
        accordingly.
        """
        if self.__params.time_steps_is_list:
            # Map sub-timestep indices back to the user-supplied (possibly
            # fractional) timestep values.
            self.__state.subtimesteps = [
                self.__params.original_timesteps[t_] for t_ in subtimestep_idx
            ]
        else:
            self.__state.subtimesteps = [t]

        if (self.__params.time_steps_is_list and self.__state.subtimesteps) or (
            not self.__params.time_steps_is_list and t > 0
        ):
            self.__state.is_nowcast_time_step = True
        else:
            self.__state.is_nowcast_time_step = False

        if self.__state.is_nowcast_time_step:
            print(
                f"Computing nowcast for time step {t}... ",
                end="",
                flush=True,
            )

    def __decompose_nwp_if_needed_and_fill_nans_in_nwp(self, t):
        """
        Decompose NWP model precipitation fields if needed, store cascade
        components, and replace any NaN or infinite values with appropriate
        minimum values.
        """
        if self.__state.precip_models_cascades is not None:
            # Cascades were pre-computed; just select the current timestep.
            decomp_precip_models = list(self.__state.precip_models_cascades[:, t])
        else:
            if self.__precip_models.shape[0] == 1:
                decomp_precip_models = [
                    self.__params.decomposition_method(
                        self.__precip_models[0, t, :, :],
                        bp_filter=self.__params.bandpass_filter,
                        fft_method=self.__params.fft,
                        output_domain=self.__config.domain,
                        normalize=True,
                        compute_stats=True,
                        compact_output=True,
                    )
                ]
            else:
                # Decompose all model members for this timestep in parallel.
                with ThreadPool(self.__config.num_workers) as pool:
                    decomp_precip_models = pool.map(
                        partial(
                            self.__params.decomposition_method,
                            bp_filter=self.__params.bandpass_filter,
                            fft_method=self.__params.fft,
                            output_domain=self.__config.domain,
                            normalize=True,
                            compute_stats=True,
                            compact_output=True,
                        ),
                        list(self.__precip_models[:, t, :, :]),
                    )

        self.__state.precip_models_cascades_timestep = np.array(
            [decomp["cascade_levels"] for decomp in decomp_precip_models]
        )
        self.__state.mean_models_timestep = np.array(
            [decomp["means"] for decomp in decomp_precip_models]
        )
        self.__state.std_models_timestep = np.array(
            [decomp["stds"] for decomp in decomp_precip_models]
        )

        # Check if the NWP fields contain nans or infinite numbers.
If so, # fill these with the minimum value present in precip (corresponding to # zero rainfall in the radar observations) # Ensure that the NWP cascade and fields do no contain any nans or infinite number # Fill nans and infinite numbers with the minimum value present in precip self.__state.precip_models_timestep = self.__precip_models[:, t, :, :].astype( np.float64, copy=False ) # (corresponding to zero rainfall in the radar observations) min_cascade = np.nanmin(self.__state.precip_cascades) min_precip = np.nanmin(self.__precip) self.__state.precip_models_cascades_timestep[ ~np.isfinite(self.__state.precip_models_cascades_timestep) ] = min_cascade self.__state.precip_models_timestep[ ~np.isfinite(self.__state.precip_models_timestep) ] = min_precip # Also set any nans or infs in the mean and sigma of the cascade to # respectively 0.0 and 1.0 self.__state.mean_models_timestep[ ~np.isfinite(self.__state.mean_models_timestep) ] = 0.0 self.__state.std_models_timestep[ ~np.isfinite(self.__state.std_models_timestep) ] = 0.0 def __find_nowcast_NWP_combination(self, t): """ Determine which (NWP) models will be combined with which nowcast ensemble members. With the way it is implemented at this moment: n_ens_members of the output equals the maximum number of (ensemble) members in the input (either the nowcasts or NWP). """ self.__state.velocity_models_timestep = self.__velocity_models[ :, t, :, :, : ].astype(np.float64, copy=False) # Make sure the number of model members is not larger than or equal to n_ens_members n_model_members = self.__state.precip_models_cascades_timestep.shape[0] if n_model_members > self.__config.n_ens_members: raise ValueError( "The number of NWP model members is larger than the given number of ensemble members. n_model_members <= n_ens_members." ) # Check if NWP models/members should be used individually, or if all of # them are blended together per nowcast ensemble member. 
if self.__config.blend_nwp_members: self.__state.mapping_list_NWP_member_to_ensemble_member = None elif self.__config.nowcasting_method == "external_nowcast": self.__state.precip_nowcast_timestep = self.__precip_nowcast[ :, t, :, : ].astype(np.float64, copy=False) n_ens_members_provided = self.__precip_nowcast.shape[0] if n_ens_members_provided > self.__config.n_ens_members: raise ValueError( "The number of nowcast ensemble members provided is larger than the given number of ensemble members requested. n_ens_members_provided <= n_ens_members." ) n_ens_members_max = self.__config.n_ens_members n_ens_members_min = min(n_ens_members_provided, n_model_members) # Also make a list of the model index numbers. These indices are needed # for indexing the right climatological skill file when pysteps calculates # the blended forecast in parallel. if n_model_members > 1: self.__state.mapping_list_NWP_member_to_ensemble_member = np.arange( n_model_members ) else: self.__state.mapping_list_NWP_member_to_ensemble_member = [0] def repeat_precip_to_match_ensemble_size(repeats, model_type): if model_type == "nwp": print("Repeating the NWP model for all ensemble members") self.__state.precip_models_cascades_timestep = np.repeat( self.__state.precip_models_cascades_timestep, repeats, axis=0, ) self.__state.mean_models_timestep = np.repeat( self.__state.mean_models_timestep, repeats, axis=0 ) self.__state.std_models_timestep = np.repeat( self.__state.std_models_timestep, repeats, axis=0 ) self.__state.velocity_models_timestep = np.repeat( self.__state.velocity_models_timestep, repeats, axis=0 ) # For the prob. 
matching self.__state.precip_models_timestep = np.repeat( self.__state.precip_models_timestep, repeats, axis=0 ) # Finally, for the model indices self.__state.mapping_list_NWP_member_to_ensemble_member = np.repeat( self.__state.mapping_list_NWP_member_to_ensemble_member, repeats, axis=0, ) if model_type == "nowcast": print("Repeating the nowcast for all ensemble members") self.__state.precip_nowcast_cascades = np.repeat( self.__state.precip_nowcast_cascades, repeats, axis=0, ) self.__precip_nowcast = np.repeat( self.__precip_nowcast, repeats, axis=0, ) self.__state.mean_nowcast = np.repeat( self.__state.mean_nowcast, repeats, axis=0 ) self.__state.std_nowcast = np.repeat( self.__state.std_nowcast, repeats, axis=0 ) # For the prob. matching self.__state.precip_nowcast_timestep = np.repeat( self.__state.precip_nowcast_timestep, repeats, axis=0 ) # Now, repeat the nowcast ensemble members or the nwp models/members until # it has the same amount of members as n_ens_members_max. For instance, if # you have 10 ensemble nowcasts members and 3 NWP members, the output will # be an ensemble of 10 members. Hence, the three NWP members are blended # with the first three members of the nowcast (member one with member one, # two with two, etc.), subsequently, the same NWP members are blended with # the next three members (NWP member one with member 4, NWP member 2 with # member 5, etc.), until 10 is reached. 
if n_ens_members_min != n_ens_members_max: if n_model_members == 1: repeat_precip_to_match_ensemble_size(n_ens_members_max, "nwp") if n_ens_members_provided == 1: repeat_precip_to_match_ensemble_size(n_ens_members_max, "nowcast") if n_model_members == n_ens_members_min and n_model_members != 1: print("Repeating the NWP model for all ensemble members") repeats = [ (n_ens_members_max + i) // n_ens_members_min for i in range(n_ens_members_min) ] repeat_precip_to_match_ensemble_size(repeats, "nwp") if ( n_ens_members_provided == n_ens_members_min and n_ens_members_provided != 1 ): repeat_precip_to_match_ensemble_size(repeats, "nowcast") else: # Start with determining the maximum and mimimum number of members/models # in both input products n_ens_members_max = max(self.__config.n_ens_members, n_model_members) n_ens_members_min = min(self.__config.n_ens_members, n_model_members) # Also make a list of the model index numbers. These indices are needed # for indexing the right climatological skill file when pysteps calculates # the blended forecast in parallel. if n_model_members > 1: self.__state.mapping_list_NWP_member_to_ensemble_member = np.arange( n_model_members ) else: self.__state.mapping_list_NWP_member_to_ensemble_member = [0] # Now, repeat the nowcast ensemble members or the nwp models/members until # it has the same amount of members as n_ens_members_max. For instance, if # you have 10 ensemble nowcasts members and 3 NWP members, the output will # be an ensemble of 10 members. Hence, the three NWP members are blended # with the first three members of the nowcast (member one with member one, # two with two, etc.), subsequently, the same NWP members are blended with # the next three members (NWP member one with member 4, NWP member 2 with # member 5, etc.), until 10 is reached. 
if n_ens_members_min != n_ens_members_max: if n_model_members == 1: self.__state.precip_models_cascades_timestep = np.repeat( self.__state.precip_models_cascades_timestep, n_ens_members_max, axis=0, ) self.__state.mean_models_timestep = np.repeat( self.__state.mean_models_timestep, n_ens_members_max, axis=0 ) self.__state.std_models_timestep = np.repeat( self.__state.std_models_timestep, n_ens_members_max, axis=0 ) self.__state.velocity_models_timestep = np.repeat( self.__state.velocity_models_timestep, n_ens_members_max, axis=0 ) # For the prob. matching self.__state.precip_models_timestep = np.repeat( self.__state.precip_models_timestep, n_ens_members_max, axis=0 ) # Finally, for the model indices self.__state.mapping_list_NWP_member_to_ensemble_member = np.repeat( self.__state.mapping_list_NWP_member_to_ensemble_member, n_ens_members_max, axis=0, ) elif n_model_members == n_ens_members_min: repeats = [ (n_ens_members_max + i) // n_ens_members_min for i in range(n_ens_members_min) ] self.__state.precip_models_cascades_timestep = np.repeat( self.__state.precip_models_cascades_timestep, repeats, axis=0, ) self.__state.mean_models_timestep = np.repeat( self.__state.mean_models_timestep, repeats, axis=0 ) self.__state.std_models_timestep = np.repeat( self.__state.std_models_timestep, repeats, axis=0 ) self.__state.velocity_models_timestep = np.repeat( self.__state.velocity_models_timestep, repeats, axis=0 ) # For the prob. matching self.__state.precip_models_timestep = np.repeat( self.__state.precip_models_timestep, repeats, axis=0 ) # Finally, for the model indices self.__state.mapping_list_NWP_member_to_ensemble_member = np.repeat( self.__state.mapping_list_NWP_member_to_ensemble_member, repeats, axis=0, ) def __determine_skill_for_current_timestep(self, t): """ Compute the skill of NWP model forecasts at t=0 using spatial correlation, ensuring skill decreases with increasing scale level. For t>0, update extrapolation skill based on lead time. 
""" if t == 0: # Calculate the initial skill of the (NWP) model forecasts at t=0. self.__params.rho_nwp_models = [] for model_index in range( self.__state.precip_models_cascades_timestep.shape[0] ): rho_value = blending.skill_scores.spatial_correlation( obs=self.__state.precip_cascades[0, :, -1, :, :].copy(), mod=self.__state.precip_models_cascades_timestep[ model_index, :, :, : ].copy(), domain_mask=self.__params.domain_mask, ) self.__params.rho_nwp_models.append(rho_value) self.__params.rho_nwp_models = np.stack(self.__params.rho_nwp_models) # Ensure that the model skill decreases with increasing scale level. for model_index in range( self.__state.precip_models_cascades_timestep.shape[0] ): for i in range( 1, self.__state.precip_models_cascades_timestep.shape[1] ): if ( self.__params.rho_nwp_models[model_index, i] > self.__params.rho_nwp_models[model_index, i - 1] ): # Set it equal to the previous scale level self.__params.rho_nwp_models[model_index, i] = ( self.__params.rho_nwp_models[model_index, i - 1] ) # Save this in the climatological skill file blending.clim.save_skill( current_skill=self.__params.rho_nwp_models, validtime=self.__issuetime, outdir_path=self.__config.outdir_path_skill, **self.__params.climatology_kwargs, ) if t > 0: # Determine the skill of the components for lead time (t0 + t) # First for the extrapolation component. Only calculate it when t > 0. ( self.__state.rho_extrap_cascade, self.__state.rho_extrap_cascade_prev, ) = blending.skill_scores.lt_dependent_cor_extrapolation( PHI=self.__params.PHI, correlations=self.__state.rho_extrap_cascade, correlations_prev=self.__state.rho_extrap_cascade_prev, ) def __determine_NWP_skill_for_next_timestep(self, t, j, worker_state): """ Compute the skill of NWP model components for the next lead time (t0 + t), blending with extrapolation skill if configured. Updates the worker state with the final blended skill forecast. 
""" if self.__config.blend_nwp_members: rho_nwp_forecast = [] for model_index in range(self.__params.rho_nwp_models.shape[0]): rho_value = blending.skill_scores.lt_dependent_cor_nwp( lt=(t * int(self.__config.timestep)), correlations=self.__params.rho_nwp_models[model_index], outdir_path=self.__config.outdir_path_skill, n_model=model_index, skill_kwargs=self.__params.climatology_kwargs, ) rho_nwp_forecast.append(rho_value) rho_nwp_forecast = np.stack(rho_nwp_forecast) # Concatenate rho_extrap_cascade and rho_nwp worker_state.rho_final_blended_forecast = np.concatenate( (worker_state.rho_extrap_cascade[None, :], rho_nwp_forecast), axis=0 ) else: # TODO: check if j is the best accessor for this variable rho_nwp_forecast = blending.skill_scores.lt_dependent_cor_nwp( lt=(t * int(self.__config.timestep)), correlations=self.__params.rho_nwp_models[j], outdir_path=self.__config.outdir_path_skill, n_model=worker_state.mapping_list_NWP_member_to_ensemble_member[j], skill_kwargs=self.__params.climatology_kwargs, ) # Concatenate rho_extrap_cascade and rho_nwp worker_state.rho_final_blended_forecast = np.concatenate( (worker_state.rho_extrap_cascade[None, :], rho_nwp_forecast[None, :]), axis=0, ) def __determine_weights_per_component(self, t, worker_state): """ Compute blending weights for each component based on the selected method ('bps' or 'spn'). Weights are determined for both full blending and model-only scenarios, accounting for correlations and covariance. """ start_smoothing_to_final_weights = False if self.__config.timestep_start_full_nwp_weight is not None: if t > self.__config.timestep_start_full_nwp_weight: start_smoothing_to_final_weights = True # Weights following the bps method. These are needed for the velocity # weights prior to the advection step. If weights method spn is # selected, weights will be overwritten with those weights prior to # blending step. # weight = [(extr_field, n_model_fields, noise), n_cascade_levels, ...] 
        if not start_smoothing_to_final_weights:
            worker_state.weights = calculate_weights_bps(
                worker_state.rho_final_blended_forecast
            )
        else:
            # Past the configured timestep, ramp the weights towards full NWP.
            worker_state.weights = calculate_end_weights(
                previous_weights=self.__state.weights,
                timestep=t,
                n_timesteps=self.__timesteps[-1],
                start_full_nwp_weight=self.__config.timestep_start_full_nwp_weight,
                model_only=False,
            )

        # The model only weights
        if (
            self.__config.weights_method == "bps"
            and not start_smoothing_to_final_weights
        ):
            # Determine the weights of the components without the extrapolation
            # cascade, in case this is no data or outside the mask.
            worker_state.weights_model_only = calculate_weights_bps(
                worker_state.rho_final_blended_forecast[1:, :]
            )
        elif (
            self.__config.weights_method == "spn"
            and not start_smoothing_to_final_weights
        ):
            # Only the weights of the components without the extrapolation
            # cascade will be determined here. The full set of weights are
            # determined after the extrapolation step in this method.
            if (
                self.__config.blend_nwp_members
                and worker_state.precip_models_cascades_timestep.shape[0] > 1
            ):
                worker_state.weights_model_only = np.zeros(
                    (
                        worker_state.precip_models_cascades_timestep.shape[0] + 1,
                        self.__config.n_cascade_levels,
                    )
                )
                for i in range(self.__config.n_cascade_levels):
                    # Determine the normalized covariance matrix (containing)
                    # the cross-correlations between the models
                    covariance_nwp_models = np.corrcoef(
                        np.stack(
                            [
                                worker_state.precip_models_cascades_timestep[
                                    n_model, i, :, :
                                ].flatten()
                                for n_model in range(
                                    worker_state.precip_models_cascades_timestep.shape[
                                        0
                                    ]
                                )
                            ]
                        )
                    )
                    # Determine the weights for this cascade level
                    worker_state.weights_model_only[:, i] = calculate_weights_spn(
                        correlations=worker_state.rho_final_blended_forecast[1:, i],
                        covariance=covariance_nwp_models,
                    )
            else:
                # Same as correlation and noise is 1 - correlation
                worker_state.weights_model_only = calculate_weights_bps(
                    worker_state.rho_final_blended_forecast[1:, :],
                )
        elif start_smoothing_to_final_weights:
            worker_state.weights_model_only = calculate_end_weights(
                previous_weights=self.__state.weights_model_only,
                timestep=t,
                n_timesteps=self.__timesteps[-1],
                start_full_nwp_weight=self.__config.timestep_start_full_nwp_weight,
                model_only=True,
            )
        else:
            raise ValueError(
                "Unknown weights method %s: must be 'bps' or 'spn'"
                % self.__config.weights_method
            )
        # Keep the latest weights on the shared state so the smoothing branch
        # above can use them as `previous_weights` on the next timestep.
        self.__state.weights = worker_state.weights
        self.__state.weights_model_only = worker_state.weights_model_only

    def __regress_extrapolation_and_noise_cascades(self, j, worker_state, t):
        """
        Apply autoregressive (AR) updates to the extrapolation and noise
        cascades for the next time step. If noise is enabled, generate and
        decompose a spatially correlated noise field before applying the AR
        process.
        """
        # Determine the epsilon, a cascade of temporally independent
        # but spatially correlated noise
        if self.__config.noise_method is not None:
            # generate noise field
            epsilon = self.__params.noise_generator(
                self.__params.perturbation_generator,
                randstate=worker_state.randgen_precip[j],
                fft_method=self.__params.fft_objs[j],
                domain=self.__config.domain,
            )

            # decompose the noise field into a cascade
            epsilon_decomposed = self.__params.decomposition_method(
                epsilon,
                self.__params.bandpass_filter,
                fft_method=self.__params.fft_objs[j],
                input_domain=self.__config.domain,
                output_domain=self.__config.domain,
                compute_stats=True,
                normalize=True,
                compact_output=True,
            )
        else:
            epsilon_decomposed = None

        # Regress the extrapolation component to the subsequent time step.
        # Iterate the AR(p) model for each cascade level
        if self.__config.nowcasting_method == "external_nowcast":
            for i in range(self.__config.n_cascade_levels):
                # Use a deterministic Externally computed nowcasting model
                worker_state.precip_cascades[j][i] = (
                    self.__state.precip_nowcast_cascades[j][i][t]
                )
        # Follow the 'standard' STEPS blending approach as described in :cite:`Imhoff2023`
        elif self.__config.nowcasting_method == "steps":
            for i in range(self.__config.n_cascade_levels):
                # apply AR(p) process to extrapolation cascade level
                if (
                    epsilon_decomposed is not None
                    or self.__config.velocity_perturbation_method is not None
                ):
                    worker_state.precip_cascades[j][i] = (
                        autoregression.iterate_ar_model(
                            worker_state.precip_cascades[j][i], self.__params.PHI[i, :]
                        )
                    )
                    # Renormalize the cascade
                    # NOTE(review): index 1 is the newest AR slot only when
                    # ar_order == 2 — confirm behavior for other AR orders.
                    worker_state.precip_cascades[j][i][1] /= np.std(
                        worker_state.precip_cascades[j][i][1]
                    )
                else:
                    # use the deterministic AR(p) model computed above if
                    # perturbations are disabled
                    worker_state.precip_cascades[j][i] = (
                        worker_state.final_blended_forecast_non_perturbed[i]
                    )

        if self.__config.noise_method is not None:
            # Regress the noise component to the subsequent time step
            # iterate the AR(p) model for each cascade level
            for i in range(self.__config.n_cascade_levels):
                # normalize the noise cascade
                if epsilon_decomposed is not None:
                    epsilon_temp = epsilon_decomposed["cascade_levels"][i]
                    epsilon_temp *= self.__params.noise_std_coeffs[i]
                else:
                    epsilon_temp = None
                # apply AR(p) process to noise cascade level
                # (Returns zero noise if epsilon_decomposed is None)
                worker_state.precip_noise_cascades[j][i] = (
                    autoregression.iterate_ar_model(
                        worker_state.precip_noise_cascades[j][i],
                        self.__params.PHI[i, :],
                        eps=epsilon_temp,
                    )
                )

        # Drop references so the large temporaries can be collected.
        epsilon_decomposed = None
        epsilon_temp = None

    def __perturb_blend_and_advect_extrapolation_and_noise_to_current_timestep(
        self, t, j, worker_state
    ):
        """
        Apply perturbations, blend motion fields, and advect extrapolated
        and noise cascades to the current time step (or
sub-timesteps). This step ensures realistic motion updates in nowcasting. """ # Settings and initialize the output extrap_kwargs_ = worker_state.extrapolation_kwargs.copy() extrap_kwargs_noise = worker_state.extrapolation_kwargs.copy() extrap_kwargs_pb = worker_state.extrapolation_kwargs.copy() velocity_perturbations_extrapolation = self.__velocity # The following should be accessible after this function worker_state.precip_extrapolated_decomp = [] worker_state.noise_extrapolated_decomp = [] worker_state.precip_extrapolated_probability_matching = [] # Extrapolate per sub time step for t_sub in worker_state.subtimesteps: if t_sub > 0: t_diff_prev_subtimestep_int = t_sub - int(t_sub) if t_diff_prev_subtimestep_int > 0.0: if self.__config.nowcasting_method == "steps": precip_forecast_cascade_subtimestep = [ (1.0 - t_diff_prev_subtimestep_int) * worker_state.precip_cascades_prev_subtimestep[j][i][-1, :] + t_diff_prev_subtimestep_int * worker_state.precip_cascades[j][i][-1, :] for i in range(self.__config.n_cascade_levels) ] if self.__config.noise_method is not None: noise_cascade_subtimestep = [ (1.0 - t_diff_prev_subtimestep_int) * worker_state.cascade_noise_prev_subtimestep[j][i][-1, :] + t_diff_prev_subtimestep_int * worker_state.precip_noise_cascades[j][i][-1, :] for i in range(self.__config.n_cascade_levels) ] else: if self.__config.nowcasting_method == "steps": precip_forecast_cascade_subtimestep = [ worker_state.precip_cascades_prev_subtimestep[j][i][-1, :] for i in range(self.__config.n_cascade_levels) ] if self.__config.noise_method is not None: noise_cascade_subtimestep = [ worker_state.cascade_noise_prev_subtimestep[j][i][-1, :] for i in range(self.__config.n_cascade_levels) ] if self.__config.nowcasting_method == "steps": precip_forecast_cascade_subtimestep = np.stack( precip_forecast_cascade_subtimestep ) if self.__config.noise_method is not None: noise_cascade_subtimestep = np.stack(noise_cascade_subtimestep) t_diff_prev_subtimestep = t_sub - 
worker_state.time_prev_timestep[j] worker_state.leadtime_since_start_forecast[j] += t_diff_prev_subtimestep # compute the perturbed motion field - include the NWP # velocities and the weights. Note that we only perturb # the extrapolation velocity field, as the NWP velocity # field is present per time step if self.__config.velocity_perturbation_method is not None: velocity_perturbations_extrapolation = ( self.__velocity + self.__params.generate_velocity_noise( self.__params.velocity_perturbations[j], worker_state.leadtime_since_start_forecast[j] * self.__config.timestep, ) ) # Stack the perturbed extrapolation and the NWP velocities if self.__config.blend_nwp_members: velocity_stack_all = np.concatenate( ( velocity_perturbations_extrapolation[None, :, :, :], worker_state.velocity_models_timestep, ), axis=0, ) else: velocity_models = worker_state.velocity_models_timestep[j] velocity_stack_all = np.concatenate( ( velocity_perturbations_extrapolation[None, :, :, :], velocity_models[None, :, :, :], ), axis=0, ) velocity_models = None # Obtain a blended optical flow, using the weights of the # second cascade following eq. 24 in BPS2006 velocity_blended = blending.utils.blend_optical_flows( flows=velocity_stack_all, weights=worker_state.weights[ :-1, 1 ], # [(extr_field, n_model_fields), cascade_level=2] ) # Extrapolate both cascades to the next time step # First recompose the cascade, advect it and decompose it again # This is needed to remove the interpolation artefacts. # In addition, the number of extrapolations is greatly reduced # A. 
The extrapolation component if self.__config.nowcasting_method == "steps": # First, recompose the cascades into one forecast precip_forecast_recomp_subtimestep = ( blending.utils.recompose_cascade( combined_cascade=precip_forecast_cascade_subtimestep, combined_mean=worker_state.mean_extrapolation, combined_sigma=worker_state.std_extrapolation, ) ) # Make sure we have values outside the mask if self.__params.zero_precip_radar: precip_forecast_recomp_subtimestep = np.nan_to_num( precip_forecast_recomp_subtimestep, copy=True, nan=self.__params.precip_zerovalue, posinf=self.__params.precip_zerovalue, neginf=self.__params.precip_zerovalue, ) # Put back the mask precip_forecast_recomp_subtimestep[self.__params.domain_mask] = ( np.nan ) worker_state.extrapolation_kwargs["displacement_prev"] = ( worker_state.previous_displacement[j] ) ( precip_forecast_extrapolated_recomp_subtimestep_temp, worker_state.previous_displacement[j], ) = self.__params.extrapolation_method( precip_forecast_recomp_subtimestep, velocity_blended, [t_diff_prev_subtimestep], allow_nonfinite_values=True, **worker_state.extrapolation_kwargs, ) precip_extrapolated_recomp_subtimestep = ( precip_forecast_extrapolated_recomp_subtimestep_temp[0].copy() ) temp_mask = ~np.isfinite(precip_extrapolated_recomp_subtimestep) # Set non-finite values to the zerovalue precip_extrapolated_recomp_subtimestep[ ~np.isfinite(precip_extrapolated_recomp_subtimestep) ] = self.__params.precip_zerovalue # Decompose the forecast again into multiplicative cascades precip_extrapolated_decomp = self.__params.decomposition_method( precip_extrapolated_recomp_subtimestep, self.__params.bandpass_filter, mask=self.__params.mask_threshold, fft_method=self.__params.fft, output_domain=self.__config.domain, normalize=True, compute_stats=True, compact_output=True, )["cascade_levels"] # Make sure we have values outside the mask if self.__params.zero_precip_radar: precip_extrapolated_decomp = np.nan_to_num( precip_extrapolated_decomp, 
copy=True, nan=np.nanmin(precip_forecast_cascade_subtimestep), posinf=np.nanmin(precip_forecast_cascade_subtimestep), neginf=np.nanmin(precip_forecast_cascade_subtimestep), ) for i in range(self.__config.n_cascade_levels): precip_extrapolated_decomp[i][temp_mask] = np.nan # Append the results to the output lists worker_state.precip_extrapolated_decomp.append( precip_extrapolated_decomp.copy() ) precip_forecast_cascade_subtimestep = None precip_forecast_recomp_subtimestep = None precip_forecast_extrapolated_recomp_subtimestep_temp = None precip_extrapolated_recomp_subtimestep = None precip_extrapolated_decomp = None # B. The noise component if self.__config.noise_method is not None: # First, recompose the cascades into one forecast noise_cascade_subtimestep_recomp = blending.utils.recompose_cascade( combined_cascade=noise_cascade_subtimestep, combined_mean=worker_state.precip_mean_noise[j], combined_sigma=worker_state.precip_std_noise[j], ) extrap_kwargs_noise["displacement_prev"] = ( worker_state.previous_displacement_noise_cascade[j] ) extrap_kwargs_noise["map_coordinates_mode"] = "wrap" ( noise_extrapolated_recomp_temp, worker_state.previous_displacement_noise_cascade[j], ) = self.__params.extrapolation_method( noise_cascade_subtimestep_recomp, velocity_blended, [t_diff_prev_subtimestep], allow_nonfinite_values=True, **extrap_kwargs_noise, ) noise_extrapolated_recomp = noise_extrapolated_recomp_temp[0].copy() # Decompose the noise component again into multiplicative cascades noise_extrapolated_decomp = self.__params.decomposition_method( noise_extrapolated_recomp, self.__params.bandpass_filter, mask=self.__params.mask_threshold, fft_method=self.__params.fft, output_domain=self.__config.domain, normalize=True, compute_stats=True, compact_output=True, )["cascade_levels"] for i in range(self.__config.n_cascade_levels): noise_extrapolated_decomp[i] *= self.__params.noise_std_coeffs[ i ] # Append the results to the output lists 
worker_state.noise_extrapolated_decomp.append( noise_extrapolated_decomp.copy() ) noise_cascade_subtimestep = None noise_cascade_subtimestep_recomp = None noise_extrapolated_recomp_temp = None noise_extrapolated_recomp = None noise_extrapolated_decomp = None # Finally, also extrapolate the initial radar rainfall field. This will be # blended with the rainfall field(s) of the (NWP) model(s) for Lagrangian # blended prob. matching min_R = np.min(precip). If we use an external # nowcast, this variable will be set later in this function. if self.__config.nowcasting_method == "steps": extrap_kwargs_pb["displacement_prev"] = ( worker_state.previous_displacement_prob_matching[j] ) # Apply the domain mask to the extrapolation component precip_forecast_temp_for_probability_matching = self.__precip.copy() precip_forecast_temp_for_probability_matching[ self.__params.domain_mask ] = np.nan ( precip_forecast_extrapolated_probability_matching_temp, worker_state.previous_displacement_prob_matching[j], ) = self.__params.extrapolation_method( precip_forecast_temp_for_probability_matching, velocity_blended, [t_diff_prev_subtimestep], allow_nonfinite_values=True, **extrap_kwargs_pb, ) worker_state.precip_extrapolated_probability_matching.append( precip_forecast_extrapolated_probability_matching_temp[0] ) worker_state.time_prev_timestep[j] = t_sub if len(worker_state.precip_extrapolated_decomp) > 0: if self.__config.nowcasting_method == "steps": worker_state.precip_extrapolated_decomp = np.stack( worker_state.precip_extrapolated_decomp ) worker_state.precip_extrapolated_probability_matching = np.stack( worker_state.precip_extrapolated_probability_matching ) if len(worker_state.noise_extrapolated_decomp) > 0: if self.__config.noise_method is not None: worker_state.noise_extrapolated_decomp = np.stack( worker_state.noise_extrapolated_decomp ) # advect the forecast field by one time step if no subtimesteps in the # current interval were found if not worker_state.subtimesteps: 
t_diff_prev_subtimestep = t + 1 - worker_state.time_prev_timestep[j] worker_state.leadtime_since_start_forecast[j] += t_diff_prev_subtimestep # compute the perturbed motion field - include the NWP # velocities and the weights if self.__config.velocity_perturbation_method is not None: velocity_perturbations_extrapolation = ( self.__velocity + self.__params.generate_velocity_noise( self.__params.velocity_perturbations[j], worker_state.leadtime_since_start_forecast[j] * self.__config.timestep, ) ) # Stack the perturbed extrapolation and the NWP velocities if self.__config.blend_nwp_members: velocity_stack_all = np.concatenate( ( velocity_perturbations_extrapolation[None, :, :, :], worker_state.velocity_models_timestep, ), axis=0, ) else: velocity_models = worker_state.velocity_models_timestep[j] velocity_stack_all = np.concatenate( ( velocity_perturbations_extrapolation[None, :, :, :], velocity_models[None, :, :, :], ), axis=0, ) velocity_models = None # Obtain a blended optical flow, using the weights of the # second cascade following eq. 
24 in BPS2006 velocity_blended = blending.utils.blend_optical_flows( flows=velocity_stack_all, weights=worker_state.weights[ :-1, 1 ], # [(extr_field, n_model_fields), cascade_level=2] ) # Extrapolate the extrapolation and noise cascade extrap_kwargs_["displacement_prev"] = worker_state.previous_displacement[j] extrap_kwargs_noise["displacement_prev"] = ( worker_state.previous_displacement_noise_cascade[j] ) extrap_kwargs_noise["map_coordinates_mode"] = "wrap" # Extrapolate the extrapolation cascade if self.__config.nowcasting_method == "steps": ( _, worker_state.previous_displacement[j], ) = self.__params.extrapolation_method( None, velocity_blended, [t_diff_prev_subtimestep], allow_nonfinite_values=True, **extrap_kwargs_, ) # Extrapolate the noise cascade if self.__config.noise_method is not None: ( _, worker_state.previous_displacement_noise_cascade[j], ) = self.__params.extrapolation_method( None, velocity_blended, [t_diff_prev_subtimestep], allow_nonfinite_values=True, **extrap_kwargs_noise, ) # Also extrapolate the radar observation, used for the probability # matching and post-processing steps if self.__config.nowcasting_method == "steps": extrap_kwargs_pb["displacement_prev"] = ( worker_state.previous_displacement_prob_matching[j] ) ( _, worker_state.previous_displacement_prob_matching[j], ) = self.__params.extrapolation_method( None, velocity_blended, [t_diff_prev_subtimestep], allow_nonfinite_values=True, **extrap_kwargs_pb, ) worker_state.time_prev_timestep[j] = t + 1 # If an external nowcast is provided, precip_extrapolated_decomp and # precip_extrapolated_probability_matching have been omitted so far. # Fill them in with the external nowcast information now. 
        # If an external nowcast is provided, the extrapolation cascade was not
        # advected above; fill the output lists from the (already extrapolated)
        # external nowcast cascade and rainfall field instead.
        # NOTE(review): nesting reconstructed from a whitespace-mangled source —
        # the probability-matching append and the stacking are taken to run once,
        # after the per-level loop; verify against upstream.
        if self.__config.nowcasting_method == "external_nowcast":
            for i in range(self.__config.n_cascade_levels):
                # Latest (most recent) cascade state of level i for member j
                precip_extrapolated_decomp = worker_state.precip_cascades[j][i][-1, :]
                worker_state.time_prev_timestep[j] = t + 1
                worker_state.precip_extrapolated_decomp.append(
                    precip_extrapolated_decomp.copy()
                )
            # Also update the probability matching fields from the external nowcast
            precip_extrapolated = self.__precip_nowcast[j][t][:, :]
            worker_state.precip_extrapolated_probability_matching.append(
                precip_extrapolated.copy()
            )
            # Stack it for the output; [None, :] adds the subtimestep axis expected
            # by __blend_cascades
            worker_state.precip_extrapolated_decomp = np.stack(
                worker_state.precip_extrapolated_decomp
            )[None, :]
            worker_state.precip_extrapolated_probability_matching = np.stack(
                worker_state.precip_extrapolated_probability_matching
            )  # [None, :]

        # Keep the current cascades as the "previous subtimestep" state for the
        # next iteration
        worker_state.precip_cascades_prev_subtimestep[j] = worker_state.precip_cascades[
            j
        ]
        worker_state.cascade_noise_prev_subtimestep[j] = (
            worker_state.precip_noise_cascades[j]
        )

    def __blend_cascades(self, t_sub, j, worker_state):
        """
        Blend extrapolated, NWP model, and noise cascades using predefined
        weights.

        Computes both the full blend and a model-only blend (used later to fill
        areas outside the radar domain), and also blends the cascade means and
        standard deviations across scale levels. Results are stored on
        ``worker_state``; nothing is returned.

        Parameters
        ----------
        t_sub: float
            Current subtimestep (used to locate the index in
            ``worker_state.subtimesteps`` and for the full-NWP-weight ramp).
        j: int
            Ensemble member index.
        worker_state: state container
            Mutable per-worker state holding the cascades, weights, means and
            sigmas that are read and updated in place.
        """
        # Index of this subtimestep within the current timestep's subtimesteps
        worker_state.subtimestep_index = np.where(
            np.array(worker_state.subtimesteps) == t_sub
        )[0][0]
        # First concatenate the cascades and the means and sigmas
        # precip_models = [n_models,timesteps,n_cascade_levels,m,n]
        if (
            self.__config.blend_nwp_members
            and self.__config.nowcasting_method == "external_nowcast"
        ):
            # All NWP members are blended into every nowcast member, with an
            # external nowcast as the extrapolation component
            if self.__config.noise_method is None:
                cascade_stack_all_components = np.concatenate(
                    (
                        worker_state.precip_extrapolated_decomp[
                            None, worker_state.subtimestep_index
                        ],
                        worker_state.precip_models_cascades_timestep,
                    ),
                    axis=0,
                )  # [(extr_field, n_model_fields), n_cascade_levels, ...]
            else:
                cascade_stack_all_components = np.concatenate(
                    (
                        worker_state.precip_extrapolated_decomp[
                            None, worker_state.subtimestep_index
                        ],
                        worker_state.precip_models_cascades_timestep,
                        worker_state.noise_extrapolated_decomp[
                            None, worker_state.subtimestep_index
                        ],
                    ),
                    axis=0,
                )  # [(extr_field, n_model_fields), n_cascade_levels, ...]
            means_stacked = np.concatenate(
                (
                    worker_state.mean_nowcast_timestep[None, j, :],
                    worker_state.mean_models_timestep,
                ),
                axis=0,
            )
            sigmas_stacked = np.concatenate(
                (
                    worker_state.std_nowcast_timestep[None, j, :],
                    worker_state.std_models_timestep,
                ),
                axis=0,
            )
        elif (
            self.__config.blend_nwp_members
            and self.__config.nowcasting_method == "steps"
        ):
            # All NWP members blended into every nowcast member; standard STEPS
            # extrapolation cascade plus a noise component
            cascade_stack_all_components = np.concatenate(
                (
                    worker_state.precip_extrapolated_decomp[
                        None, worker_state.subtimestep_index
                    ],
                    worker_state.precip_models_cascades_timestep,
                    worker_state.noise_extrapolated_decomp[
                        None, worker_state.subtimestep_index
                    ],
                ),
                axis=0,
            )  # [(extr_field, n_model_fields, noise), n_cascade_levels, ...]
            means_stacked = np.concatenate(
                (
                    worker_state.mean_extrapolation[None, :],
                    worker_state.mean_models_timestep,
                ),
                axis=0,
            )
            sigmas_stacked = np.concatenate(
                (
                    worker_state.std_extrapolation[None, :],
                    worker_state.std_models_timestep,
                ),
                axis=0,
            )
        elif self.__config.nowcasting_method == "external_nowcast":
            # One NWP member per nowcast member, external nowcast as
            # extrapolation component
            if self.__config.noise_method is None:
                cascade_stack_all_components = np.concatenate(
                    (
                        worker_state.precip_extrapolated_decomp[
                            None, worker_state.subtimestep_index
                        ],
                        worker_state.precip_models_cascades_timestep[None, j],
                    ),
                    axis=0,
                )  # [(extr_field, n_model_fields), n_cascade_levels, ...]
            else:
                cascade_stack_all_components = np.concatenate(
                    (
                        worker_state.precip_extrapolated_decomp[
                            None, worker_state.subtimestep_index
                        ],
                        worker_state.precip_models_cascades_timestep[None, j],
                        worker_state.noise_extrapolated_decomp[
                            None, worker_state.subtimestep_index
                        ],
                    ),
                    axis=0,
                )  # [(extr_field, n_model_fields), n_cascade_levels, ...]
            means_stacked = np.concatenate(
                (
                    worker_state.mean_nowcast_timestep[None, j, :],
                    worker_state.mean_models_timestep[None, j],
                ),
                axis=0,
            )
            sigmas_stacked = np.concatenate(
                (
                    worker_state.std_nowcast_timestep[None, j, :],
                    worker_state.std_models_timestep[None, j],
                ),
                axis=0,
            )
        else:
            # Standard STEPS blending: one NWP member per nowcast member
            cascade_stack_all_components = np.concatenate(
                (
                    worker_state.precip_extrapolated_decomp[
                        None, worker_state.subtimestep_index
                    ],
                    worker_state.precip_models_cascades_timestep[None, j],
                    worker_state.noise_extrapolated_decomp[
                        None, worker_state.subtimestep_index
                    ],
                ),
                axis=0,
            )  # [(extr_field, n_model_fields, noise), n_cascade_levels, ...]
            means_stacked = np.concatenate(
                (
                    worker_state.mean_extrapolation[None, :],
                    worker_state.mean_models_timestep[None, j],
                ),
                axis=0,
            )
            sigmas_stacked = np.concatenate(
                (
                    worker_state.std_extrapolation[None, :],
                    worker_state.std_models_timestep[None, j],
                ),
                axis=0,
            )

        # First determine the blending weights if method is spn. The
        # weights for method bps have already been determined.
        start_smoothing_to_final_weights = False
        if self.__config.timestep_start_full_nwp_weight is not None:
            if t_sub >= self.__config.timestep_start_full_nwp_weight:
                # Past this timestep the weights ramp to full NWP weight, so the
                # spn calculation is skipped
                start_smoothing_to_final_weights = True

        if (
            self.__config.weights_method == "spn"
            and not start_smoothing_to_final_weights
        ):
            worker_state.weights = np.zeros(
                (
                    cascade_stack_all_components.shape[0],
                    self.__config.n_cascade_levels,
                )
            )
            for i in range(self.__config.n_cascade_levels):
                # Determine the normalized covariance matrix (containing)
                # the cross-correlations between the models
                cascade_stack_all_components_temp = np.stack(
                    [
                        cascade_stack_all_components[n_model, i, :, :].flatten()
                        for n_model in range(cascade_stack_all_components.shape[0] - 1)
                    ]
                )  # -1 to exclude the noise component
                # Masked corrcoef ignores non-finite pixels (e.g. outside radar)
                covariance_nwp_models = np.ma.corrcoef(
                    np.ma.masked_invalid(cascade_stack_all_components_temp)
                )
                # Determine the weights for this cascade level
                worker_state.weights[:, i] = calculate_weights_spn(
                    correlations=worker_state.rho_final_blended_forecast[:, i],
                    covariance=covariance_nwp_models,
                )
            self.__state.weights = worker_state.weights

        # Create weights_with_noise to ensure there is always a 3D weights field,
        # even if self.__config.nowcasting_method is "external_nowcast" and
        # n_ens_members is 1.
        worker_state.weights_with_noise = worker_state.weights.copy()
        worker_state.weights_model_only_with_noise = (
            worker_state.weights_model_only.copy()
        )
        if (
            self.__config.nowcasting_method == "external_nowcast"
            and self.__config.noise_method is None
        ):
            # First determine the weights without noise: drop the last (noise)
            # row and renormalize so the remaining weights sum to one per level
            worker_state.weights = worker_state.weights[:-1, :] / np.sum(
                worker_state.weights[:-1, :], axis=0
            )
            worker_state.weights_model_only = worker_state.weights_model_only[
                :-1, :
            ] / np.sum(worker_state.weights_model_only[:-1, :], axis=0)

            # Blend the extrapolation, (NWP) model(s) and noise cascades
            worker_state.final_blended_forecast_cascades = (
                blending.utils.blend_cascades(
                    cascades_norm=cascade_stack_all_components,
                    weights=worker_state.weights,
                )
            )
            # Also blend the cascade without the extrapolation component
            worker_state.final_blended_forecast_cascades_mod_only = (
                blending.utils.blend_cascades(
                    cascades_norm=cascade_stack_all_components[1:, :],
                    weights=worker_state.weights_model_only,
                )
            )
        else:
            # Blend the extrapolation, (NWP) model(s) and noise cascades
            worker_state.final_blended_forecast_cascades = (
                blending.utils.blend_cascades(
                    cascades_norm=cascade_stack_all_components,
                    weights=worker_state.weights_with_noise,
                )
            )
            # Also blend the cascade without the extrapolation component
            worker_state.final_blended_forecast_cascades_mod_only = (
                blending.utils.blend_cascades(
                    cascades_norm=cascade_stack_all_components[1:, :],
                    weights=worker_state.weights_model_only,
                )
            )

        # Blend the means and standard deviations
        # Input is array of shape [number_components, scale_level, ...]
        (
            worker_state.final_blended_forecast_means,
            worker_state.final_blended_forecast_stds,
        ) = blend_means_sigmas(
            means=means_stacked,
            sigmas=sigmas_stacked,
            weights=worker_state.weights_with_noise,
        )
        # Also blend the means and sigmas for the cascade without extrapolation
        (
            worker_state.final_blended_forecast_means_mod_only,
            worker_state.final_blended_forecast_stds_mod_only,
        ) = blend_means_sigmas(
            means=means_stacked[1:, :],
            sigmas=sigmas_stacked[1:, :],
            weights=worker_state.weights_model_only_with_noise,
        )

    def __recompose_cascade_to_rainfall_field(self, j, worker_state):
        """
        Recompose the blended cascade into a precipitation field using the
        blended means and standard deviations.

        Both the full blend and the model-only blend (used for NaN filling
        outside the radar domain) are recomposed. If the computation runs in
        the spectral domain, an inverse FFT brings the fields back to the
        spatial domain. Results are stored on ``worker_state``.

        Parameters
        ----------
        j: int
            Ensemble member index (selects the per-member FFT object).
        worker_state: state container
            Mutable per-worker state; read and updated in place.
        """
        worker_state.final_blended_forecast_recomposed = (
            blending.utils.recompose_cascade(
                combined_cascade=worker_state.final_blended_forecast_cascades,
                combined_mean=worker_state.final_blended_forecast_means,
                combined_sigma=worker_state.final_blended_forecast_stds,
            )
        )
        # The recomposed cascade without the extrapolation (for NaN filling
        # outside the radar domain)
        worker_state.final_blended_forecast_recomposed_mod_only = (
            blending.utils.recompose_cascade(
                combined_cascade=worker_state.final_blended_forecast_cascades_mod_only,
                combined_mean=worker_state.final_blended_forecast_means_mod_only,
                combined_sigma=worker_state.final_blended_forecast_stds_mod_only,
            )
        )
        if self.__config.domain == "spectral":
            # TODO: Check this! (Only tested with domain == 'spatial')
            worker_state.final_blended_forecast_recomposed = self.__params.fft_objs[
                j
            ].irfft2(worker_state.final_blended_forecast_recomposed)
            worker_state.final_blended_forecast_recomposed_mod_only = (
                self.__params.fft_objs[j].irfft2(
                    worker_state.final_blended_forecast_recomposed_mod_only
                )
            )

    def __post_process_output(
        self, j, t_sub, final_blended_forecast_single_member, worker_state
    ):
        """
        Apply post-processing steps to refine the final blended forecast.
        This involves masking, filling missing data with the blended NWP
        forecast, and applying probability matching to ensure consistency.

        **Steps:**

        1. **Use Mask and Fill Missing Data:**
           - Areas without reliable radar extrapolation are filled using the
             blended NWP forecast to maintain spatial coherence.
        2. **Lagrangian Blended Probability Matching:**
           - Uses the latest extrapolated radar rainfall field blended with the
             NWP model(s) forecast as a reference.
           - Ensures that the statistical distribution of the final forecast
             remains consistent with the benchmark dataset.
        3. **Blend the Extrapolated Rainfall Field with NWP Forecasts:**
           - The extrapolated rainfall field is used only for post-processing.
           - The forecast is blended using predefined weights at scale level 2.
           - This ensures that both extrapolated and modeled precipitation
             contribute appropriately to the final output.
        4. **Apply Probability Matching:**
           - Adjusts the final precipitation distribution using either empirical
             cumulative distribution functions (CDF) or mean adjustments to
             match the reference dataset.

        The final processed forecast is stored in
        `final_blended_forecast_single_member`.
        """
        weights_probability_matching = worker_state.weights_with_noise[
            :-1, 1
        ]  # Weights without noise, level 2
        weights_probability_matching_normalized = weights_probability_matching / np.sum(
            weights_probability_matching
        )
        # And the weights for outside the radar domain
        weights_probability_matching_mod_only = (
            worker_state.weights_model_only_with_noise[:-1, 1]
        )  # Weights without noise, level 2
        weights_probability_matching_normalized_mod_only = (
            weights_probability_matching_mod_only
            / np.sum(weights_probability_matching_mod_only)
        )
        # Stack the fields
        if self.__config.blend_nwp_members:
            precip_forecast_probability_matching_final = np.concatenate(
                (
                    worker_state.precip_extrapolated_probability_matching[
                        None, worker_state.subtimestep_index
                    ],
                    worker_state.precip_models_timestep,
                ),
                axis=0,
            )
        else:
            precip_forecast_probability_matching_final = np.concatenate(
                (
                    worker_state.precip_extrapolated_probability_matching[
                        None, worker_state.subtimestep_index
                    ],
                    worker_state.precip_models_timestep[None, j],
                ),
                axis=0,
            )
        # Blend it: weighted sum over the components at scale level 2
        precip_forecast_probability_matching_blended = np.sum(
            weights_probability_matching_normalized.reshape(
                weights_probability_matching_normalized.shape[0], 1, 1
            )
            * precip_forecast_probability_matching_final,
            axis=0,
        )
        if self.__config.blend_nwp_members:
            precip_forecast_probability_matching_blended_mod_only = np.sum(
                weights_probability_matching_normalized_mod_only.reshape(
                    weights_probability_matching_normalized_mod_only.shape[0],
                    1,
                    1,
                )
                * worker_state.precip_models_timestep,
                axis=0,
            )
        else:
            precip_forecast_probability_matching_blended_mod_only = (
                worker_state.precip_models_timestep[j]
            )

        # The extrapolation components are NaN outside the advected
        # radar domain. This results in NaN values in the blended
        # forecast outside the radar domain. Therefore, fill these
        # areas with the "..._mod_only" blended forecasts, consisting
        # of the NWP and noise components.
        nan_indices = np.isnan(worker_state.final_blended_forecast_recomposed)
        if self.__config.smooth_radar_mask_range != 0:
            # Compute the smooth dilated mask for a gradual radar-to-NWP
            # transition near the radar domain edge
            new_mask = blending.utils.compute_smooth_dilated_mask(
                nan_indices,
                max_padding_size_in_px=self.__config.smooth_radar_mask_range,
            )
            # Ensure mask values are between 0 and 1
            mask_model = np.clip(new_mask, 0, 1)
            mask_radar = np.clip(1 - new_mask, 0, 1)
            # Handle NaNs in both recomposed fields by setting NaNs to 0 in the
            # blending step
            precip_forecast_recomposed_mod_only_no_nan = np.nan_to_num(
                worker_state.final_blended_forecast_recomposed_mod_only, nan=0
            )
            precip_forecast_recomposed_no_nan = np.nan_to_num(
                worker_state.final_blended_forecast_recomposed, nan=0
            )
            # Perform the blending of radar and model inside the radar domain
            # using a weighted combination
            worker_state.final_blended_forecast_recomposed = np.nansum(
                [
                    mask_model * precip_forecast_recomposed_mod_only_no_nan,
                    mask_radar * precip_forecast_recomposed_no_nan,
                ],
                axis=0,
            )
            precip_forecast_probability_matching_blended = np.nansum(
                [
                    precip_forecast_probability_matching_blended * mask_radar,
                    precip_forecast_probability_matching_blended_mod_only * mask_model,
                ],
                axis=0,
            )
        else:
            # Hard fill: replace NaN pixels directly with the model-only blend
            worker_state.final_blended_forecast_recomposed[nan_indices] = (
                worker_state.final_blended_forecast_recomposed_mod_only[nan_indices]
            )
            nan_indices = np.isnan(precip_forecast_probability_matching_blended)
            precip_forecast_probability_matching_blended[nan_indices] = (
                precip_forecast_probability_matching_blended_mod_only[nan_indices]
            )
        # Finally, fill the remaining nan values, if present, with
        # the minimum value in the forecast
        nan_indices = np.isnan(worker_state.final_blended_forecast_recomposed)
        worker_state.final_blended_forecast_recomposed[nan_indices] = np.nanmin(
            worker_state.final_blended_forecast_recomposed
        )
        nan_indices = np.isnan(precip_forecast_probability_matching_blended)
        precip_forecast_probability_matching_blended[nan_indices] = np.nanmin(
            precip_forecast_probability_matching_blended
        )

        # Apply the masking and prob. matching
        precip_field_mask_temp = None
        if self.__config.mask_method is not None:
            # apply the precipitation mask to prevent generation of new
            # precipitation into areas where it was not originally
            # observed
            precip_forecast_min_value = (
                worker_state.final_blended_forecast_recomposed.min()
            )
            if self.__config.mask_method == "incremental":
                # The incremental mask is slightly different from the
                # implementation in nowcasts.steps.py, as it is not computed in
                # the Lagrangian space. Instead, we use
                # precip_forecast_probability_matched and let the mask_rim
                # increase with the time step until mask_rim_max. This ensures
                # that for the first t time steps, the buffer mask keeps
                # increasing.
                precip_field_mask = (
                    precip_forecast_probability_matching_blended
                    >= self.__params.precip_threshold
                )
                # Buffer the mask
                # Convert the precipitation field mask into an 8-bit unsigned
                # integer mask
                obs_mask_uint8 = precip_field_mask.astype("uint8")
                # Perform an initial binary dilation using the provided
                # structuring element
                dilated_mask = binary_dilation(obs_mask_uint8, self.__params.struct)
                # Create a binary structure element for incremental dilations
                struct_element = generate_binary_structure(2, 1)
                # Initialize a floating-point mask to accumulate dilations for a
                # smooth transition
                accumulated_mask = dilated_mask.astype(float)
                # Iteratively dilate the mask and accumulate the results to
                # create a grayscale rim
                mask_rim_temp = min(
                    self.__params.mask_rim + t_sub - 1, self.__params.max_mask_rim
                )
                for _ in range(mask_rim_temp):
                    dilated_mask = binary_dilation(dilated_mask, struct_element)
                    accumulated_mask += dilated_mask
                # Normalize the accumulated mask values between 0 and 1
                precip_field_mask = accumulated_mask / np.max(accumulated_mask)
                # Get the final mask: rescale toward the minimum value outside
                # the (smoothed) precipitation area
                worker_state.final_blended_forecast_recomposed = (
                    precip_forecast_min_value
                    + (
                        worker_state.final_blended_forecast_recomposed
                        - precip_forecast_min_value
                    )
                    * precip_field_mask
                )
                precip_field_mask_temp = (
                    worker_state.final_blended_forecast_recomposed
                    > precip_forecast_min_value
                )
            elif self.__config.mask_method == "obs":
                # The mask equals the most recent benchmark rainfall field
                precip_field_mask_temp = (
                    precip_forecast_probability_matching_blended
                    >= self.__params.precip_threshold
                )
            # Set to min value outside of mask
            worker_state.final_blended_forecast_recomposed[~precip_field_mask_temp] = (
                precip_forecast_min_value
            )

        # If probmatching_method is not None, resample the distribution from
        # both the extrapolation cascade and the model (NWP) cascade and use
        # that for the probability matching.
        if (
            self.__config.probmatching_method is not None
            and self.__config.resample_distribution
        ):
            arr1 = worker_state.precip_extrapolated_probability_matching[
                worker_state.subtimestep_index
            ]
            arr2 = worker_state.precip_models_timestep[j]
            # resample weights based on cascade level 2.
            # Areas where one of the fields is nan are not included.
            precip_forecast_probability_matching_resampled = (
                probmatching.resample_distributions(
                    first_array=arr1,
                    second_array=arr2,
                    probability_first_array=weights_probability_matching_normalized[0],
                    randgen=worker_state.randgen_probmatching[j],
                )
            )
        else:
            precip_forecast_probability_matching_resampled = (
                precip_forecast_probability_matching_blended.copy()
            )

        if self.__config.probmatching_method == "cdf":
            # nan indices in the extrapolation nowcast
            nan_indices = np.isnan(
                worker_state.precip_extrapolated_probability_matching[
                    worker_state.subtimestep_index
                ]
            )
            # Adjust the CDF of the forecast to match the resampled distribution
            # combined from extrapolation and model fields.
            # Rainfall outside the pure extrapolation domain is not taken into
            # account.
            if np.any(np.isfinite(worker_state.final_blended_forecast_recomposed)):
                worker_state.final_blended_forecast_recomposed = (
                    probmatching.nonparam_match_empirical_cdf(
                        worker_state.final_blended_forecast_recomposed,
                        precip_forecast_probability_matching_resampled,
                        nan_indices,
                    )
                )
            # Release the (potentially large) resampled field
            precip_forecast_probability_matching_resampled = None
        elif self.__config.probmatching_method == "mean":
            # Use the blended probability-matching field as benchmark: shift the
            # conditional mean of the forecast (in rainy areas) onto it
            mean_probabiltity_matching_forecast = np.mean(
                precip_forecast_probability_matching_resampled[
                    precip_forecast_probability_matching_resampled
                    >= self.__params.precip_threshold
                ]
            )
            no_rain_mask = (
                worker_state.final_blended_forecast_recomposed
                >= self.__params.precip_threshold
            )
            mean_precip_forecast = np.mean(
                worker_state.final_blended_forecast_recomposed[no_rain_mask]
            )
            worker_state.final_blended_forecast_recomposed[no_rain_mask] = (
                worker_state.final_blended_forecast_recomposed[no_rain_mask]
                - mean_precip_forecast
                + mean_probabiltity_matching_forecast
            )
            precip_forecast_probability_matching_resampled = None

        final_blended_forecast_single_member.append(
            worker_state.final_blended_forecast_recomposed
        )
        return final_blended_forecast_single_member

    def __measure_time(self, label, start_time):
        """
        Measure and print the time taken for a specific part of the process.

        Parameters:
        - label: A description of the part of the process being measured.
        - start_time: The timestamp when the process started (from time.time()).
""" if self.__config.measure_time: elapsed_time = time.time() - start_time print(f"{label} took {elapsed_time:.2f} seconds.") return elapsed_time return None def forecast( precip, precip_models, velocity, velocity_models, timesteps, timestep, issuetime, n_ens_members, precip_nowcast=None, n_cascade_levels=6, blend_nwp_members=False, precip_thr=None, norain_thr=0.0, kmperpixel=None, extrap_method="semilagrangian", decomp_method="fft", bandpass_filter_method="gaussian", nowcasting_method="steps", noise_method="nonparametric", noise_stddev_adj=None, ar_order=2, vel_pert_method="bps", weights_method="bps", timestep_start_full_nwp_weight=None, conditional=False, probmatching_method="cdf", mask_method="incremental", resample_distribution=True, smooth_radar_mask_range=0, callback=None, return_output=True, seed=None, num_workers=1, fft_method="numpy", domain="spatial", outdir_path_skill="./tmp/", extrap_kwargs=None, filter_kwargs=None, noise_kwargs=None, vel_pert_kwargs=None, clim_kwargs=None, mask_kwargs=None, measure_time=False, ): """ Generate a blended nowcast ensemble by using the Short-Term Ensemble Prediction System (STEPS) method. Parameters ---------- precip: array-like Array of shape (ar_order+1,m,n) containing the input precipitation fields ordered by timestamp from oldest to newest. The time steps between the inputs are assumed to be regular. precip_models: array-like Either raw (NWP) model forecast data or decomposed (NWP) model forecast data. If you supply decomposed data, it needs to be an array of shape (n_models,timesteps+1) containing, per timestep (t=0 to lead time here) and per (NWP) model or model ensemble member, a dictionary with a list of cascades obtained by calling a method implemented in :py:mod:`pysteps.cascade.decomposition`. If you supply the original (NWP) model forecast data, it needs to be an array of shape (n_models,timestep+1,m,n) containing precipitation (or other) fields, which will then be decomposed in this function. 
        Depending on your use case it can be advantageous to decompose the
        model forecasts beforehand, outside of this function, as this slightly
        reduces calculation times. This is possible with
        :py:func:`pysteps.blending.utils.decompose_NWP`,
        :py:func:`pysteps.blending.utils.compute_store_nwp_motion`, and
        :py:func:`pysteps.blending.utils.load_NWP`. However, if you have a lot
        of (NWP) model members (e.g. 1 model member per nowcast member), this
        can lead to excessive memory usage. To further reduce memory usage,
        both this array and the ``velocity_models`` array can be given as
        float32. They will then be converted to float64 before computations to
        minimize loss in precision. In case of one (deterministic) model as
        input, add an extra dimension to make sure precip_models is four
        dimensional prior to calling this function.
    velocity: array-like
        Array of shape (2,m,n) containing the x- and y-components of the
        advection field. The velocities are assumed to represent one time step
        between the inputs. All values are required to be finite.
    velocity_models: array-like
        Array of shape (n_models,timestep,2,m,n) containing the x- and
        y-components of the advection field for the (NWP) model field per
        forecast lead time. All values are required to be finite. To reduce
        memory usage, this array can be given as float32. They will then be
        converted to float64 before computations to minimize loss in precision.
    timesteps: int or list of floats
        Number of time steps to forecast or a list of time steps for which the
        forecasts are computed (relative to the input time step). The elements
        of the list are required to be in ascending order.
    timestep: float
        Time step of the motion vectors (minutes). Required if vel_pert_method
        is not None or mask_method is 'incremental'.
    issuetime: datetime
        Datetime object containing the date and time for which the forecast
        is issued.
    n_ens_members: int
        The number of ensemble members to generate. This number should always
        be equal to or larger than the number of NWP ensemble members / number
        of NWP models.
precip_nowcast: array-like, optional Optional input with array of shape (n_ens_members,timestep+1,m,n) containing and external nowcast as input to the blending. If precip_nowcast is provided, the autoregression step and advection step will be omitted for the extrapolation cascade of the blending procedure and instead, precip_nowcast will be used as estimate. Defaults to None (which is the standard STEPS) method described in :cite:`Imhoff2023`. Note that nowcasting_method should be set to 'external_nowcast' if precip_nowcast is not None. Note that in the current setup, only a deterministic precip_nowcast model can be provided and only one ensemble member (without noise generation) is returned. This will change soon. n_cascade_levels: int, optional The number of cascade levels to use. Defaults to 6, see issue #385 on GitHub. blend_nwp_members: bool Check if NWP models/members should be used individually, or if all of them are blended together per nowcast ensemble member. Standard set to false. precip_thr: float, optional Specifies the threshold value for minimum observable precipitation intensity. Required if mask_method is not None or conditional is True. norain_thr: float Specifies the threshold value for the fraction of rainy (see above) pixels in the radar rainfall field below which we consider there to be no rain. Depends on the amount of clutter typically present. Standard set to 0.0 kmperpixel: float, optional Spatial resolution of the input data (kilometers/pixel). Required if vel_pert_method is not None or mask_method is 'incremental'. extrap_method: str, optional Name of the extrapolation method to use. See the documentation of :py:mod:`pysteps.extrapolation.interface`. decomp_method: {'fft'}, optional Name of the cascade decomposition method to use. See the documentation of :py:mod:`pysteps.cascade.interface`. bandpass_filter_method: {'gaussian', 'uniform'}, optional Name of the bandpass filter method to use with the cascade decomposition. 
See the documentation of :py:mod:`pysteps.cascade.interface`. nowcasting_method: {'steps', 'external_nowcast'}, Name of the nowcasting method used to generate the nowcasts. If an external nowcast is provided, the script will use this as input and bypass the autoregression and advection of the extrapolation cascade. Defaults to 'steps', which follows the method described in :cite:`Imhoff2023`. Note, if nowcasting_method is 'external_nowcast', precip_nowcast cannot be None. noise_method: {'parametric','nonparametric','ssft','nested',None}, optional Name of the noise generator to use for perturbating the precipitation field. See the documentation of :py:mod:`pysteps.noise.interface`. If set to None, no noise is generated. noise_stddev_adj: {'auto','fixed',None}, optional Optional adjustment for the standard deviations of the noise fields added to each cascade level. This is done to compensate incorrect std. dev. estimates of casace levels due to presence of no-rain areas. 'auto'=use the method implemented in :py:func:`pysteps.noise.utils.compute_noise_stddev_adjs`. 'fixed'= use the formula given in :cite:`BPS2006` (eq. 6), None=disable noise std. dev adjustment. ar_order: int, optional The order of the autoregressive model to use. Must be >= 1. vel_pert_method: {'bps',None}, optional Name of the noise generator to use for perturbing the advection field. See the documentation of :py:mod:`pysteps.noise.interface`. If set to None, the advection field is not perturbed. weights_method: {'bps','spn'}, optional The calculation method of the blending weights. Options are the method by :cite:`BPS2006` and the covariance-based method by :cite:`SPN2013`. Defaults to bps. timestep_start_full_nwp_weight: int, optional. 
The timestep, which should be smaller than timesteps, at which a linear transition takes place from the calculated weights to full (1.0) NWP weight (and zero extrapolation and noise weight) to ensure the blending procedure becomes equal to the NWP forecast(s) at the last timestep of the blending procedure. If not provided, the blending stick to the theoretical weights provided by the chosen weights_method for a given lead time and skill of each blending component. conditional: bool, optional If set to True, compute the statistics of the precipitation field conditionally by excluding pixels where the values are below the threshold precip_thr. probmatching_method: {'cdf','mean',None}, optional Method for matching the statistics of the forecast field with those of the most recently observed one. 'cdf'=map the forecast CDF to the observed one, 'mean'=adjust only the conditional mean value of the forecast field in precipitation areas, None=no matching applied. Using 'mean' requires that mask_method is not None. mask_method: {'obs','incremental',None}, optional The method to use for masking no precipitation areas in the forecast field. The masked pixels are set to the minimum value of the observations. 'obs' = apply precip_thr to the most recently observed precipitation intensity field, 'incremental' = iteratively buffer the mask with a certain rate (currently it is 1 km/min), None=no masking. resample_distribution: bool, optional Method to resample the distribution from the extrapolation and NWP cascade as input for the probability matching. Not resampling these distributions may lead to losing some extremes when the weight of both the extrapolation and NWP cascade is similar. Defaults to True. smooth_radar_mask_range: int, Default is 0. Method to smooth the transition between the radar-NWP-noise blend and the NWP-noise blend near the edge of the radar domain (radar mask), where the radar data is either not present anymore or is not reliable. 
If set to 0 (grid cells), this generates a normal forecast without smoothing. To create a smooth mask, this range should be a positive value, representing a buffer band of a number of pixels by which the mask is cropped and smoothed. The smooth radar mask removes the hard edges between NWP and radar in the final blended product. Typically, a value between 50 and 100 km can be used. 80 km generally gives good results. callback: function, optional Optional function that is called after computation of each time step of the nowcast. The function takes one argument: a three-dimensional array of shape (n_ens_members,h,w), where h and w are the height and width of the input field precip, respectively. This can be used, for instance, writing the outputs into files. return_output: bool, optional Set to False to disable returning the outputs as numpy arrays. This can save memory if the intermediate results are written to output files using the callback function. seed: int, optional Optional seed number for the random generators. num_workers: int, optional The number of workers to use for parallel computation. Applicable if dask is enabled or pyFFTW is used for computing the FFT. When num_workers>1, it is advisable to disable OpenMP by setting the environment variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous threads. fft_method: str, optional A string defining the FFT method to use (see FFT methods in :py:func:`pysteps.utils.interface.get_method`). Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed, the recommended method is 'pyfftw'. domain: {"spatial", "spectral"} If "spatial", all computations are done in the spatial domain (the classical STEPS model). If "spectral", the AR(2) models and stochastic perturbations are applied directly in the spectral domain to reduce memory footprint and improve performance :cite:`PCH2019b`. outdir_path_skill: string, optional Path to folder where the historical skill are stored. 
Defaults to path_workdir from rcparams. If no path is given, './tmp' will be used. extrap_kwargs: dict, optional Optional dictionary containing keyword arguments for the extrapolation method. See the documentation of :py:func:`pysteps.extrapolation.interface`. filter_kwargs: dict, optional Optional dictionary containing keyword arguments for the filter method. See the documentation of :py:mod:`pysteps.cascade.bandpass_filters`. noise_kwargs: dict, optional Optional dictionary containing keyword arguments for the initializer of the noise generator. See the documentation of :py:mod:`pysteps.noise.fftgenerators`. vel_pert_kwargs: dict, optional Optional dictionary containing keyword arguments 'p_par' and 'p_perp' for the initializer of the velocity perturbator. The choice of the optimal parameters depends on the domain and the used optical flow method. Default parameters from :cite:`BPS2006`: p_par = [10.88, 0.23, -7.68] p_perp = [5.76, 0.31, -2.72] Parameters fitted to the data (optical flow/domain): darts/fmi: p_par = [13.71259667, 0.15658963, -16.24368207] p_perp = [8.26550355, 0.17820458, -9.54107834] darts/mch: p_par = [24.27562298, 0.11297186, -27.30087471] p_perp = [-7.80797846e+01, -3.38641048e-02, 7.56715304e+01] darts/fmi+mch: p_par = [16.55447057, 0.14160448, -19.24613059] p_perp = [14.75343395, 0.11785398, -16.26151612] lucaskanade/fmi: p_par = [2.20837526, 0.33887032, -2.48995355] p_perp = [2.21722634, 0.32359621, -2.57402761] lucaskanade/mch: p_par = [2.56338484, 0.3330941, -2.99714349] p_perp = [1.31204508, 0.3578426, -1.02499891] lucaskanade/fmi+mch: p_par = [2.31970635, 0.33734287, -2.64972861] p_perp = [1.90769947, 0.33446594, -2.06603662] vet/fmi: p_par = [0.25337388, 0.67542291, 11.04895538] p_perp = [0.02432118, 0.99613295, 7.40146505] vet/mch: p_par = [0.5075159, 0.53895212, 7.90331791] p_perp = [0.68025501, 0.41761289, 4.73793581] vet/fmi+mch: p_par = [0.29495222, 0.62429207, 8.6804131 ] p_perp = [0.23127377, 0.59010281, 5.98180004] fmi=Finland, 
mch=Switzerland, fmi+mch=both pooled into the same data set The above parameters have been fitted by using run_vel_pert_analysis.py and fit_vel_pert_params.py located in the scripts directory. See :py:mod:`pysteps.noise.motion` for additional documentation. clim_kwargs: dict, optional Optional dictionary containing keyword arguments for the climatological skill file. Arguments can consist of: 'outdir_path', 'n_models' (the number of NWP models) and 'window_length' (the minimum number of days the clim file should have, otherwise the default is used). mask_kwargs: dict Optional dictionary containing mask keyword arguments 'mask_f', 'mask_rim' and 'max_mask_rim', the factor defining the the mask increment and the (maximum) rim size, respectively. The mask increment is defined as mask_f*timestep/kmperpixel. measure_time: bool If set to True, measure, print and return the computation time. Returns ------- out: ndarray If return_output is True, a four-dimensional array of shape (n_ens_members,num_timesteps,m,n) containing a time series of forecast precipitation fields for each ensemble member. Otherwise, a None value is returned. The time series starts from t0+timestep, where timestep is taken from the input precipitation fields precip. If measure_time is True, the return value is a three-element tuple containing the nowcast array, the initialization time of the nowcast generator and the time used in the main loop (seconds). See also -------- :py:mod:`pysteps.extrapolation.interface`, :py:mod:`pysteps.cascade.interface`, :py:mod:`pysteps.noise.interface`, :py:func:`pysteps.noise.utils.compute_noise_stddev_adjs` References ---------- :cite:`Seed2003`, :cite:`BPS2004`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b` Notes ----- 1. The blending currently does not blend the beta-parameters in the parametric noise method. It is recommended to use the non-parameteric noise method. 2. If blend_nwp_members is True, the BPS2006 method for the weights is suboptimal. 
It is recommended to use the SPN2013 method instead. 3. Not yet implemented (and neither in the steps nowcasting module): The regression of the lag-1 and lag-2 parameters to their climatological values. See also eq. 12 - 19 in :cite: `BPS2004`. By doing so, the Phi parameters change over time, which enhances the AR process. This can become a future development if this turns out to be a warranted functionality. """ blending_config = StepsBlendingConfig( n_ens_members=n_ens_members, n_cascade_levels=n_cascade_levels, blend_nwp_members=blend_nwp_members, precip_threshold=precip_thr, norain_threshold=norain_thr, kmperpixel=kmperpixel, timestep=timestep, extrapolation_method=extrap_method, decomposition_method=decomp_method, bandpass_filter_method=bandpass_filter_method, nowcasting_method=nowcasting_method, noise_method=noise_method, noise_stddev_adj=noise_stddev_adj, ar_order=ar_order, velocity_perturbation_method=vel_pert_method, weights_method=weights_method, timestep_start_full_nwp_weight=timestep_start_full_nwp_weight, conditional=conditional, probmatching_method=probmatching_method, mask_method=mask_method, resample_distribution=resample_distribution, smooth_radar_mask_range=smooth_radar_mask_range, seed=seed, num_workers=num_workers, fft_method=fft_method, domain=domain, outdir_path_skill=outdir_path_skill, extrapolation_kwargs=extrap_kwargs, filter_kwargs=filter_kwargs, noise_kwargs=noise_kwargs, velocity_perturbation_kwargs=vel_pert_kwargs, climatology_kwargs=clim_kwargs, mask_kwargs=mask_kwargs, measure_time=measure_time, callback=callback, return_output=return_output, ) """ With the new refactoring, the blending nowcaster is a class that can be used in multiple ways. This method is here to ensure that the class can be used in a similar way as the old function. The new refactoring provides more possibilities, eg. 
when doing multiple forecasts in a row, the config does not need to be provided each time """ # Create an instance of the new class with all the provided arguments blended_nowcaster = StepsBlendingNowcaster( precip, precip_nowcast, precip_models, velocity, velocity_models, timesteps, issuetime, blending_config, ) forecast_steps_nowcast = blended_nowcaster.compute_forecast() return forecast_steps_nowcast # TODO: Where does this piece of code best fit: in utils or inside the class? def calculate_ratios(correlations): """Calculate explained variance ratios from correlation. Parameters ---------- Array of shape [component, scale_level, ...] containing correlation (skills) for each component (NWP and nowcast), scale level, and optionally along [y, x] dimensions. Returns ------- out : numpy array An array containing the ratios of explain variance for each component, scale level, ... """ # correlations: [component, scale, ...] square_corrs = np.square(correlations) # Calculate the ratio of the explained variance to the unexplained # variance of the nowcast and NWP model components out = square_corrs / (1 - square_corrs) # out: [component, scale, ...] return out # TODO: Where does this piece of code best fit: in utils or inside the class? def calculate_weights_bps(correlations): """Calculate BPS blending weights for STEPS blending from correlation. Parameters ---------- correlations : array-like Array of shape [component, scale_level, ...] containing correlation (skills) for each component (NWP and nowcast), scale level, and optionally along [y, x] dimensions. Returns ------- weights : array-like Array of shape [component+1, scale_level, ...] containing the weights to be used in STEPS blending for each original component plus an addtional noise component, scale level, and optionally along [y, x] dimensions. References ---------- :cite:`BPS2006` Notes ----- The weights in the BPS method can sum op to more than 1.0. """ # correlations: [component, scale, ...] 
# Check if the correlations are positive, otherwise rho = 10e-5 correlations = np.where(correlations < 10e-5, 10e-5, correlations) # If we merge more than one component with the noise cascade, we follow # the weights impolementation in either :cite:`BPS2006` or :cite:`SPN2013`. if correlations.shape[0] > 1: # Calculate weights for each source ratios = calculate_ratios(correlations) # ratios: [component, scale, ...] total_ratios = np.sum(ratios, axis=0) # total_ratios: [scale, ...] - the denominator of eq. 11 & 12 in BPS2006 weights = correlations * np.sqrt(ratios / total_ratios) # weights: [component, scale, ...] # Calculate the weight of the noise component. # Original BPS2006 method in the following two lines (eq. 13) total_square_weights = np.sum(np.square(weights), axis=0) noise_weight = np.sqrt(1.0 - total_square_weights) # Finally, add the noise_weights to the weights variable. weights = np.concatenate((weights, noise_weight[None, ...]), axis=0) # Otherwise, the weight equals the correlation on that scale level and # the noise component weight equals 1 - this weight. This only occurs for # the weights calculation outside the radar domain where in the case of 1 # NWP model or ensemble member, no blending of multiple models has to take # place else: noise_weight = 1.0 - correlations weights = np.concatenate((correlations, noise_weight), axis=0) return weights # TODO: Where does this piece of code best fit: in utils or inside the class? def calculate_weights_spn(correlations, covariance): """Calculate SPN blending weights for STEPS blending from correlation. Parameters ---------- correlations : array-like Array of shape [n_components] containing correlation (skills) for each component (NWP models and nowcast). covariance : array-like Array of shape [n_components, n_components] containing the covariance matrix of the models that will be blended. 
If cov is set to None and correlations only contains one model, the weight equals the correlation on that scale level and the noise component weight equals 1 - this weight. Returns ------- weights : array-like Array of shape [component+1] containing the weights to be used in STEPS blending for each original component plus an addtional noise component. References ---------- :cite:`SPN2013` """ # Check if the correlations are positive, otherwise rho = 10e-5 correlations = np.where(correlations < 10e-5, 10e-5, correlations) if correlations.shape[0] > 1 and len(covariance) > 1: if isinstance(covariance, type(None)): raise ValueError("cov must contain a covariance matrix") else: # Make a numpy array out of cov and get the inverse covariance = np.where(covariance == 0.0, 10e-5, covariance) # Make sure the determinant of the matrix is not zero, otherwise # subtract 10e-5 from the cross-correlations between the models if np.linalg.det(covariance) == 0.0: covariance = covariance - 10e-5 # Ensure the correlation of the model with itself is always 1.0 for i, _ in enumerate(covariance): covariance[i][i] = 1.0 # Use a numpy array instead of a matrix cov_matrix = np.array(covariance) # Get the inverse of the matrix using scipy's inv function cov_matrix_inv = inv(cov_matrix) # The component weights are the dot product between cov_matrix_inv and cor_vec weights = np.dot(cov_matrix_inv, correlations) weights = np.nan_to_num( weights, copy=True, nan=10e-5, posinf=10e-5, neginf=10e-5 ) weights_dot_correlations = np.dot(weights, correlations) # If the dot product of the weights with the correlations is # larger than 1.0, we assign a weight of 0.0 to the noise (to make # it numerically stable) if weights_dot_correlations > 1.0: noise_weight = np.array([0]) # Calculate the noise weight else: noise_weight = np.sqrt(1.0 - weights_dot_correlations) # Convert weights to a 1D array weights = np.array(weights).flatten() # Ensure noise_weight is a 1D array before concatenation noise_weight = 
np.array(noise_weight).flatten() # Finally, add the noise_weights to the weights variable. weights = np.concatenate((weights, noise_weight), axis=0) # Otherwise, the weight equals the correlation on that scale level and # the noise component weight equals 1 - this weight. This only occurs for # the weights calculation outside the radar domain where in the case of 1 # NWP model or ensemble member, no blending of multiple models has to take # place else: noise_weight = 1.0 - correlations weights = np.concatenate((correlations, noise_weight), axis=0) # Make sure weights are always a real number weights = np.nan_to_num(weights, copy=True, nan=10e-5, posinf=10e-5, neginf=10e-5) return weights # TODO: Where does this piece of code best fit: in utils or inside the class? def calculate_end_weights( previous_weights, timestep, n_timesteps, start_full_nwp_weight, model_only=False ): """Calculate the linear transition from the previous weights to the final weights (1.0 for NWP and 0.0 for the extrapolation and noise components). This method uses the BPS weights determination method to determine the corresponding noise. Parameters ---------- previous_weights : array-like The weights from the previous timestep. This weight will be used to ensure a linear transition takes place from the last weights at the timestep of start_full_nwp_weight and the final weights (1.0 for NWP and 0.0 for the extrapolation and noise components). timestep : int The timestep or sub timestep for which the weight is calculated. Only used when start_full_nwp_weight is not None. n_timesteps: int The total number of forecast timesteps in the forecast. start_full_nwp_weight : int The timestep, which should be smaller than timesteps, at which a linear transition takes place from the calculated weights to full NWP weight (and zero extrapolation and noise weight) to ensure the blending procedure becomes equal to the NWP forecast(s) at the last timestep of the blending procedure. 
If not provided, the blending stick to the theoretical weights provided by the chosen weights_method for a given lead time and skill of each blending component. model_only : bool If set to True, the weights will only be determined for the model and noise components. Returns ------- weights : array-like Array of shape [component+1, scale_level, ...] containing the weights to be used in STEPS blending for each original component plus an addtional noise component, scale level, and optionally along [y, x] dimensions. References ---------- :cite:`BPS2006` Notes ----- The weights in the BPS method can sum op to more than 1.0. """ weights = previous_weights[:-1, :].copy() if not model_only: if timestep > start_full_nwp_weight and timestep < n_timesteps: weights[0, :] = weights[0, :] - ( (timestep - start_full_nwp_weight) / (n_timesteps - start_full_nwp_weight) * weights[0, :] ) weights[1:, :] = ( 1.0 / weights[1:, :].shape[0] * ( weights[1:, :] + ( (timestep - start_full_nwp_weight) / (n_timesteps - start_full_nwp_weight) * (1.0 - weights[1:, :]) ) ) ) elif timestep > start_full_nwp_weight and timestep == n_timesteps: weights[0, :] = 0.0 # If one model or model member is provided to blend together, # the weight equals 1.0, otherwise the sum of the weights # equals 1.0. weights[1:, :] = 1.0 / weights[1:, :].shape[0] else: if timestep > start_full_nwp_weight and timestep < n_timesteps: weights = ( 1.0 / weights.shape[0] * ( weights + ( (timestep - start_full_nwp_weight) / (n_timesteps - start_full_nwp_weight) * (1.0 - weights) ) ) ) elif timestep > start_full_nwp_weight and timestep == n_timesteps: weights[:] = 1.0 / weights.shape[0] if weights.shape[0] > 1: # Calculate the weight of the noise component. # Original BPS2006 method in the following two lines (eq. 13) total_square_weights = np.sum(np.square(weights), axis=0) noise_weight = np.sqrt(1.0 - total_square_weights) # Finally, add the noise_weights to the weights variable. 
weights = np.concatenate((weights, noise_weight[None, ...]), axis=0) else: noise_weight = 1.0 - weights weights = np.concatenate((weights, noise_weight), axis=0) return weights # TODO: Where does this piece of code best fit: in utils or inside the class? def blend_means_sigmas(means, sigmas, weights): """Calculate the blended means and sigmas, the normalization parameters needed to recompose the cascade. This procedure uses the weights of the blending of the normalized cascades and follows eq. 32 and 33 in BPS2004. Parameters ---------- means : array-like Array of shape [number_components, scale_level, ...] with the mean for each component (NWP, nowcasts, noise). sigmas : array-like Array of shape [number_components, scale_level, ...] with the standard deviation for each component. weights : array-like An array of shape [number_components + 1, scale_level, ...] containing the weights to be used in this routine for each component plus noise, scale level, and optionally [y, x] dimensions, obtained by calling either :py:func:`pysteps.blending.steps.calculate_weights_bps` or :py:func:`pysteps.blending.steps.calculate_weights_spn`. Returns ------- combined_means : array-like An array of shape [scale_level, ...] containing per scale level (cascade) the weighted combination of means from multiple components (NWP, nowcasts and noise). combined_sigmas : array-like An array of shape [scale_level, ...] similar to combined_means, but containing the standard deviations. """ # Check if the dimensions are the same diff_dims = weights.ndim - means.ndim if diff_dims: for i in range(diff_dims): means = np.expand_dims(means, axis=means.ndim) diff_dims = weights.ndim - sigmas.ndim if diff_dims: for i in range(diff_dims): sigmas = np.expand_dims(sigmas, axis=sigmas.ndim) # Weight should have one component more (the noise component) than the # means and sigmas. 
Check this if ( weights.shape[0] - means.shape[0] != 1 or weights.shape[0] - sigmas.shape[0] != 1 ): raise ValueError( "The weights array does not have one (noise) component more than mu and sigma" ) else: # Throw away the last component, which is the noise component weights = weights[:-1] # Combine (blend) the means and sigmas combined_means = np.zeros(weights.shape[1]) combined_sigmas = np.zeros(weights.shape[1]) total_weight = np.sum((weights), axis=0) for i in range(weights.shape[0]): combined_means += (weights[i] / total_weight) * means[i] combined_sigmas += (weights[i] / total_weight) * sigmas[i] return combined_means, combined_sigmas ================================================ FILE: pysteps/blending/utils.py ================================================ # -*- coding: utf-8 -*- """ pysteps.blending.utils ====================== Module with common utilities used by the blending methods. .. autosummary:: :toctree: ../generated/ stack_cascades blend_cascades recompose_cascade blend_optical_flows decompose_NWP compute_store_nwp_motion load_NWP compute_smooth_dilated_mask """ import datetime import warnings from pathlib import Path import numpy as np from pysteps.cascade import get_method as cascade_get_method from pysteps.cascade.bandpass_filters import filter_gaussian from pysteps.exceptions import MissingOptionalDependency from pysteps.utils import get_method as utils_get_method from pysteps.utils.check_norain import check_norain as new_check_norain try: import netCDF4 NETCDF4_IMPORTED = True except ImportError: NETCDF4_IMPORTED = False try: import cv2 CV2_IMPORTED = True except ImportError: CV2_IMPORTED = False def stack_cascades(R_d, donorm=True): """Stack the given cascades into a larger array. Parameters ---------- R_d : dict Dictionary containing a list of cascades obtained by calling a method implemented in pysteps.cascade.decomposition. donorm : bool If True, normalize the cascade levels before stacking. 
    Returns
    -------
    out : tuple
        A three-element tuple containing a four-dimensional array of stacked
        cascade levels and arrays of mean values and standard deviations for
        each cascade level.
    """
    R_c = []
    mu_c = []
    sigma_c = []

    # Each entry of R_d is one decomposed field (dict with cascade levels
    # plus their means and standard deviations).
    for cascade in R_d:
        R_ = []
        R_i = cascade["cascade_levels"]
        n_levels = R_i.shape[0]
        mu_ = np.asarray(cascade["means"])
        sigma_ = np.asarray(cascade["stds"])
        if donorm:
            # Normalize each cascade level with its own mean and std. dev.
            for j in range(n_levels):
                R__ = (R_i[j, :, :] - mu_[j]) / sigma_[j]
                R_.append(R__)
        else:
            R_ = R_i
        R_c.append(np.stack(R_))
        mu_c.append(mu_)
        sigma_c.append(sigma_)
    return np.stack(R_c), np.stack(mu_c), np.stack(sigma_c)


def blend_cascades(cascades_norm, weights):
    """Calculate blended normalized cascades using STEPS weights following eq.
    10 in :cite:`BPS2006`.

    Parameters
    ----------
    cascades_norm : array-like
        Array of shape [number_components + 1, scale_level, ...]
        with the cascade for each component (NWP, nowcasts, noise) and scale
        level, obtained by calling a method implemented in
        pysteps.blending.utils.stack_cascades

    weights : array-like
        An array of shape [number_components + 1, scale_level, ...]
        containing the weights to be used in this routine
        for each component plus noise, scale level, and optionally [y, x]
        dimensions, obtained by calling a method implemented in
        pysteps.blending.steps.calculate_weights

    Returns
    -------
    combined_cascade : array-like
        An array of shape [scale_level, y, x]
        containing per scale level (cascade) the weighted combination of
        cascades from multiple components (NWP, nowcasts and noise) to be
        used in STEPS blending.
    """
    # check inputs: accept lists/tuples of arrays as well
    if isinstance(cascades_norm, (list, tuple)):
        cascades_norm = np.stack(cascades_norm)

    if isinstance(weights, (list, tuple)):
        weights = np.asarray(weights)

    # check weights dimensions match number of sources
    num_sources = cascades_norm.shape[0]
    num_sources_klevels = cascades_norm.shape[1]
    num_weights = weights.shape[0]
    num_weights_klevels = weights.shape[1]

    if num_weights != num_sources:
        raise ValueError(
            "dimension mismatch between cascades and weights.\n"
            "weights dimension must match the number of components in cascades.\n"
            f"number of models={num_sources}, number of weights={num_weights}"
        )
    if num_weights_klevels != num_sources_klevels:
        raise ValueError(
            "dimension mismatch between cascades and weights.\n"
            "weights cascade levels dimension must match the number of cascades in cascades_norm.\n"
            f"number of cascade levels={num_sources_klevels}, number of weights={num_weights_klevels}"
        )

    # cascade_norm component, scales, y, x
    # weights component, scales, ....
    # Reshape weights to make the calculation possible with numpy broadcasting
    all_c_wn = weights.reshape(num_weights, num_weights_klevels, 1, 1) * cascades_norm
    combined_cascade = np.sum(all_c_wn, axis=0)
    # combined_cascade [scale, ...]
    return combined_cascade


def recompose_cascade(combined_cascade, combined_mean, combined_sigma):
    """Recompose the cascades into a transformed rain rate field.


    Parameters
    ----------
    combined_cascade : array-like
        An array of shape [scale_level, y, x]
        containing per scale level (cascade) the weighted combination of
        cascades from multiple components (NWP, nowcasts and noise) to be
        used in STEPS blending.
    combined_mean : array-like
        An array of shape [scale_level, ...]
        similar to combined_cascade, but containing the normalization
        parameter mean.
    combined_sigma : array-like
        An array of shape [scale_level, ...]
        similar to combined_cascade, but containing the normalization
        parameter standard deviation.

    Returns
    -------
    out: array-like
        A two-dimensional array containing the recomposed cascade.

    """
    # Renormalize with the blended sigma and mean values
    renorm = (
        combined_cascade * combined_sigma.reshape(combined_cascade.shape[0], 1, 1)
    ) + combined_mean.reshape(combined_mean.shape[0], 1, 1)
    # Sum the denormalized cascade levels to recompose the field
    out = np.sum(renorm, axis=0)
    return out


def blend_optical_flows(flows, weights):
    """Combine advection fields using given weights. Following :cite:`BPS2006`
    the second level of the cascade is used for the weights

    Parameters
    ----------
    flows : array-like
        A stack of multiple advection fields having shape
        (S, 2, m, n), where flows[N, :, :, :] contains the motion vectors
        for source N.
        Advection fields for each source can be obtained by
        calling any of the methods implemented in
        pysteps.motion and then stack all together
    weights : array-like
        An array of shape [number_sources]
        containing the weights to be used to combine
        the advection fields of each source.
        weights are modified to make their sum equal to one.

    Returns
    -------
    out: ndarray
        Return the blended advection field having shape
        (2, m, n), where out[0, :, :] contains the x-components of
        the blended motion vectors and out[1, :, :] contains the y-components.
        The velocities are in units of pixels / timestep.

""" # check inputs if isinstance(flows, (list, tuple)): flows = np.stack(flows) if isinstance(weights, (list, tuple)): weights = np.asarray(weights) # check weights dimensions match number of sources num_sources = flows.shape[0] num_weights = weights.shape[0] if num_weights != num_sources: raise ValueError( "dimension mismatch between flows and weights.\n" "weights dimension must match the number of flows.\n" f"number of flows={num_sources}, number of weights={num_weights}" ) # normalize weigths weights = weights / np.sum(weights) # flows dimension sources, 2, m, n # weights dimension sources # move source axis to last to allow broadcasting # TODO: Check if broadcasting has worked well all_c_wn = weights * np.moveaxis(flows, 0, -1) # sum uses last axis combined_flows = np.sum(all_c_wn, axis=-1) # combined_flows [2, m, n] return combined_flows def decompose_NWP( R_NWP, NWP_model, analysis_time, timestep, valid_times, output_path, num_cascade_levels=8, num_workers=1, decomp_method="fft", fft_method="numpy", domain="spatial", normalize=True, compute_stats=True, compact_output=True, ): """Decomposes the NWP forecast data into cascades and saves it in a netCDF file Parameters ---------- R_NWP: array-like Array of dimension (n_timesteps, x, y) containing the precipitation forecast from some NWP model. NWP_model: str The name of the NWP model analysis_time: numpy.datetime64 The analysis time of the NWP forecast. The analysis time is assumed to be a numpy.datetime64 type as imported by the pysteps importer timestep: int Timestep in minutes between subsequent NWP forecast fields valid_times: array_like Array containing the valid times of the NWP forecast fields. The times are assumed to be numpy.datetime64 types as imported by the pysteps importer. output_path: str The location where to save the file with the NWP cascade. Defaults to the path_workdir specified in the rcparams file. num_cascade_levels: int, optional The number of frequency bands to use. 
Must be greater than 2. Defaults to 8. num_workers: int, optional The number of workers to use for parallel computation. Applicable if dask is enabled or pyFFTW is used for computing the FFT. When num_workers>1, it is advisable to disable OpenMP by setting the environment variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous threads. Other Parameters ---------------- decomp_method: str, optional A string defining the decomposition method to use. Defaults to "fft". fft_method: str or tuple, optional A string or a (function,kwargs) tuple defining the FFT method to use (see :py:func:`pysteps.utils.interface.get_method`). Defaults to "numpy". This option is not used if input_domain and output_domain are both set to "spectral". domain: {"spatial", "spectral"}, optional If "spatial", the output cascade levels are transformed back to the spatial domain by using the inverse FFT. If "spectral", the cascade is kept in the spectral domain. Defaults to "spatial". normalize: bool, optional If True, normalize the cascade levels to zero mean and unit variance. Requires that compute_stats is True. Implies that compute_stats is True. Defaults to False. compute_stats: bool, optional If True, the output dictionary contains the keys "means" and "stds" for the mean and standard deviation of each output cascade level. Defaults to False. compact_output: bool, optional Applicable if output_domain is "spectral". If set to True, only the parts of the Fourier spectrum with non-negligible filter weights are stored. Defaults to False. 
Returns ------- None """ if not NETCDF4_IMPORTED: raise MissingOptionalDependency( "netCDF4 package is required to save the decomposed NWP data, " "but it is not installed" ) # Make a NetCDF file output_date = f"{analysis_time.astype('datetime64[us]').astype(datetime.datetime):%Y%m%d%H%M%S}" outfn = Path(output_path) / f"cascade_{NWP_model}_{output_date}.nc" ncf = netCDF4.Dataset(outfn, "w", format="NETCDF4") # Express times relative to the zero time zero_time = np.datetime64("1970-01-01T00:00:00", "ns") valid_times = np.array(valid_times) - zero_time analysis_time = analysis_time - zero_time # Set attributes of decomposition method ncf.domain = domain ncf.normalized = int(normalize) ncf.compact_output = int(compact_output) ncf.analysis_time = int(analysis_time) ncf.timestep = int(timestep) # Create dimensions ncf.createDimension("time", R_NWP.shape[0]) ncf.createDimension("cascade_levels", num_cascade_levels) ncf.createDimension("x", R_NWP.shape[2]) ncf.createDimension("y", R_NWP.shape[1]) # Create variables (decomposed cascade, means and standard deviations) R_d = ncf.createVariable( "pr_decomposed", np.float32, ("time", "cascade_levels", "y", "x"), zlib=True, complevel=4, ) means = ncf.createVariable("means", np.float64, ("time", "cascade_levels")) stds = ncf.createVariable("stds", np.float64, ("time", "cascade_levels")) v_times = ncf.createVariable("valid_times", np.float64, ("time",)) v_times.units = "nanoseconds since 1970-01-01 00:00:00" # The valid times are saved as an array of floats, because netCDF files can't handle datetime types v_times[:] = np.array([np.float64(valid_times[i]) for i in range(len(valid_times))]) # Decompose the NWP data filter_g = filter_gaussian(R_NWP.shape[1:], num_cascade_levels) fft = utils_get_method(fft_method, shape=R_NWP.shape[1:], n_threads=num_workers) decomp_method, _ = cascade_get_method(decomp_method) for i in range(R_NWP.shape[0]): R_ = decomp_method( field=R_NWP[i, :, :], bp_filter=filter_g, fft_method=fft, 
input_domain=domain, output_domain=domain, normalize=normalize, compute_stats=compute_stats, compact_output=compact_output, ) # Save data to netCDF file # print(R_["cascade_levels"]) R_d[i, :, :, :] = R_["cascade_levels"] means[i, :] = R_["means"] stds[i, :] = R_["stds"] # Close the file ncf.close() def compute_store_nwp_motion( precip_nwp, oflow_method, analysis_time, nwp_model, output_path, ): """Computes, per forecast lead time, the velocity field of an NWP model field. Parameters ---------- precip_nwp: array-like Array of dimension (n_timesteps, x, y) containing the precipitation forecast from some NWP model. oflow_method: {'constant', 'darts', 'lucaskanade', 'proesmans', 'vet'}, optional An optical flow method from pysteps.motion.get_method. analysis_time: numpy.datetime64 The analysis time of the NWP forecast. The analysis time is assumed to be a numpy.datetime64 type as imported by the pysteps importer. nwp_model: str The name of the NWP model. output_path: str, optional The location where to save the file with the NWP velocity fields. Defaults to the path_workdir specified in the rcparams file. Returns ------- Nothing """ # Set the output file output_date = f"{analysis_time.astype('datetime64[us]').astype(datetime.datetime):%Y%m%d%H%M%S}" outfn = Path(output_path) / f"motion_{nwp_model}_{output_date}.npy" # Get the velocity field per time step v_nwp = np.zeros((precip_nwp.shape[0], 2, precip_nwp.shape[1], precip_nwp.shape[2])) # Loop through the timesteps. We need two images to construct a motion # field, so we can start from timestep 1. for t in range(1, precip_nwp.shape[0]): v_nwp[t] = oflow_method(precip_nwp[t - 1 : t + 1, :, :]) # Make timestep 0 the same as timestep 1. 
v_nwp[0] = v_nwp[1] assert v_nwp.ndim == 4, "v_nwp must be a four-dimensional array" # Save it as a numpy array np.save(outfn, v_nwp) def load_NWP(input_nc_path_decomp, input_path_velocities, start_time, n_timesteps): """Loads the decomposed NWP and velocity data from the netCDF files Parameters ---------- input_nc_path_decomp: str Path to the saved netCDF file containing the decomposed NWP data. input_path_velocities: str Path to the saved numpy binary file containing the estimated velocity fields from the NWP data. start_time: numpy.datetime64 The start time of the nowcasting. Assumed to be a numpy.datetime64 type n_timesteps: int Number of time steps to forecast Returns ------- R_d: list A list of dictionaries with each element in the list corresponding to a different time step. Each dictionary has the same structure as the output of the decomposition function uv: array-like Array of shape (timestep,2,m,n) containing the x- and y-components of the advection field for the (NWP) model field per forecast lead time. 
""" if not NETCDF4_IMPORTED: raise MissingOptionalDependency( "netCDF4 package is required to load the decomposed NWP data, " "but it is not installed" ) # Open the file ncf_decomp = netCDF4.Dataset(input_nc_path_decomp, "r", format="NETCDF4") velocities = np.load(input_path_velocities) decomp_dict = { "domain": ncf_decomp.domain, "normalized": bool(ncf_decomp.normalized), "compact_output": bool(ncf_decomp.compact_output), } # Convert the start time and the timestep to datetime64 and timedelta64 type zero_time = np.datetime64("1970-01-01T00:00:00", "ns") analysis_time = np.timedelta64(int(ncf_decomp.analysis_time), "ns") + zero_time timestep = ncf_decomp.timestep timestep = np.timedelta64(timestep, "m") valid_times = ncf_decomp.variables["valid_times"][:] valid_times = np.array( [np.timedelta64(int(valid_times[i]), "ns") for i in range(len(valid_times))] ) valid_times = valid_times + zero_time # Find the indices corresponding with the required start and end time start_i = (start_time - analysis_time) // timestep assert analysis_time + start_i * timestep == start_time end_i = start_i + n_timesteps + 1 # Check if the requested end time (the forecast horizon) is in the stored data. # If not, raise an error if end_i > ncf_decomp.variables["pr_decomposed"].shape[0]: raise IndexError( "The requested forecast horizon is outside the stored NWP forecast horizon. 
Either request a shorter forecast horizon or store a longer NWP forecast horizon" ) # Add the valid times to the output decomp_dict["valid_times"] = valid_times[start_i:end_i] # Slice the velocity fields with the start and end indices uv = velocities[start_i:end_i, :, :, :] # Initialise the list of dictionaries which will serve as the output (cf: the STEPS function) R_d = list() pr_decomposed = ncf_decomp.variables["pr_decomposed"][start_i:end_i, :, :, :] means = ncf_decomp.variables["means"][start_i:end_i, :] stds = ncf_decomp.variables["stds"][start_i:end_i, :] for i in range(n_timesteps + 1): decomp_dict["cascade_levels"] = np.ma.filled( pr_decomposed[i], fill_value=np.nan ) decomp_dict["means"] = np.ma.filled(means[i], fill_value=np.nan) decomp_dict["stds"] = np.ma.filled(stds[i], fill_value=np.nan) R_d.append(decomp_dict.copy()) ncf_decomp.close() return R_d, uv def check_norain(precip_arr, precip_thr=None, norain_thr=0.0): """ DEPRECATED use :py:mod:`pysteps.utils.check_norain.check_norain` in stead Parameters ---------- precip_arr: array-like Array containing the input precipitation field precip_thr: float, optional Specifies the threshold value for minimum observable precipitation intensity. If None, the minimum value over the domain is taken. norain_thr: float, optional Specifies the threshold value for the fraction of rainy pixels in precip_arr below which we consider there to be no rain. Standard set to 0.0 Returns ------- norain: bool Returns whether the fraction of rainy pixels is below the norain_thr threshold. """ warnings.warn( "pysteps.blending.utils.check_norain has been deprecated, use pysteps.utils.check_norain.check_norain instead" ) return new_check_norain(precip_arr, precip_thr, norain_thr, None) def compute_smooth_dilated_mask( original_mask, max_padding_size_in_px=0, gaussian_kernel_size=9, inverted=False, non_linear_growth_kernel_sizes=False, ): """ Compute a smooth dilated mask using Gaussian blur and dilation with varying kernel sizes. 
Parameters ---------- original_mask : array_like Two-dimensional boolean array containing the input mask. max_padding_size_in_px : int The maximum size of the padding in pixels. Default is 100. gaussian_kernel_size : int, optional Size of the Gaussian kernel to use for blurring, this should be an uneven number. This option ensures that the nan-fields are large enough to start the smoothing. Without it, the method will also be applied to local nan-values in the radar domain. Default is 9, which is generally a recommended number to work with. inverted : bool, optional Typically, the smoothed mask works from the outside of the radar domain inward, using the max_padding_size_in_px. If set to True, it works from the edge of the radar domain outward (generally not recommended). Default is False. non_linear_growth_kernel_sizes : bool, optional If True, use non-linear growth for kernel sizes. Default is False. Returns ------- final_mask : array_like The smooth dilated mask normalized to the range [0,1]. """ if not CV2_IMPORTED: raise MissingOptionalDependency( "CV2 package is required to transform the mask into a smoot mask." " Please install it using `pip install opencv-python`." 
) if max_padding_size_in_px < 0: raise ValueError("max_padding_size_in_px must be greater than or equal to 0.") # Check if gaussian_kernel_size is an uneven number assert gaussian_kernel_size % 2 # Convert the original mask to uint8 numpy array and invert if needed array_2d = np.array(original_mask, dtype=np.uint8) if inverted: array_2d = np.bitwise_not(array_2d) # Rescale the 2D array values to 0-255 (black or white) rescaled_array = array_2d * 255 # Apply Gaussian blur to the rescaled array blurred_image = cv2.GaussianBlur( rescaled_array, (gaussian_kernel_size, gaussian_kernel_size), 0 ) # Apply binary threshold to negate the blurring effect _, binary_image = cv2.threshold(blurred_image, 128, 255, cv2.THRESH_BINARY) # Define kernel sizes if non_linear_growth_kernel_sizes: lin_space = np.linspace(0, np.sqrt(max_padding_size_in_px), 10) non_lin_space = np.power(lin_space, 2) kernel_sizes = list(set(non_lin_space.astype(np.uint8))) else: kernel_sizes = np.linspace(0, max_padding_size_in_px, 10, dtype=np.uint8) # Process each kernel size final_mask = np.zeros_like(binary_image, dtype=np.float64) for kernel_size in kernel_sizes: if kernel_size == 0: dilated_image = binary_image else: kernel = cv2.getStructuringElement( cv2.MORPH_ELLIPSE, (kernel_size, kernel_size) ) dilated_image = cv2.dilate(binary_image, kernel) # Convert the dilated image to a binary array _, binary_array = cv2.threshold(dilated_image, 128, 1, cv2.THRESH_BINARY) final_mask += binary_array final_mask = final_mask / final_mask.max() return final_mask ================================================ FILE: pysteps/cascade/__init__.py ================================================ # -*- coding: utf-8 -*- """ Methods for constructing bandpass filters and decomposing 2d precipitation fields into different spatial scales. 
""" from .interface import get_method ================================================ FILE: pysteps/cascade/bandpass_filters.py ================================================ """ pysteps.cascade.bandpass_filters ================================ Bandpass filters for separating different spatial scales from two-dimensional images in the frequency domain. The methods in this module implement the following interface:: filter_xxx(shape, n, optional arguments) where shape is the shape of the input field, respectively, and n is the number of frequency bands to use. The output of each filter function is a dictionary containing the following key-value pairs: .. tabularcolumns:: |p{1.8cm}|L| +-----------------+-----------------------------------------------------------+ | Key | Value | +=================+===========================================================+ | weights_1d | 2d array of shape (n, r) containing 1d filter weights for | | | each frequency band k=1,2,...,n | +-----------------+-----------------------------------------------------------+ | weights_2d | 3d array of shape (n, M, int(N/2)+1) containing the 2d | | | filter weights for each frequency band k=1,2,...,n | +-----------------+-----------------------------------------------------------+ | central_freqs | 1d array of shape n containing the central frequencies of | | | the filters | +-----------------+-----------------------------------------------------------+ | shape | the shape of the input field in the spatial domain | +-----------------+-----------------------------------------------------------+ where r = int(max(N, M)/2)+1 By default, the filter weights are normalized so that for any Fourier wavenumber they sum to one. Available filters ----------------- .. autosummary:: :toctree: ../generated/ filter_uniform filter_gaussian """ import numpy as np def filter_uniform(shape, n): """ A dummy filter with one frequency band covering the whole domain. The weights are set to one. 
Parameters ---------- shape: int or tuple The dimensions (height, width) of the input field. If shape is an int, the domain is assumed to have square shape. n: int Not used. Needed for compatibility with the filter interface. Returns ------- out: dict A dictionary containing the filter. """ del n # Unused out = {} try: height, width = shape except TypeError: height, width = (shape, shape) r_max = int(max(width, height) / 2) + 1 out["weights_1d"] = np.ones((1, r_max)) out["weights_2d"] = np.ones((1, height, int(width / 2) + 1)) out["central_freqs"] = None out["central_wavenumbers"] = None out["shape"] = shape return out def filter_gaussian( shape, n, gauss_scale=0.5, d=1.0, normalize=True, return_weight_funcs=False, include_mean=True, ): """ Implements a set of Gaussian bandpass filters in logarithmic frequency scale. Parameters ---------- shape: int or tuple The dimensions (height, width) of the input field. If shape is an int, the domain is assumed to have square shape. n: int The number of frequency bands to use. Must be greater than 2. gauss_scale: float Optional scaling parameter. Proportional to the standard deviation of the Gaussian weight functions. d: scalar, optional Sample spacing (inverse of the sampling rate). Defaults to 1. normalize: bool If True, normalize the weights so that for any given wavenumber they sum to one. return_weight_funcs: bool If True, add callable weight functions to the output dictionary with the key 'weight_funcs'. include_mean: bool If True, include the first Fourier wavenumber (corresponding to the field mean) to the first filter. Returns ------- out: dict A dictionary containing the bandpass filters corresponding to the specified frequency bands. 
References ---------- :cite:`PCH2018` """ if n < 3: raise ValueError("n must be greater than 2") try: height, width = shape except TypeError: height, width = (shape, shape) max_length = max(width, height) rx = np.s_[: int(width / 2) + 1] if (height % 2) == 1: ry = np.s_[-int(height / 2) : int(height / 2) + 1] else: ry = np.s_[-int(height / 2) : int(height / 2)] y_grid, x_grid = np.ogrid[ry, rx] dy = int(height / 2) if height % 2 == 0 else int(height / 2) + 1 r_2d = np.roll(np.sqrt(x_grid * x_grid + y_grid * y_grid), dy, axis=0) r_max = int(max_length / 2) + 1 r_1d = np.arange(r_max) wfs, central_wavenumbers = _gaussweights_1d( max_length, n, gauss_scale=gauss_scale, ) weights_1d = np.empty((n, r_max)) weights_2d = np.empty((n, height, int(width / 2) + 1)) for i, wf in enumerate(wfs): weights_1d[i, :] = wf(r_1d) weights_2d[i, :, :] = wf(r_2d) if normalize: weights_1d_sum = np.sum(weights_1d, axis=0) weights_2d_sum = np.sum(weights_2d, axis=0) for k in range(weights_2d.shape[0]): weights_1d[k, :] /= weights_1d_sum weights_2d[k, :, :] /= weights_2d_sum for i in range(len(wfs)): if i == 0 and include_mean: weights_1d[i, 0] = 1.0 weights_2d[i, 0, 0] = 1.0 else: weights_1d[i, 0] = 0.0 weights_2d[i, 0, 0] = 0.0 out = {"weights_1d": weights_1d, "weights_2d": weights_2d} out["shape"] = shape central_wavenumbers = np.array(central_wavenumbers) out["central_wavenumbers"] = central_wavenumbers # Compute frequencies central_freqs = 1.0 * central_wavenumbers / max_length central_freqs[0] = 1.0 / max_length central_freqs[-1] = 0.5 # Nyquist freq central_freqs = 1.0 * d * central_freqs out["central_freqs"] = central_freqs if return_weight_funcs: out["weight_funcs"] = wfs return out def _gaussweights_1d(l, n, gauss_scale=0.5): q = pow(0.5 * l, 1.0 / n) r = [(pow(q, k - 1), pow(q, k)) for k in range(1, n + 1)] r = [0.5 * (r_[0] + r_[1]) for r_ in r] def log_e(x): if len(np.shape(x)) > 0: res = np.empty(x.shape) res[x == 0] = 0.0 res[x > 0] = np.log(x[x > 0]) / np.log(q) else: if x 
== 0.0: res = 0.0 else: res = np.log(x) / np.log(q) return res class GaussFunc: def __init__(self, c, s): self.c = c self.s = s def __call__(self, x): x = log_e(x) - self.c return np.exp(-(x**2.0) / (2.0 * self.s**2.0)) weight_funcs = [] central_wavenumbers = [] for i, ri in enumerate(r): rc = log_e(ri) weight_funcs.append(GaussFunc(rc, gauss_scale)) central_wavenumbers.append(ri) return weight_funcs, central_wavenumbers ================================================ FILE: pysteps/cascade/decomposition.py ================================================ """ pysteps.cascade.decomposition ============================= Methods for decomposing two-dimensional fields into multiple spatial scales and recomposing the individual scales to obtain the original field. The methods in this module implement the following interface:: decomposition_xxx(field, bp_filter, **kwargs) recompose_xxx(decomp, **kwargs) where field is the input field and bp_filter is a dictionary returned by a filter method implemented in :py:mod:`pysteps.cascade.bandpass_filters`. The decomp argument is a decomposition obtained by calling decomposition_xxx. Optional parameters can be passed in the keyword arguments. 
The output of each method is a dictionary with the following key-value pairs: +-------------------+----------------------------------------------------------+ | Key | Value | +===================+==========================================================+ | cascade_levels | three-dimensional array of shape (k,m,n), where k is the | | | number of cascade levels and the input fields have shape | | | (m,n) | | | if domain is "spectral" and compact output is requested | | | (see the table below), cascade_levels contains a list of | | | one-dimensional arrays | +-------------------+----------------------------------------------------------+ | domain | domain of the cascade decomposition: "spatial" or | | | "spectral" | +-------------------+----------------------------------------------------------+ | normalized | are the cascade levels normalized: True or False | +-------------------+----------------------------------------------------------+ The following key-value pairs are optional. They are included in the output if ``kwargs`` contains the "compute_stats" key with value set to True: +-------------------+----------------------------------------------------------+ | Key | Value | +===================+==========================================================+ | means | list of mean values for each cascade level | +-------------------+----------------------------------------------------------+ | stds | list of standard deviations for each cascade level | +-------------------+----------------------------------------------------------+ The following key-value pairs are included in the output if ``kwargs`` contains the key "output_domain" with value set to "spectral": +-------------------+----------------------------------------------------------+ | Key | Value | +===================+==========================================================+ | compact_output | True or False. 
If set to True, only the parts of the | | | Fourier spectrum with non-negligible filter weights are | | | stored. | +-------------------+----------------------------------------------------------+ | weight_masks | Applicable if compact_output is True. Contains a list of | | | masks, where a True value indicates that the | | | corresponding Fourier wavenumber is included in the | | | decomposition | +-------------------+----------------------------------------------------------+ Available methods ----------------- .. autosummary:: :toctree: ../generated/ decomposition_fft recompose_fft """ import numpy as np from pysteps import utils def decomposition_fft(field, bp_filter, **kwargs): """ Decompose a two-dimensional input field into multiple spatial scales by using the Fast Fourier Transform (FFT) and a set of bandpass filters. Parameters ---------- field: array_like Two-dimensional array containing the input field. All values are required to be finite. bp_filter: dict A filter returned by a method implemented in :py:mod:`pysteps.cascade.bandpass_filters`. Other Parameters ---------------- fft_method: str or tuple A string or a (function,kwargs) tuple defining the FFT method to use (see :py:func:`pysteps.utils.interface.get_method`). Defaults to "numpy". This option is not used if input_domain and output_domain are both set to "spectral". normalize: bool If True, normalize the cascade levels to zero mean and unit variance. Requires that compute_stats is True. Implies that compute_stats is True. Defaults to False. mask: array_like Optional mask to use for computing the statistics for the cascade levels. Pixels with mask==False are excluded from the computations. This option is not used if output domain is "spectral". input_domain: {"spatial", "spectral"} The domain of the input field. If "spectral", the input is assumed to be in the spectral domain. Defaults to "spatial". 
output_domain: {"spatial", "spectral"} If "spatial", the output cascade levels are transformed back to the spatial domain by using the inverse FFT. If "spectral", the cascade is kept in the spectral domain. Defaults to "spatial". compute_stats: bool If True, the output dictionary contains the keys "means" and "stds" for the mean and standard deviation of each output cascade level. Defaults to False. compact_output: bool Applicable if output_domain is "spectral". If set to True, only the parts of the Fourier spectrum with non-negligible filter weights are stored. Defaults to False. subtract_mean: bool If set to True, subtract the mean value before the decomposition and store it to the output dictionary. Applicable if input_domain is "spatial". Defaults to False. Returns ------- out: ndarray A dictionary described in the module documentation. The number of cascade levels is determined from the filter (see :py:mod:`pysteps.cascade.bandpass_filters`). """ fft = kwargs.get("fft_method", "numpy") if isinstance(fft, str): fft = utils.get_method(fft, shape=field.shape) normalize = kwargs.get("normalize", False) mask = kwargs.get("mask", None) input_domain = kwargs.get("input_domain", "spatial") output_domain = kwargs.get("output_domain", "spatial") compute_stats = kwargs.get("compute_stats", True) compact_output = kwargs.get("compact_output", False) subtract_mean = kwargs.get("subtract_mean", False) if normalize and not compute_stats: compute_stats = True if len(field.shape) != 2: raise ValueError("The input is not two-dimensional array") if mask is not None and mask.shape != field.shape: raise ValueError( "Dimension mismatch between field and mask:" + "field.shape=" + str(field.shape) + ",mask.shape" + str(mask.shape) ) if field.shape[0] != bp_filter["weights_2d"].shape[1]: raise ValueError( "dimension mismatch between field and bp_filter: " + "field.shape[0]=%d , " % field.shape[0] + "bp_filter['weights_2d'].shape[1]" "=%d" % bp_filter["weights_2d"].shape[1] ) if ( 
input_domain == "spatial" and int(field.shape[1] / 2) + 1 != bp_filter["weights_2d"].shape[2] ): raise ValueError( "Dimension mismatch between field and bp_filter: " "int(field.shape[1]/2)+1=%d , " % (int(field.shape[1] / 2) + 1) + "bp_filter['weights_2d'].shape[2]" "=%d" % bp_filter["weights_2d"].shape[2] ) if ( input_domain == "spectral" and field.shape[1] != bp_filter["weights_2d"].shape[2] ): raise ValueError( "Dimension mismatch between field and bp_filter: " "field.shape[1]=%d , " % (field.shape[1] + 1) + "bp_filter['weights_2d'].shape[2]" "=%d" % bp_filter["weights_2d"].shape[2] ) if output_domain != "spectral": compact_output = False if np.any(~np.isfinite(field)): raise ValueError("field contains non-finite values") result = {} means = [] stds = [] if subtract_mean and input_domain == "spatial": field_mean = np.mean(field) field = field - field_mean result["field_mean"] = field_mean if input_domain == "spatial": field_fft = fft.rfft2(field) else: field_fft = field if output_domain == "spectral" and compact_output: weight_masks = [] field_decomp = [] for k in range(len(bp_filter["weights_1d"])): field_ = field_fft * bp_filter["weights_2d"][k, :, :] if output_domain == "spatial" or (compute_stats and mask is not None): field__ = fft.irfft2(field_) else: field__ = field_ if compute_stats: if output_domain == "spatial" or (compute_stats and mask is not None): if mask is not None: masked_field = field__[mask] else: masked_field = field__ mean = np.mean(masked_field) std = np.std(masked_field) else: mean = utils.spectral.mean(field_, bp_filter["shape"]) std = utils.spectral.std(field_, bp_filter["shape"]) means.append(mean) stds.append(std) if output_domain == "spatial": field_ = field__ if normalize: field_ = (field_ - mean) / std if output_domain == "spectral" and compact_output: weight_mask = bp_filter["weights_2d"][k, :, :] > 1e-12 field_ = field_[weight_mask] weight_masks.append(weight_mask) field_decomp.append(field_) result["domain"] = output_domain 
result["normalized"] = normalize result["compact_output"] = compact_output if output_domain == "spatial" or not compact_output: field_decomp = np.stack(field_decomp) result["cascade_levels"] = field_decomp if output_domain == "spectral" and compact_output: result["weight_masks"] = np.stack(weight_masks) if compute_stats: result["means"] = means result["stds"] = stds return result def recompose_fft(decomp, **kwargs): """ Recompose a cascade obtained with decomposition_fft by inverting the normalization and summing the cascade levels. Parameters ---------- decomp: dict A cascade decomposition returned by decomposition_fft. Returns ------- out: numpy.ndarray A two-dimensional array containing the recomposed cascade. """ levels = decomp["cascade_levels"] if decomp["normalized"]: mu = decomp["means"] sigma = decomp["stds"] if not decomp["normalized"] and not ( decomp["domain"] == "spectral" and decomp["compact_output"] ): result = np.sum(levels, axis=0) else: if decomp["compact_output"]: weight_masks = decomp["weight_masks"] result = np.zeros(weight_masks.shape[1:], dtype=complex) for i in range(len(levels)): if decomp["normalized"]: result[weight_masks[i]] += levels[i] * sigma[i] + mu[i] else: result[weight_masks[i]] += levels[i] else: result = [levels[i] * sigma[i] + mu[i] for i in range(len(levels))] result = np.sum(np.stack(result), axis=0) if "field_mean" in decomp: result += decomp["field_mean"] return result ================================================ FILE: pysteps/cascade/interface.py ================================================ """ pysteps.cascade.interface ========================= Interface for the cascade module. .. 
autosummary:: :toctree: ../generated/ get_method """ from pysteps.cascade import decomposition, bandpass_filters _cascade_methods = dict() _cascade_methods["fft"] = (decomposition.decomposition_fft, decomposition.recompose_fft) _cascade_methods["gaussian"] = bandpass_filters.filter_gaussian _cascade_methods["uniform"] = bandpass_filters.filter_uniform def get_method(name): """ Return a callable function for the bandpass filter or cascade decomposition method corresponding to the given name. For the latter, two functions are returned: the first is for the decomposing and the second is for recomposing the cascade. Filter methods: +-------------------+------------------------------------------------------+ | Name | Description | +===================+======================================================+ | gaussian | implementation of bandpass filter using Gaussian | | | weights | +-------------------+------------------------------------------------------+ | uniform | implementation of a filter where all weights are set | | | to one | +-------------------+------------------------------------------------------+ Decomposition/recomposition methods: +-------------------+------------------------------------------------------+ | Name | Description | +===================+======================================================+ | fft | decomposition into multiple spatial scales based on | | | the fast Fourier Transform (FFT) and a set of | | | bandpass filters | +-------------------+------------------------------------------------------+ """ if isinstance(name, str): name = name.lower() else: raise TypeError( "Only strings supported for the method's names.\n" + "Available names:" + str(list(_cascade_methods.keys())) ) from None try: return _cascade_methods[name] except KeyError: raise ValueError( "Unknown method {}\n".format(name) + "The available methods are:" + str(list(_cascade_methods.keys())) ) from None ================================================ FILE: 
pysteps/datasets.py ================================================ # -*- coding: utf-8 -*- """ pysteps.datasets ================ Utilities to download the pysteps data and to create a default pysteps configuration file pointing to that data. .. autosummary:: :toctree: ../generated/ download_pysteps_data create_default_pystepsrc info load_dataset """ import gzip import json import os import shutil import sys import time from datetime import datetime, timedelta from logging.handlers import RotatingFileHandler from tempfile import NamedTemporaryFile, TemporaryDirectory from urllib import request from urllib.error import HTTPError from zipfile import ZipFile from jsmin import jsmin import pysteps from pysteps import io from pysteps.exceptions import DirectoryNotEmpty from pysteps.utils import conversion # "event name" , "%Y%m%d%H%M" _precip_events = { "fmi": "201609281445", "fmi2": "201705091045", "mch": "201505151545", "mch2": "201607112045", "mch3": "201701310945", "opera": "201808241800", "knmi": "201008260000", "bom": "201806161000", "mrms": "201906100000", } _data_sources = { "fmi": "Finish Meteorological Institute", "mch": "MeteoSwiss", "bom": "Australian Bureau of Meteorology", "knmi": "Royal Netherlands Meteorological Institute", "opera": "OPERA", "mrms": "NSSL's Multi-Radar/Multi-Sensor System", } # Include this function here to avoid a dependency on pysteps.__init__.py def _decode_filesystem_path(path): if not isinstance(path, str): return path.decode(sys.getfilesystemencoding()) else: return path def info(): """ Describe the available datasets in the pysteps example data. 
>>> from pysteps import datasets >>> datasets.info() """ print("\nAvailable datasets:\n") print(f"{'Case':<8} {'Event date':<22} {'Source':<45}\n") for case_name, case_date in _precip_events.items(): _source = "".join([i for i in case_name if not i.isdigit()]) _source = _data_sources[_source] _case_date = datetime.strptime(_precip_events[case_name], "%Y%m%d%H%M") _case_date = datetime.strftime(_case_date, "%Y-%m-%d %H:%M UTC") print(f"{case_name:<8} {_case_date:<22} {_source:<45}") class ShowProgress(object): """ Class used to report the download progress. Usage:: >>> from urllib import request >>> pbar = ShowProgress() >>> request.urlretrieve("http://python.org/", "/tmp/index.html", pbar) >>> pbar.end() """ def __init__(self, bar_length=20): self.prev_msg_width = 0 self.init_time = None self.total_size = None self._progress_bar_length = bar_length def _clear_line(self): sys.stdout.write("\b" * self.prev_msg_width) sys.stdout.write("\r") def _print(self, msg): self.prev_msg_width = len(msg) sys.stdout.write(msg) def __call__(self, count, block_size, total_size, exact=True): self._clear_line() downloaded_size = count * block_size / (1024**2) if self.total_size is None and total_size > 0: self.total_size = total_size / (1024**2) if count == 0: self.init_time = time.time() progress_msg = "" else: if self.total_size is not None: progress = count * block_size / total_size block = int(round(self._progress_bar_length * progress)) elapsed_time = time.time() - self.init_time eta = (elapsed_time / progress - elapsed_time) / 60 bar_str = "#" * block + "-" * (self._progress_bar_length - block) if exact: downloaded_msg = ( f"({downloaded_size:.1f} Mb / {self.total_size:.1f} Mb)" ) else: downloaded_msg = ( f"(~{downloaded_size:.0f} Mb/ {self.total_size:.0f} Mb)" ) progress_msg = ( f"Progress: [{bar_str}]" + downloaded_msg + f" - Time left: {int(eta):d}:{int(eta * 60)} [m:s]" ) else: progress_msg = ( f"Progress: ({downloaded_size:.1f} Mb)" f" - Time left: unknown" ) 
self._print(progress_msg) @staticmethod def end(message="Download complete"): sys.stdout.write("\n" + message + "\n") def download_mrms_data(dir_path, initial_date, final_date, timestep=2, nodelay=False): """ Download a small dataset with 6 hours of the NSSL's Multi-Radar/Multi-Sensor System ([MRMS](https://www.nssl.noaa.gov/projects/mrms/)) precipitation product (grib format). All the available files in the archive in the indicated time period (`initial_date` to `final_date`) are downloaded. By default, the timestep between files downloaded is 2 min. If the `timestep` is exactly divisible by 2 min, the immediately lower multiple is used. For example, if `timestep=5min`, the value is lowered to 4 min. Note ---- To reduce the load on the archive's server, an internal delay of 5 seconds every 30 files downloaded is implemented. This delay can be disabled by setting `nodelay=True`. Parameters ---------- dir_path: str Path to directory where the MRMS data is be placed. If None, the default location defined in the pystepsrc file is used. The files are archived following the folder structure defined in the pystepsrc file. If the directory exists existing MRMS files may be overwritten. initial_date: datetime Beginning of the date period. final_date: datetime End of the date period. timestep: int or timedelta Timestep between downloaded files in minutes. nodelay: bool Do not implement a 5-seconds delay every 30 files downloaded. """ if dir_path is None: data_source = pysteps.rcparams.data_sources["mrms"] dir_path = data_source["root_path"] if not isinstance(timestep, (int, timedelta)): raise TypeError( "'timestep' must be an integer or a timedelta object." f"Received: {type(timestep)}" ) if isinstance(timestep, int): timestep = timedelta(seconds=timestep * 60) if timestep.total_seconds() < 120: raise ValueError( "The time step should be greater than 2 minutes." 
f"Received: {timestep.total_seconds()}" ) _remainder = timestep % timedelta(seconds=120) timestep -= _remainder if not os.path.isdir(dir_path): os.makedirs(dir_path) if nodelay: def delay(_counter): return 0 else: def delay(_counter): if _counter >= 30: _counter = 0 time.sleep(5) return _counter archive_url = "https://mtarchive.geol.iastate.edu" print(f"Downloading MRMS data from {archive_url}") current_date = initial_date counter = 0 while current_date <= final_date: counter = delay(counter) sub_dir = os.path.join(dir_path, datetime.strftime(current_date, "%Y/%m/%d")) if not os.path.isdir(sub_dir): os.makedirs(sub_dir) # Generate files URL from https://mtarchive.geol.iastate.edu dest_file_name = datetime.strftime( current_date, "PrecipRate_00.00_%Y%m%d-%H%M%S.grib2" ) rel_url_fmt = ( "/%Y/%m/%d" "/mrms/ncep/PrecipRate" "/PrecipRate_00.00_%Y%m%d-%H%M%S.grib2.gz" ) file_url = archive_url + datetime.strftime(current_date, rel_url_fmt) try: print(f"Downloading {file_url} ", end="") tmp_file_name, _ = request.urlretrieve(file_url) print("DONE") dest_file_path = os.path.join(sub_dir, dest_file_name) # Uncompress the data with gzip.open(tmp_file_name, "rb") as f_in: with open(dest_file_path, "wb") as f_out: shutil.copyfileobj(f_in, f_out) current_date = current_date + timedelta(seconds=60 * 2) counter += 1 except HTTPError as err: print(err) def download_pysteps_data(dir_path, force=True): """ Download pysteps data from github. Parameters ---------- dir_path: str Path to directory where the psyteps data will be placed. force: bool If the destination directory exits and force=False, a DirectoryNotEmpty exception if raised. If force=True, the data will we downloaded in the destination directory and may override existing files. """ # Check if directory exists but is not empty if os.path.exists(dir_path) and os.path.isdir(dir_path): if os.listdir(dir_path) and not force: raise DirectoryNotEmpty( dir_path + "is not empty.\n" "Set force=True force the extraction of the files." 
            )
    else:
        os.makedirs(dir_path)

    # NOTE:
    # The http response from github can either contain Content-Length (size of the file)
    # or use chunked Transfer-Encoding.
    # If Transfer-Encoding is chunked, then the Content-Length is not available since
    # the content is dynamically generated and we can't know the length a priori easily.
    pbar = ShowProgress()
    print("Downloading pysteps-data from github.")
    tmp_file_name, _ = request.urlretrieve(
        "https://github.com/pySTEPS/pysteps-data/archive/master.zip",
        reporthook=pbar,
    )
    pbar.end(message="Download complete\n")

    with ZipFile(tmp_file_name, "r") as zip_obj:
        tmp_dir = TemporaryDirectory()

        # Extract all the contents of zip file in the temp directory.
        # GitHub archives wrap everything in a single top-level folder
        # ("pysteps-data-master/"); commonprefix identifies it so only its
        # contents are copied into dir_path.
        common_path = os.path.commonprefix(zip_obj.namelist())

        zip_obj.extractall(tmp_dir.name)

        shutil.copytree(
            os.path.join(tmp_dir.name, common_path), dir_path, dirs_exist_ok=True
        )


def create_default_pystepsrc(
    pysteps_data_dir, config_dir=None, file_name="pystepsrc", dryrun=False
):
    """
    Create a default configuration file pointing to the pysteps data directory.

    If the configuration file already exists, the existing file is backed up by
    appending the suffixes '.1', '.2', ..., '.6' to the file name (at most 6
    backup files are kept, matching the rotation count used below).
    File rotation is implemented for the backup files.
    For example, if the default configuration filename is 'pystepsrc' and the files
    pystepsrc, pystepsrc.1, pystepsrc.2, etc. exist, they are renamed to respectively
    pystepsrc.1, pystepsrc.2, pystepsrc.3, etc. Finally, after the existing files are
    backed up, the new configuration file is written.

    Parameters
    ----------
    pysteps_data_dir: str
        Path to the directory with the pysteps data.
    config_dir: str
        Destination directory for the configuration file.
        Default values: $HOME/.pysteps (unix and Mac OS X) or
        $USERPROFILE/pysteps (windows).
        The directory is created if it does not exists.
    file_name: str
        Configuration file name. `pystepsrc` by default.
dryrun: bool Do not create the parameter file, nor create backups of existing files. No changes are made in the file system. It just returns the file path. Returns ------- dest_path: str Configuration file path. """ pysteps_lib_root = os.path.dirname(_decode_filesystem_path(pysteps.__file__)) # Load the library built-in configuration file with open(os.path.join(pysteps_lib_root, "pystepsrc"), "r") as f: rcparams_json = json.loads(jsmin(f.read())) for key, value in rcparams_json["data_sources"].items(): value["root_path"] = os.path.abspath( os.path.join(pysteps_data_dir, value["root_path"]) ) if config_dir is None: home_dir = os.path.expanduser("~") if os.name == "nt": subdir = "pysteps" else: subdir = ".pysteps" config_dir = os.path.join(home_dir, subdir) dest_path = os.path.join(config_dir, file_name) if not dryrun: if not os.path.isdir(config_dir): os.makedirs(config_dir) # Backup existing configuration files if it exists and rotate previous backups if os.path.isfile(dest_path): RotatingFileHandler(dest_path, backupCount=6).doRollover() with open(dest_path, "w") as f: json.dump(rcparams_json, f, indent=4) return os.path.normpath(dest_path) def load_dataset(case="fmi", frames=14): """ Load a sequence of radar composites from the pysteps example data. To print the available datasets run >>> from pysteps import datasets >>> datasets.info() This function load by default 14 composites, corresponding to a 1h and 10min time window. For example, the first two composites can be used to obtain the motion field of the precipitation pattern, while the remaining twelve composites can be used to evaluate the quality of our forecast. Calling this function requires the pysteps-data installed, otherwise an exception is raised. To install the pysteps example data check the `example_data` section. Parameters ---------- case: str Case to load. frames: int Number composites (radar images). 
        Max allowed value: 24 (36 for the MRMS product)
        Default: 14

    Returns
    -------
    rainrate: array-like
        Precipitation data in mm/h. Dimensions: [time, lat, lon]
    metadata: dict
        The metadata observations attributes.
    timestep: number
        Time interval between composites in minutes.
    """
    case = case.lower()

    # The MRMS example event provides more (2-min) composites than the others.
    if case == "mrms":
        max_frames = 36
    else:
        max_frames = 24

    if frames > max_frames:
        raise ValueError(
            f"The number of frames should be smaller than {max_frames + 1}"
        )

    case_date = datetime.strptime(_precip_events[case], "%Y%m%d%H%M")

    # Strip any trailing digit from the case name ("mch2" -> "mch") to obtain
    # the data-source key used in the pystepsrc configuration.
    source = "".join([i for i in case if not i.isdigit()])
    data_source = pysteps.rcparams.data_sources[source]

    # Find the input files from the archive
    file_names = io.archive.find_by_date(
        case_date,
        data_source["root_path"],
        data_source["path_fmt"],
        data_source["fn_pattern"],
        data_source["fn_ext"],
        data_source["timestep"],
        num_prev_files=0,
        num_next_files=frames - 1,
    )

    if None in file_names[0]:
        raise FileNotFoundError(f"Error loading {case} case. Some files are missing.")

    # Read the radar composites
    importer = io.get_method(data_source["importer"], "importer")
    importer_kwargs = data_source["importer_kwargs"]
    reflectivity, _, metadata = io.read_timeseries(
        file_names, importer, **importer_kwargs
    )

    # Convert to rain rate
    precip, metadata = conversion.to_rainrate(reflectivity, metadata)

    return precip, metadata, data_source["timestep"]


================================================
FILE: pysteps/decorators.py
================================================
# -*- coding: utf-8 -*-
"""
pysteps.decorators
==================

Decorators used to define reusable building blocks that can change or extend
the behavior of some functions in pysteps.

..
autosummary::
    :toctree: ../generated/

    postprocess_import
    check_input_frames
    prepare_interpolator
    memoize
"""
import inspect
import uuid
import warnings
from collections import defaultdict
from functools import wraps

import numpy as np


def _add_extra_kwrds_to_docstrings(target_func, extra_kwargs_doc_text):
    """
    Update the function's docstring by replacing the `{extra_kwargs_doc}`
    occurrences in the docstring by the `extra_kwargs_doc_text` value.
    """
    # Clean up indentation from docstrings for the
    # docstrings to be merged correctly.
    extra_kwargs_doc = inspect.cleandoc(extra_kwargs_doc_text)
    target_func.__doc__ = inspect.cleandoc(target_func.__doc__)

    # Add extra kwargs docstrings. A defaultdict is used so any other literal
    # braces in the docstring do not raise a KeyError during formatting.
    target_func.__doc__ = target_func.__doc__.format_map(
        defaultdict(str, extra_kwargs_doc=extra_kwargs_doc)
    )
    return target_func


def postprocess_import(fillna=np.nan, dtype="double"):
    """
    Postprocess the imported precipitation data.
    Operations:

    - Allow type casting (dtype keyword)
    - Set invalid or missing data to predefined value (fillna keyword)

    This decorator replaces the text "{extra_kwargs_doc}" in the function's
    docstring with the documentation of the keywords used in the postprocessing.
    The additional docstrings are added as "Other Parameters" in the importer function.

    Parameters
    ----------
    dtype: str
        Default data type for precipitation. Double precision by default.
    fillna: float or np.nan
        Default value used to represent the missing data ("No Coverage").
        By default, np.nan is used.
        If the importer returns a MaskedArray, all the masked values are set to the
        fillna value. If a numpy array is returned, all the invalid values (nan and
        inf) are set to the fillna value.
""" def _postprocess_import(importer): @wraps(importer) def _import_with_postprocessing(*args, **kwargs): precip, *other_args = importer(*args, **kwargs) _dtype = kwargs.get("dtype", dtype) accepted_precisions = ["float32", "float64", "single", "double"] if _dtype not in accepted_precisions: raise ValueError( "The selected precision does not correspond to a valid value." "The accepted values are: " + str(accepted_precisions) ) if isinstance(precip, np.ma.MaskedArray): invalid_mask = np.ma.getmaskarray(precip) precip.data[invalid_mask] = fillna else: # If plain numpy arrays are used, the importers should indicate # the invalid values with np.nan. _fillna = kwargs.get("fillna", fillna) if _fillna is not np.nan: mask = ~np.isfinite(precip) precip[mask] = _fillna return (precip.astype(_dtype),) + tuple(other_args) extra_kwargs_doc = """ Other Parameters ---------------- dtype: str Data-type to which the array is cast. Valid values: "float32", "float64", "single", and "double". fillna: float or np.nan Value used to represent the missing data ("No Coverage"). By default, np.nan is used. """ _add_extra_kwrds_to_docstrings(_import_with_postprocessing, extra_kwargs_doc) return _import_with_postprocessing return _postprocess_import def check_input_frames( minimum_input_frames=2, maximum_input_frames=np.inf, just_ndim=False ): """ Check that the input_images used as inputs in the optical-flow methods have the correct shape (t, x, y ). """ def _check_input_frames(motion_method_func): @wraps(motion_method_func) def new_function(*args, **kwargs): """ Return new function with the checks prepended to the target motion_method_func function. 
""" input_images = args[0] if input_images.ndim != 3: raise ValueError( "input_images dimension mismatch.\n" f"input_images.shape: {str(input_images.shape)}\n" "(t, x, y ) dimensions expected" ) if not just_ndim: num_of_frames = input_images.shape[0] if minimum_input_frames < num_of_frames > maximum_input_frames: raise ValueError( f"input_images frames {num_of_frames} mismatch.\n" f"Minimum frames: {minimum_input_frames}\n" f"Maximum frames: {maximum_input_frames}\n" ) return motion_method_func(*args, **kwargs) return new_function return _check_input_frames def prepare_interpolator(nchunks=4): """ Check that all the inputs have the correct shape, and that all values are finite. It also split the destination grid in `nchunks` parts, and process each part independently. """ def _preamble_interpolation(interpolator): @wraps(interpolator) def _interpolator_with_preamble(xy_coord, values, xgrid, ygrid, **kwargs): nonlocal nchunks # https://stackoverflow.com/questions/5630409/ values = values.copy() xy_coord = xy_coord.copy() input_ndims = values.ndim input_nvars = 1 if input_ndims == 1 else values.shape[1] input_nsamples = values.shape[0] coord_ndims = xy_coord.ndim coord_nsamples = xy_coord.shape[0] grid_shape = (ygrid.size, xgrid.size) if np.any(~np.isfinite(values)): raise ValueError("argument 'values' contains non-finite values") if np.any(~np.isfinite(xy_coord)): raise ValueError("argument 'xy_coord' contains non-finite values") if input_ndims > 2: raise ValueError( "argument 'values' must have 1 (n) or 2 dimensions (n, m), " f"but it has {input_ndims}" ) if not coord_ndims == 2: raise ValueError( "argument 'xy_coord' must have 2 dimensions (n, 2), " f"but it has {coord_ndims}" ) if not input_nsamples == coord_nsamples: raise ValueError( "the number of samples in argument 'values' does not match the " f"number of coordinates {input_nsamples}!={coord_nsamples}" ) # only one sample, return uniform output if input_nsamples == 1: output_array = np.ones((input_nvars,) + 
grid_shape)
                for n, v in enumerate(values[0, ...]):
                    output_array[n, ...] *= v
                return output_array.squeeze()

            # all equal elements, return uniform output
            if values.max() == values.min():
                return np.ones((input_nvars,) + grid_shape) * values.ravel()[0]

            # split grid in n chunks
            nchunks = int(kwargs.get("nchunks", nchunks) ** 0.5)
            if nchunks > 1:
                subxgrids = np.array_split(xgrid, nchunks)
                subxgrids = [x for x in subxgrids if x.size > 0]
                subygrids = np.array_split(ygrid, nchunks)
                subygrids = [y for y in subygrids if y.size > 0]

                # generate a unique identifier to be used for caching
                # intermediate results
                kwargs["hkey"] = uuid.uuid1().int
            else:
                subxgrids = [xgrid]
                subygrids = [ygrid]

            # Interpolate each sub-grid and stitch the pieces back together.
            interpolated = np.zeros((input_nvars,) + grid_shape)
            indx = 0
            for subxgrid in subxgrids:
                deltax = subxgrid.size
                indy = 0
                for subygrid in subygrids:
                    deltay = subygrid.size
                    interpolated[:, indy : (indy + deltay), indx : (indx + deltax)] = (
                        interpolator(xy_coord, values, subxgrid, subygrid, **kwargs)
                    )
                    indy += deltay
                indx += deltax

            return interpolated.squeeze()

        extra_kwargs_doc = """
        nchunks: int, optional
            Split and process the destination grid in nchunks.
            Useful for large grids to limit the memory footprint.
        """

        _add_extra_kwrds_to_docstrings(_interpolator_with_preamble, extra_kwargs_doc)

        return _interpolator_with_preamble

    return _preamble_interpolation


def memoize(maxsize=10):
    """
    Add a Least Recently Used (LRU) cache to any function.
    Caching is purely based on the optional keyword argument 'hkey', which needs
    to be a hashable.

    Parameters
    ----------
    maxsize: int, optional
        The maximum number of elements stored in the LRU cache.
    """

    def _memoize(func):
        cache = dict()
        hkeys = []

        @wraps(func)
        def _func_with_cache(*args, **kwargs):
            # Calls without an 'hkey' bypass the cache entirely.
            hkey = kwargs.pop("hkey", None)
            if hkey in cache:
                return cache[hkey]
            result = func(*args, **kwargs)
            if hkey is not None:
                cache[hkey] = result
                hkeys.append(hkey)
                # NOTE(review): eviction is FIFO on insertion order; entries are
                # not refreshed on access, so this is not strictly LRU.
                if len(hkeys) > maxsize:
                    cache.pop(hkeys.pop(0))

            return result

        return _func_with_cache

    return _memoize


def deprecate_args(old_new_args, deprecation_release):
    """
    Support deprecated argument names while issuing deprecation warnings.

    Parameters
    ----------
    old_new_args: dict[str, str]
        Mapping from old to new argument names.
    deprecation_release: str
        Specify which future release will convert this warning into an error.
    """

    def _deprecate(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Iterate over a snapshot of the keys since kwargs is mutated below.
            kwargs_names = list(kwargs.keys())
            for key_old in kwargs_names:
                if key_old in old_new_args:
                    key_new = old_new_args[key_old]
                    kwargs[key_new] = kwargs.pop(key_old)
                    warnings.warn(
                        f"Argument '{key_old}' has been renamed to '{key_new}'. "
                        f"This will raise a TypeError in pysteps {deprecation_release}.",
                        FutureWarning,
                    )
            return func(*args, **kwargs)

        return wrapper

    return _deprecate


================================================
FILE: pysteps/downscaling/__init__.py
================================================
# -*- coding: utf-8 -*-
"""Implementations of deterministic and ensemble downscaling methods."""

from pysteps.downscaling.interface import get_method


================================================
FILE: pysteps/downscaling/interface.py
================================================
"""
pysteps.downscaling.interface
=============================

Interface for the downscaling module. It returns a callable function for computing
downscaling.

.. autosummary::
    :toctree: ../generated/

    get_method
"""
from pysteps.downscaling import rainfarm

# Registry mapping method names to their implementations.
_downscale_methods = dict()
_downscale_methods["rainfarm"] = rainfarm.downscale


def get_method(name):
    """
    Return a callable function for computing downscaling.
    Description:
    Return a callable function for computing deterministic or ensemble
    precipitation downscaling.

    Implemented methods:

    +-----------------+-------------------------------------------------------+
    |     Name        |              Description                              |
    +=================+=======================================================+
    | rainfarm        | the rainfall downscaling by a filtered autoregressive |
    |                 | model (RainFARM) method developed in                  |
    |                 | :cite:`Rebora2006`                                    |
    +-----------------+-------------------------------------------------------+
    """

    if isinstance(name, str):
        name = name.lower()
    else:
        raise TypeError(
            "Only strings supported for the method's names.\n"
            + "Available names:"
            + str(list(_downscale_methods.keys()))
        ) from None

    try:
        return _downscale_methods[name]
    except KeyError:
        raise ValueError(
            "Unknown downscaling method {}\n".format(name)
            + "The available methods are:"
            + str(list(_downscale_methods.keys()))
        ) from None


================================================
FILE: pysteps/downscaling/rainfarm.py
================================================
# -*- coding: utf-8 -*-
"""
pysteps.downscaling.rainfarm
============================

Implementation of the RainFARM stochastic downscaling method as described in
:cite:`Rebora2006` and :cite:`DOnofrio2014`.

RainFARM is a downscaling algorithm for rainfall fields developed by Rebora et
al. (2006). The method can represent the realistic small-scale variability of
the downscaled precipitation field by means of Gaussian random fields.

.. autosummary::
    :toctree: ../generated/

    downscale
"""

import warnings

import numpy as np
from scipy.signal import convolve

from pysteps.utils.spectral import rapsd
from pysteps.utils.dimension import aggregate_fields


def _gaussianize(precip):
    """
    Gaussianize field using rank ordering as in :cite:`DOnofrio2014`.
    """
    m, n = np.shape(precip)
    nn = m * n
    ii = np.argsort(precip.reshape(nn))
    precip_gaussianize = np.zeros(nn)
    # Assign sorted standard-normal draws to the rank positions of the input,
    # preserving the spatial rank ordering of the original field.
    precip_gaussianize[ii] = sorted(np.random.normal(0, 1, nn))
    precip_gaussianize = precip_gaussianize.reshape(m, n)
    sd = np.std(precip_gaussianize)
    # Guard against division by zero for a constant field.
    if sd == 0:
        sd = 1
    return precip_gaussianize / sd


def _compute_freq_array(array, ds_factor=1):
    """
    Compute the frequency array following a given downscaling factor.
    """
    freq_i = np.fft.fftfreq(array.shape[0] * ds_factor, d=1 / ds_factor)
    freq_j = np.fft.fftfreq(array.shape[1] * ds_factor, d=1 / ds_factor)
    freq_sqr = freq_i[:, None] ** 2 + freq_j[None, :] ** 2
    return np.sqrt(freq_sqr)


def _log_slope(log_k, log_power_spectrum):
    """
    Calculate the log-slope of the power spectrum given an array of logarithmic
    wavenumbers and an array of logarithmic power spectrum values.
    """
    lk_min = log_k.min()
    lk_max = log_k.max()
    lk_range = lk_max - lk_min
    # Discard the outer sixth of the wavenumber range at both ends before
    # fitting, keeping only the central part of the spectrum.
    lk_min += (1 / 6) * lk_range
    lk_max -= (1 / 6) * lk_range

    selected = (lk_min <= log_k) & (log_k <= lk_max)
    lk_sel = log_k[selected]
    ps_sel = log_power_spectrum[selected]
    # Slope of the linear fit in log-log space; sign flipped so alpha > 0
    # for a decaying spectrum.
    alpha = np.polyfit(lk_sel, ps_sel, 1)[0]
    alpha = -alpha

    return alpha


def _estimate_alpha(array, k):
    """
    Estimate the alpha parameter using the power spectrum of the input array.
    """
    fp = np.fft.fft2(array)
    fp_abs = abs(fp)
    log_power_spectrum = np.log(fp_abs**2)
    # Exclude the DC component (k == 0) and any non-finite spectrum values.
    valid = (k != 0) & np.isfinite(log_power_spectrum)
    alpha = _log_slope(np.log(k[valid]), log_power_spectrum[valid])
    return alpha


def _compute_noise_field(freq_array_highres, alpha):
    """
    Compute a field of correlated noise field using the given frequency array and
    alpha value.
""" white_noise_field = np.random.rand(*freq_array_highres.shape) white_noise_field_complex = np.exp(complex(0, 1) * 2 * np.pi * white_noise_field) with warnings.catch_warnings(): warnings.simplefilter("ignore") noise_field_complex = white_noise_field_complex * np.sqrt( freq_array_highres**-alpha ) noise_field_complex[0, 0] = 0 return np.fft.ifft2(noise_field_complex).real def _apply_spectral_fusion( array_low, array_high, freq_array_low, freq_array_high, ds_factor ): """ Apply spectral fusion to merge two arrays in the frequency domain. """ # Validate inputs if array_low.shape != freq_array_low.shape: raise ValueError("Shape of array_low must match shape of freq_array_low.") if array_high.shape != freq_array_high.shape: raise ValueError("Shape of array_high must match shape of freq_array_high.") nax, _ = np.shape(array_low) nx, _ = np.shape(array_high) k0 = nax // 2 # Calculate power spectral density at specific frequency def compute_psd(array, fft_size): return rapsd(array, fft_method=np.fft)[k0 - 1] * fft_size**2 psd_low = compute_psd(array_low, nax) psd_high = compute_psd(array_high, nx) # Normalize high-resolution array normalization_factor = np.sqrt(psd_low / psd_high) array_high *= normalization_factor # Perform FFT on both arrays fft_low = np.fft.fft2(array_low) fft_high = np.fft.fft2(array_high) # Initialize the merged FFT array with low-resolution data fft_merged = np.zeros_like(fft_high, dtype=np.complex128) fft_merged[0:k0, 0:k0] = fft_low[0:k0, 0:k0] fft_merged[nx - k0 : nx, 0:k0] = fft_low[k0 : 2 * k0, 0:k0] fft_merged[0:k0, nx - k0 : nx] = fft_low[0:k0, k0 : 2 * k0] fft_merged[nx - k0 : nx, nx - k0 : nx] = fft_low[k0 : 2 * k0, k0 : 2 * k0] fft_merged[k0, 0] = np.conj(fft_merged[nx - k0, 0]) fft_merged[0, k0] = np.conj(fft_merged[0, nx - k0]) # Compute frequency arrays freq_i = np.fft.fftfreq(nx, d=1 / ds_factor) freq_i = np.tile(freq_i, (nx, 1)) freq_j = freq_i.T # Compute frequency domain adjustment ddx = np.pi * (1 / nax - 1 / nx) / 
np.abs(freq_i[0, 1] - freq_i[0, 0]) freq_squared_high = freq_array_high**2 freq_squared_low_center = freq_array_low[k0, k0] ** 2 # Fuse in the frequency domain mask_high = freq_squared_high > freq_squared_low_center mask_low = ~mask_high fft_merged = fft_high * mask_high + fft_merged * mask_low * np.exp( -1j * ddx * freq_i - 1j * ddx * freq_j ) # Inverse FFT to obtain the merged array in the spatial domain merged = np.real(np.fft.ifftn(fft_merged)) / fft_merged.size return merged def _compute_kernel_radius(ds_factor): return int(round(ds_factor / np.sqrt(np.pi))) def _make_tophat_kernel(ds_factor): """Compute 2d uniform (tophat) kernel""" radius = _compute_kernel_radius(ds_factor) mx, my = np.mgrid[-radius : radius + 0.01, -radius : radius + 0.01] tophat = ((mx**2 + my**2) <= radius**2).astype(float) return tophat / tophat.sum() def _make_gaussian_kernel(ds_factor): """ Compute 2d gaussian kernel ref: https://github.com/scipy/scipy/blob/de80faf9d3480b9dbb9b888568b64499e0e70c19/scipy/ndimage/_filters.py#L179 the smoothing sigma has width half a large pixel """ radius = _compute_kernel_radius(ds_factor) sigma = ds_factor / 2 sigma2 = sigma * sigma x = np.arange(-radius, radius + 1) kern1d = np.exp(-0.5 / sigma2 * x**2) kern2d = np.outer(kern1d, kern1d) return kern2d / kern2d.sum() def _balanced_spatial_average(array, kernel): """ Compute the balanced spatial average of an array using a given kernel while handling missing or invalid values. 
""" array = array.copy() mask_valid = np.isfinite(array) array[~mask_valid] = 0.0 array_conv = convolve(array, kernel, mode="same") array_conv /= convolve(mask_valid, kernel, mode="same") array_conv[~mask_valid] = np.nan return array_conv _make_kernel = dict() _make_kernel["gaussian"] = _make_gaussian_kernel _make_kernel["tophat"] = _make_tophat_kernel _make_kernel["uniform"] = _make_tophat_kernel def downscale( precip, ds_factor, alpha=None, threshold=None, return_alpha=False, kernel_type=None, spectral_fusion=False, ): """ Downscale a rainfall field by increasing its spatial resolution by a positive integer factor. Parameters ---------- precip: array_like Array of shape (m, n) containing the input field. The input is expected to contain rain rate values. All values are required to be finite. alpha: float, optional Spectral slope. If None, the slope is estimated from the input array. ds_factor: positive int Downscaling factor, it specifies by how many times to increase the initial grid resolution. threshold: float, optional Set all values lower than the threshold to zero. return_alpha: bool, optional Whether to return the estimated spectral slope ``alpha``. kernel_type: {None, "gaussian", "uniform", "tophat"} The name of the smoothing operator. If None no smoothing is applied. spectral_fusion: bool, optional Whether to apply spectral merging as in :cite:`DOnofrio2014`. Returns ------- precip_highres: ndarray Array of shape (m * ds_factor, n * ds_factor) containing the downscaled field. alpha: float Returned only when ``return_alpha=True``. Notes ----- Currently, the pysteps implementation of RainFARM only covers spatial downscaling. That is, it can improve the spatial resolution of a rainfall field. However, unlike the original algorithm from Rebora et al. (2006), it cannot downscale the temporal dimension. It implements spectral merging from D'Onofrio et al. (2014). 
References ---------- :cite:`Rebora2006` :cite:`DOnofrio2014` """ # Validate inputs if not np.isfinite(precip).all(): raise ValueError("All values in 'precip' must be finite.") if not isinstance(ds_factor, int) or ds_factor <= 0: raise ValueError("'ds_factor' must be a positive integer.") # Preprocess the input field if spectral fusion is enabled precip_transformed = _gaussianize(precip) if spectral_fusion else precip # Compute frequency arrays for the original and high-resolution fields freq_array = _compute_freq_array(precip_transformed) freq_array_highres = _compute_freq_array(precip_transformed, ds_factor) # Estimate spectral slope alpha if not provided if alpha is None: alpha = _estimate_alpha(precip_transformed, freq_array) # Generate noise field noise_field = _compute_noise_field(freq_array_highres, alpha) # Apply spectral fusion if enabled if spectral_fusion: noise_field /= noise_field.shape[0] ** 2 noise_field = np.exp(noise_field) noise_field = _apply_spectral_fusion( precip_transformed, noise_field, freq_array, freq_array_highres, ds_factor ) # Normalize and exponentiate the noise field noise_field /= noise_field.std() noise_field = np.exp(noise_field) # Aggregate the noise field to low resolution noise_lowres = aggregate_fields(noise_field, ds_factor, axis=(0, 1)) # Expand input and noise fields to high resolution precip_expanded = np.kron(precip, np.ones((ds_factor, ds_factor))) noise_lowres_expanded = np.kron(noise_lowres, np.ones((ds_factor, ds_factor))) # Apply smoothing if a kernel type is provided if kernel_type: if kernel_type not in _make_kernel: raise ValueError( f"kernel type '{kernel_type}' is invalid, available kernels: {list(_make_kernel)}" ) kernel = _make_kernel[kernel_type](ds_factor) precip_expanded = _balanced_spatial_average(precip_expanded, kernel) noise_lowres_expanded = _balanced_spatial_average(noise_lowres_expanded, kernel) # Normalize the high-res precipitation field by the low-res noise field norm_k0 = precip_expanded / 
noise_lowres_expanded

    precip_highres = noise_field * norm_k0

    # Apply thresholding if specified
    if threshold is not None:
        precip_highres[precip_highres < threshold] = 0

    # Return the downscaled field and optionally the spectral slope alpha
    if return_alpha:
        return precip_highres, alpha

    return precip_highres


================================================
FILE: pysteps/exceptions.py
================================================
# -*- coding: utf-8 -*-
# Custom pySteps exceptions


class MissingOptionalDependency(Exception):
    """Raised when an optional dependency is needed but not found."""

    pass


class DirectoryNotEmpty(Exception):
    """Raised when the destination directory in a file copy operation is not empty."""

    pass


class DataModelError(Exception):
    """Raised when a file is not compliant with the Data Information Model."""

    pass


================================================
FILE: pysteps/extrapolation/__init__.py
================================================
# -*- coding: utf-8 -*-
"""
Methods for advection-based extrapolation of precipitation fields.
Currently the module contains an implementation of the semi-Lagrangian method
described in :cite:`GZ2002` and the eulerian persistence."""

from pysteps.extrapolation.interface import get_method


================================================
FILE: pysteps/extrapolation/interface.py
================================================
# -*- coding: utf-8 -*-
"""
pysteps.extrapolation.interface
===============================

The functions in the extrapolation module implement the following interface::

    ``extrapolate(extrap, precip, velocity, timesteps, outval=np.nan, **keywords)``

where *extrap* is an extrapolator object returned by the initialize function,
*precip* is a (m,n) array with input precipitation field to be advected and
*velocity* is a (2,m,n) array containing the x- and y-components of
the m x n advection field.
timesteps is an integer or list specifying the time steps to extrapolate.
If an integer is given, a range of uniformly spaced steps 1,2,...,timesteps is created.
If a list is given, it is assumed to represent a sequence of monotonously increasing
time steps. One time unit is assumed to represent the time step of the advection field.
The optional argument *outval* specifies the value for pixels advected from outside
the domain.
Optional keyword arguments that are specific to a given extrapolation method are passed
as a dictionary.

The output of each method is an array that contains the time series of extrapolated
fields of shape (num_timesteps, m, n).

.. currentmodule:: pysteps.extrapolation.interface

.. autosummary::
    :toctree: ../generated/

    get_method
    eulerian_persistence
"""

import numpy as np

from pysteps.extrapolation import semilagrangian


def eulerian_persistence(precip, velocity, timesteps, outval=np.nan, **kwargs):
    """
    A dummy extrapolation method to apply Eulerian persistence to a
    two-dimensional precipitation field.

    The method returns a sequence of the same initial field with no
    extrapolation applied (i.e. Eulerian persistence).

    Parameters
    ----------
    precip : array-like
        Array of shape (m,n) containing the input precipitation field. All
        values are required to be finite.
    velocity : array-like
        Not used by the method.
    timesteps : int or list of floats
        Number of time steps or a list of time steps.
    outval : float, optional
        Not used by the method.

    Other Parameters
    ----------------
    return_displacement : bool
        If True, return the total advection velocity (displacement) between the
        initial input field and the advected one integrated along
        the trajectory. Default : False

    Returns
    -------
    out : array or tuple
        If return_displacement=False, return a sequence of the same initial field
        of shape (num_timesteps,m,n). Otherwise, return a tuple containing the
        replicated fields and a (2, num_timesteps, m, n) array of zeros.

    References
    ----------
    :cite:`GZ2002`
    """
    del velocity, outval  # Unused by _eulerian_persistence

    if isinstance(timesteps, int):
        num_timesteps = timesteps
    else:
        num_timesteps = len(timesteps)

    return_displacement = kwargs.get("return_displacement", False)
    # Replicate the input field along a new leading time axis.
    extrapolated_precip = np.repeat(precip[np.newaxis, :, :], num_timesteps, axis=0)

    if not return_displacement:
        return extrapolated_precip
    else:
        # Persistence implies zero displacement at every time step.
        return extrapolated_precip, np.zeros((2,) + extrapolated_precip.shape)


def _do_nothing(precip, velocity, timesteps, outval=np.nan, **kwargs):
    """Return None."""
    del precip, velocity, timesteps, outval, kwargs  # Unused
    return None


def _return_none(**kwargs):
    del kwargs  # Not used
    return None


# Registry mapping method names to extrapolator callables. Both None and
# "none" resolve to the no-op method.
_extrapolation_methods = dict()
_extrapolation_methods["eulerian"] = eulerian_persistence
_extrapolation_methods["semilagrangian"] = semilagrangian.extrapolate
_extrapolation_methods[None] = _do_nothing
_extrapolation_methods["none"] = _do_nothing


def get_method(name):
    """
    Return a callable function implementing the extrapolation method
    corresponding to the given name.
The available options are:\n +-----------------+--------------------------------------------------------+ | Name | Description | +=================+========================================================+ | None | returns None | +-----------------+--------------------------------------------------------+ | eulerian | this methods does not apply any advection to the input | | | precipitation field (Eulerian persistence) | +-----------------+--------------------------------------------------------+ | semilagrangian | implementation of the semi-Lagrangian method described | | | in :cite:`GZ2002` | +-----------------+--------------------------------------------------------+ """ if isinstance(name, str): name = name.lower() try: return _extrapolation_methods[name] except KeyError: raise ValueError( "Unknown method {}\n".format(name) + "The available methods are:" + str(list(_extrapolation_methods.keys())) ) from None ================================================ FILE: pysteps/extrapolation/semilagrangian.py ================================================ """ pysteps.extrapolation.semilagrangian ==================================== Implementation of the semi-Lagrangian method described in :cite:`GZ2002`. .. autosummary:: :toctree: ../generated/ extrapolate """ import time import warnings import numpy as np from scipy.ndimage import map_coordinates def extrapolate( precip, velocity, timesteps, outval=np.nan, xy_coords=None, allow_nonfinite_values=False, vel_timestep=1, **kwargs, ): """Apply semi-Lagrangian backward extrapolation to a two-dimensional precipitation field. Parameters ---------- precip: array-like or None Array of shape (m,n) containing the input precipitation field. All values are required to be finite by default. If set to None, only the displacement field is returned without interpolating the inputs. This requires that return_displacement is set to True. 
    velocity: array-like
        Array of shape (2,m,n) containing the x- and y-components of the m*n
        advection field. All values are required to be finite by default.
    timesteps: int or list of floats
        If timesteps is integer, it specifies the number of time steps to
        extrapolate. If a list is given, each element is the desired
        extrapolation time step from the current time. The elements of the
        list are required to be in ascending order.
    outval: float, optional
        Optional argument for specifying the value for pixels advected from
        outside the domain. If outval is set to 'min', the value is taken as
        the minimum value of precip.
        Default: np.nan
    xy_coords: ndarray, optional
        Array with the coordinates of the grid dimension (2, m, n ).

        * xy_coords[0]: x coordinates
        * xy_coords[1]: y coordinates

        By default, the *xy_coords* are computed for each extrapolation.
    allow_nonfinite_values: bool, optional
        If True, allow non-finite values in the precipitation and advection
        fields. This option is useful if the input fields contain a radar
        mask (i.e. pixels with no observations are set to nan).

    Other Parameters
    ----------------
    displacement_prev: array-like
        Optional initial displacement vector field of shape (2,m,n) for the
        extrapolation.
        Default: None
    n_iter: int
        Number of inner iterations in the semi-Lagrangian scheme. If n_iter
        > 0, the integration is done using the midpoint rule. Otherwise, the
        advection vectors are taken from the starting point of each
        interval.
        Default: 1
    return_displacement: bool
        If True, return the displacement between the initial input field and
        the one obtained by integrating along the advection field.
        Default: False
    vel_timestep: float
        The time step of the velocity field. It is assumed to have the same
        unit as the timesteps argument. Applicable if timesteps is a list.
        Default: 1.
    interp_order: int
        The order of interpolation to use. Default: 1 (linear). Setting this
        to 0 (nearest neighbor) gives the best computational performance but
        may produce visible artefacts. Setting this to 3 (cubic) gives the
        best ability to reproduce small-scale variability but may
        significantly increase the computation time.

    Returns
    -------
    out: array or tuple
        If return_displacement=False, return a time series extrapolated
        fields of shape (num_timesteps,m,n). Otherwise, return a tuple
        containing the extrapolated fields and the integrated trajectory
        (displacement) along the advection field.

    References
    ----------
    :cite:`GZ2002`
    """
    # --- input validation ---------------------------------------------------
    if precip is not None and precip.ndim != 2:
        raise ValueError("precip must be a two-dimensional array")

    if velocity.ndim != 3:
        raise ValueError("velocity must be a three-dimensional array")

    if not allow_nonfinite_values:
        if precip is not None and np.any(~np.isfinite(precip)):
            raise ValueError("precip contains non-finite values")

        if np.any(~np.isfinite(velocity)):
            raise ValueError("velocity contains non-finite values")

    # Even when non-finite values are allowed, completely empty inputs are
    # rejected.
    if precip is not None and np.all(~np.isfinite(precip)):
        raise ValueError("precip contains only non-finite values")

    if np.all(~np.isfinite(velocity)):
        raise ValueError("velocity contains only non-finite values")

    if isinstance(timesteps, list) and not sorted(timesteps) == timesteps:
        raise ValueError("timesteps is not in ascending order")

    # defaults
    verbose = kwargs.get("verbose", False)
    displacement_prev = kwargs.get("displacement_prev", None)
    n_iter = kwargs.get("n_iter", 1)
    return_displacement = kwargs.get("return_displacement", False)
    interp_order = kwargs.get("interp_order", 1)
    map_coordinates_mode = kwargs.get("map_coordinates_mode", "constant")

    if precip is None and not return_displacement:
        raise ValueError("precip is None but return_displacement is False")

    if "D_prev" in kwargs.keys():
        warnings.warn(
            "deprecated argument D_prev is ignored, use displacement_prev instead",
        )

    # if interp_order > 1, apply separate masking to preserve nan and
    # non-precipitation values
    if precip is not None and interp_order > 1:
        minval = np.nanmin(precip)
        # Binary masks remembering where precipitation and finite values
        # were located; they are warped with the field and used to restore
        # minval/nan after the (overshooting) spline interpolation.
        mask_min = (precip > minval).astype(float)
        if allow_nonfinite_values:
            mask_finite = np.isfinite(precip)
            precip = precip.copy()
            precip[~mask_finite] = 0.0
            mask_finite = mask_finite.astype(float)
        else:
            mask_finite = np.ones(precip.shape)

    # Spline prefiltering is only needed for interpolation orders above 1.
    prefilter = True if interp_order > 1 else False

    if isinstance(timesteps, int):
        timesteps = np.arange(1, timesteps + 1)
        vel_timestep = 1.0
    elif np.any(np.diff(timesteps) <= 0.0):
        raise ValueError("the given timestep sequence is not monotonously increasing")

    # Lengths of the sub-intervals between consecutive output times.
    timestep_diff = np.hstack([[timesteps[0]], np.diff(timesteps)])

    if verbose:
        print("Computing the advection with the semi-lagrangian scheme.")
        t0 = time.time()

    if precip is not None and outval == "min":
        outval = np.nanmin(precip)

    if xy_coords is None:
        x_values, y_values = np.meshgrid(
            np.arange(velocity.shape[2]), np.arange(velocity.shape[1]), copy=False
        )

        xy_coords = np.stack([x_values, y_values])

    def interpolate_motion(displacement, velocity_inc, td):
        """Sample the advection field at the displaced coordinates and scale
        the increment by the sub-interval length (in velocity time units)."""
        coords_warped = xy_coords + displacement
        # map_coordinates expects (row, col) ordering, i.e. (y, x).
        coords_warped = [coords_warped[1, :, :], coords_warped[0, :, :]]

        velocity_inc_x = map_coordinates(
            velocity[0, :, :], coords_warped, mode="nearest", order=1, prefilter=False
        )
        velocity_inc_y = map_coordinates(
            velocity[1, :, :], coords_warped, mode="nearest", order=1, prefilter=False
        )

        velocity_inc[0, :, :] = velocity_inc_x
        velocity_inc[1, :, :] = velocity_inc_y

        if n_iter > 1:
            velocity_inc /= n_iter

        velocity_inc *= td / vel_timestep

    precip_extrap = []
    if displacement_prev is None:
        displacement = np.zeros((2, velocity.shape[1], velocity.shape[2]))
        velocity_inc = velocity.copy() * timestep_diff[0] / vel_timestep
    else:
        displacement = displacement_prev.copy()
        velocity_inc = np.empty(velocity.shape)
        interpolate_motion(displacement, velocity_inc, timestep_diff[0])

    for ti, td in enumerate(timestep_diff):
        if n_iter > 0:
            # Midpoint rule: evaluate the motion halfway along the current
            # increment, take the step, then refresh the increment at the
            # new position.
            for k in range(n_iter):
                interpolate_motion(displacement - velocity_inc / 2.0, velocity_inc, td)
                displacement -= velocity_inc
                interpolate_motion(displacement, velocity_inc, td)
        else:
            # Explicit scheme: motion is taken from the start of the
            # interval (already initialized for the first step).
            if ti > 0 or displacement_prev is not None:
                interpolate_motion(displacement, velocity_inc, td)

            displacement -= velocity_inc

        coords_warped = xy_coords + displacement
        coords_warped = [coords_warped[1, :, :], coords_warped[0, :, :]]

        if precip is not None:
            precip_warped = map_coordinates(
                precip,
                coords_warped,
                mode=map_coordinates_mode,
                cval=outval,
                order=interp_order,
                prefilter=prefilter,
            )

            if interp_order > 1:
                # Restore the minimum value and nan in pixels whose warped
                # masks fall below 0.5 (see mask construction above).
                mask_warped = map_coordinates(
                    mask_min,
                    coords_warped,
                    mode=map_coordinates_mode,
                    cval=0,
                    order=1,
                    prefilter=False,
                )
                precip_warped[mask_warped < 0.5] = minval

                mask_warped = map_coordinates(
                    mask_finite,
                    coords_warped,
                    mode=map_coordinates_mode,
                    cval=0,
                    order=1,
                    prefilter=False,
                )
                precip_warped[mask_warped < 0.5] = np.nan

            precip_extrap.append(np.reshape(precip_warped, precip.shape))

    if verbose:
        print("--- %s seconds ---" % (time.time() - t0))

    if precip is not None:
        if not return_displacement:
            return np.stack(precip_extrap)
        else:
            return np.stack(precip_extrap), displacement
    else:
        return None, displacement


================================================
FILE: pysteps/feature/__init__.py
================================================
# -*- coding: utf-8 -*-
"""Implementations of feature detection methods."""

from pysteps.feature.interface import get_method


================================================
FILE: pysteps/feature/blob.py
================================================
"""
pysteps.feature.blob
====================

Blob detection methods.

.. autosummary::
    :toctree: ../generated/

    detection
"""

import numpy as np

from pysteps.exceptions import MissingOptionalDependency
from scipy.ndimage import gaussian_laplace

try:
    from skimage import feature

    SKIMAGE_IMPORTED = True
except ImportError:
    SKIMAGE_IMPORTED = False


def detection(
    input_image,
    max_num_features=None,
    method="log",
    threshold=0.5,
    min_sigma=3,
    max_sigma=20,
    overlap=0.5,
    return_sigmas=False,
    **kwargs,
):
    """
    ..
_`feature.blob_*`:\ https://scikit-image.org/docs/dev/auto_examples/features_detection/plot_blob.html Interface to the `feature.blob_*`_ methods implemented in scikit-image. A blob is defined as a scale-space maximum of a Gaussian-filtered image. .. _ndarray:\ https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html Parameters ---------- input_image: array_like Array of shape (m, n) containing the input image. Nan values are ignored. max_num_features : int, optional The maximum number of blobs to detect. Set to None for no restriction. If specified, the most significant blobs are chosen based on their intensities in the corresponding Laplacian of Gaussian (LoG)-filtered images. method: {'log', 'dog', 'doh'}, optional The method to use: 'log' = Laplacian of Gaussian, 'dog' = Difference of Gaussian, 'doh' = Determinant of Hessian. threshold: float, optional Detection threshold. min_sigma: float, optional The minimum standard deviation for the Gaussian kernel. max_sigma: float, optional The maximum standard deviation for the Gaussian kernel. overlap: float, optional A value between 0 and 1. If the area of two blobs overlaps by a fraction greater than the value for overlap, the smaller blob is eliminated. return_sigmas: bool, optional If True, return the standard deviations of the Gaussian kernels corresponding to the detected blobs. Returns ------- points: ndarray_ Array of shape (p, 2) or (p, 3) indicating the pixel coordinates of *p* detected blobs. If return_sigmas is True, the third column contains the standard deviations of the Gaussian kernels corresponding to the blobs. 
""" if method not in ["log", "dog", "doh"]: raise ValueError("unknown method %s, must be 'log', 'dog' or 'doh'" % method) if not SKIMAGE_IMPORTED: raise MissingOptionalDependency( "skimage is required for the blob_detection routine but it is not installed" ) if method == "log": detector = feature.blob_log elif method == "dog": detector = feature.blob_dog else: detector = feature.blob_doh blobs = detector( input_image, min_sigma=min_sigma, max_sigma=max_sigma, threshold=threshold, overlap=overlap, **kwargs, ) if max_num_features is not None and blobs.shape[0] > max_num_features: blob_intensities = [] for i in range(blobs.shape[0]): gl_image = -gaussian_laplace(input_image, blobs[i, 2]) * blobs[i, 2] ** 2 blob_intensities.append(gl_image[int(blobs[i, 0]), int(blobs[i, 1])]) idx = np.argsort(blob_intensities)[::-1] blobs = blobs[idx[:max_num_features], :] if not return_sigmas: return np.column_stack([blobs[:, 1], blobs[:, 0]]) else: return np.column_stack([blobs[:, 1], blobs[:, 0], blobs[:, 2]]) ================================================ FILE: pysteps/feature/interface.py ================================================ """ pysteps.feature.interface ========================= Interface for the feature detection module. It returns a callable function for detecting features from two-dimensional images. The feature detectors implement the following interface: ``detection(input_image, **keywords)`` The input is a two-dimensional image. Additional arguments to the specific method can be given via ``**keywords``. The output is an array of shape (n, m), where each row corresponds to one of the n features. The first two columns contain the coordinates (x, y) of the features, and additional information can be specified in the remaining columns. 
All implemented methods support the following keyword arguments: +------------------+-----------------------------------------------------+ | Key | Value | +==================+=====================================================+ | max_num_features | maximum number of features to detect | +------------------+-----------------------------------------------------+ .. autosummary:: :toctree: ../generated/ get_method """ from pysteps.feature import blob from pysteps.feature import tstorm from pysteps.feature import shitomasi _detection_methods = dict() _detection_methods["blob"] = blob.detection _detection_methods["tstorm"] = tstorm.detection _detection_methods["shitomasi"] = shitomasi.detection def get_method(name): """ Return a callable function for feature detection. Implemented methods: +-----------------+-------------------------------------------------------+ | Name | Description | +=================+=======================================================+ | blob | blob detection in scale space | +-----------------+-------------------------------------------------------+ | tstorm | Thunderstorm cell detection | +-----------------+-------------------------------------------------------+ | shitomasi | Shi-Tomasi corner detection | +-----------------+-------------------------------------------------------+ """ if isinstance(name, str): name = name.lower() else: raise TypeError( "Only strings supported for the method's names.\n" + "Available names:" + str(list(_detection_methods.keys())) ) from None try: return _detection_methods[name] except KeyError: raise ValueError( "Unknown detection method {}\n".format(name) + "The available methods are:" + str(list(_detection_methods.keys())) ) from None ================================================ FILE: pysteps/feature/shitomasi.py ================================================ """ pysteps.feature.shitomasi ========================= Shi-Tomasi features detection method to detect corners in an image. .. 
autosummary::
    :toctree: ../generated/

    detection
"""

import numpy as np
from numpy.ma.core import MaskedArray

from pysteps.exceptions import MissingOptionalDependency

try:
    import cv2

    CV2_IMPORTED = True
except ImportError:
    CV2_IMPORTED = False


def detection(
    input_image,
    max_corners=1000,
    max_num_features=None,
    quality_level=0.01,
    min_distance=10,
    block_size=5,
    buffer_mask=5,
    use_harris=False,
    k=0.04,
    verbose=False,
    **kwargs,
):
    """
    .. _`Shi-Tomasi`:\
        https://docs.opencv.org/3.4.1/dd/d1a/group__imgproc__feature.html#ga1d6bb77486c8f92d79c8793ad995d541

    Interface to the OpenCV `Shi-Tomasi`_ features detection method to detect
    corners in an image.

    Corners are used for local tracking methods.

    .. _MaskedArray:\
        https://docs.scipy.org/doc/numpy/reference/maskedarray.baseclass.html#numpy.ma.MaskedArray

    .. _ndarray:\
        https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html

    .. _`Harris detector`:\
        https://docs.opencv.org/3.4.1/dd/d1a/group__imgproc__feature.html#gac1fc3598018010880e370e2f709b4345

    .. _cornerMinEigenVal:\
        https://docs.opencv.org/3.4.1/dd/d1a/group__imgproc__feature.html#ga3dbce297c1feb859ee36707e1003e0a8

    Parameters
    ----------
    input_image: ndarray_ or MaskedArray_
        Array of shape (m, n) containing the input image. In case of
        ndarray_, invalid values (Nans or infs) are masked, otherwise the
        mask of the MaskedArray_ is used. Such mask defines a region where
        features are not detected. The fill value for the masked pixels is
        taken as the minimum of all valid pixels.
    max_corners: int, optional
        The ``maxCorners`` parameter in the `Shi-Tomasi`_ corner detection
        method. It represents the maximum number of points to be tracked
        (corners). If set to zero, all detected corners are used.
    max_num_features: int, optional
        If specified, this argument is substituted for max_corners. Set to
        None for no restriction. Added for compatibility with the feature
        detector interface.
    quality_level: float, optional
        The ``qualityLevel`` parameter in the `Shi-Tomasi`_ corner detection
        method. It represents the minimal accepted quality for the image
        corners.
    min_distance: int, optional
        The ``minDistance`` parameter in the `Shi-Tomasi`_ corner detection
        method. It represents minimum possible Euclidean distance in pixels
        between corners.
    block_size: int, optional
        The ``blockSize`` parameter in the `Shi-Tomasi`_ corner detection
        method. It represents the window size in pixels used for computing a
        derivative covariation matrix over each pixel neighbourhood.
    use_harris: bool, optional
        Whether to use a `Harris detector`_ or cornerMinEigenVal_.
    k: float, optional
        Free parameter of the Harris detector.
    buffer_mask: int, optional
        A mask buffer width in pixels. This extends the input mask (if any)
        to limit edge effects.
    verbose: bool, optional
        Print the number of features detected.

    Returns
    -------
    points: ndarray_
        Array of shape (p, 2) indicating the pixel coordinates of *p*
        detected corners.

    References
    ----------
    Jianbo Shi and Carlo Tomasi. Good features to track. In Computer Vision
    and Pattern Recognition, 1994. Proceedings CVPR'94., 1994 IEEE Computer
    Society Conference on, pages 593–600. IEEE, 1994.
    """
    if not CV2_IMPORTED:
        raise MissingOptionalDependency(
            "opencv package is required for the goodFeaturesToTrack() "
            "routine but it is not installed"
        )

    # Work on a copy so the caller's array is never modified in place.
    input_image = input_image.copy()

    if input_image.ndim != 2:
        raise ValueError("input_image must be a two-dimensional array")

    # Check if a MaskedArray is used. If not, mask the ndarray
    if not isinstance(input_image, MaskedArray):
        input_image = np.ma.masked_invalid(input_image)
    np.ma.set_fill_value(input_image, input_image.min())

    # buffer the quality mask to ensure that no vectors are computed nearby
    # the edges of the radar mask
    mask = np.ma.getmaskarray(input_image).astype("uint8")
    if buffer_mask > 0:
        mask = cv2.dilate(
            mask, np.ones((int(buffer_mask), int(buffer_mask)), np.uint8), 1
        )
        # NOTE(review): "mask" is a uint8 (0/1) array here, so this performs
        # integer (fancy) indexing along axis 0 rather than boolean masking
        # — confirm whether a boolean cast (mask.astype(bool)) was intended.
        input_image[mask] = np.ma.masked

    # scale image between 0 and 255
    im_min = input_image.min()
    im_max = input_image.max()
    # Guard against a constant image, which would divide by (near) zero.
    if im_max - im_min > 1e-8:
        input_image = (input_image.filled() - im_min) / (im_max - im_min) * 255
    else:
        input_image = input_image.filled() - im_min

    # convert to 8-bit
    input_image = np.ndarray.astype(input_image, "uint8")
    # OpenCV expects a mask where non-zero pixels are *valid*; invert.
    mask = ~mask & 1

    params = dict(
        maxCorners=max_num_features if max_num_features is not None else max_corners,
        qualityLevel=quality_level,
        minDistance=min_distance,
        blockSize=block_size,
        useHarrisDetector=use_harris,
        k=k,
    )

    points = cv2.goodFeaturesToTrack(input_image, mask=mask, **params)
    if points is None:
        points = np.empty(shape=(0, 2))
    else:
        # Drop OpenCV's singleton middle axis: (p, 1, 2) -> (p, 2).
        points = points[:, 0, :]

    if verbose:
        print(f"--- {points.shape[0]} good features to track detected ---")

    return points


================================================
FILE: pysteps/feature/tstorm.py
================================================
"""
pysteps.feature.tstorm
======================

Thunderstorm cell detection module, part of Thunderstorm Detection and
Tracking (DATing)

This module was implemented following the procedures used in the TRT
Thunderstorms Radar Tracking algorithm (:cite:`TRT2004`) used operationally
at MeteoSwiss. Full documentation is published in :cite:`Feldmann2021`.
Modifications include advecting the identified thunderstorms with the
optical flow obtained from pysteps, as well as additional options in the
thresholding.

References
...............
:cite:`TRT2004`
:cite:`Feldmann2021`

@author: mfeldman

..
autosummary::
    :toctree: ../generated/

    detection
    breakup
    longdistance
    get_profile
"""

import numpy as np
import scipy.ndimage as ndi

from pysteps.exceptions import MissingOptionalDependency

try:
    import skimage

    SKIMAGE_IMPORTED = True
except ImportError:
    SKIMAGE_IMPORTED = False
if SKIMAGE_IMPORTED:
    import skimage.measure as skime
    import skimage.morphology as skim
    import skimage.segmentation as skis
try:
    import pandas as pd

    PANDAS_IMPORTED = True
except ImportError:
    PANDAS_IMPORTED = False


def detection(
    input_image,
    max_num_features=None,
    minref=35,
    maxref=48,
    mindiff=6,
    minsize=50,
    minmax=41,
    mindis=10,
    output_feat=False,
    output_splits_merges=False,
    time="000000000",
):
    """
    This function detects thunderstorms using a multi-threshold approach. It
    is recommended to use a 2-D Cartesian maximum reflectivity composite,
    however the function will process any 2-D array. The thunderstorm cell
    detection requires both scikit-image and pandas.

    Parameters
    ----------
    input_image: array-like
        Array of shape (m,n) containing input image, usually maximum
        reflectivity in dBZ with a resolution of 1 km. Nan values are
        ignored.
    max_num_features : int, optional
        The maximum number of cells to detect. Set to None for no
        restriction. If specified, the most significant cells are chosen
        based on their area.
    minref: float, optional
        Lower threshold for object detection. Lower values will be set to
        NaN. The default is 35 dBZ.
    maxref: float, optional
        Upper threshold for object detection. Higher values will be set to
        this value. The default is 48 dBZ.
    mindiff: float, optional
        Minimal difference between two identified maxima within same area to
        split area into two objects. The default is 6 dBZ.
    minsize: float, optional
        Minimal area for possible detected object. The default is 50 pixels.
    minmax: float, optional
        Minimum value of maximum in identified objects. Objects with a
        maximum lower than this will be discarded. The default is 41 dBZ.
    mindis: float, optional
        Minimum distance between two maxima of identified objects. Objects
        with a smaller distance will be merged. The default is 10 km.
    output_feat: bool, optional
        Set to True to return only the cell coordinates.
    output_splits_merges: bool, optional
        Set to True to return additional columns in the dataframe for
        describing the splitting and merging of cells. Note that columns are
        initialized with None, and the information needs to be analyzed
        while tracking.
    time: string, optional
        Date and time as string. Used to label time in the resulting
        dataframe. The default is '000000000'.

    Returns
    -------
    cells_id: pandas dataframe
        Pandas dataframe containing all detected cells and their respective
        properties corresponding to the input image.
        Columns of dataframe: ID - cell ID, time - time stamp,
        x - array of all x-coordinates of cell,
        y - array of all y-coordinates of cell,
        cen_x - x-coordinate of cell centroid,
        cen_y - y-coordinate of cell centroid,
        max_ref - maximum (reflectivity) value of cell,
        cont - cell contours
    labels: array-like
        Array of shape (m,n), grid of labelled cells.
    """
    if not SKIMAGE_IMPORTED:
        raise MissingOptionalDependency(
            "skimage is required for thunderstorm DATing " "but it is not installed"
        )
    if not PANDAS_IMPORTED:
        raise MissingOptionalDependency(
            "pandas is required for thunderstorm DATing " "but it is not installed"
        )

    # Threshold: keep values >= minref, clip values above maxref to maxref.
    filt_image = np.zeros(input_image.shape)
    filt_image[input_image >= minref] = input_image[input_image >= minref]
    filt_image[input_image > maxref] = maxref
    # Mark saturated regions (at maxref) and promote one pixel per
    # sufficiently large saturated group above maxref so a local maximum
    # exists there.
    max_image = np.zeros(filt_image.shape)
    max_image[filt_image == maxref] = 1
    labels, n_groups = ndi.label(max_image)
    for n in range(1, n_groups + 1):
        indx, indy = np.where(labels == n)
        if len(indx) > 3:
            max_image[indx[0], indy[0]] = 2
    filt_image[max_image == 2] = maxref + 1
    binary = np.zeros(filt_image.shape)
    binary[filt_image > 0] = 1
    labels, n_groups = ndi.label(binary)
    # Discard candidate areas that are too small or whose maximum is too low.
    for n in range(1, n_groups + 1):
        ind = np.where(labels == n)
        size = len(ind[0])
        maxval = np.nanmax(input_image[ind])
        if size < minsize:  # removing too small areas
            binary[labels == n] = 0
            labels[labels == n] = 0
        if maxval < minmax:  # removing areas with too low max value
            binary[labels == n] = 0
            labels[labels == n] = 0
    filt_image = filt_image * binary
    # h-maxima footprint must have an odd edge length.
    if mindis % 2 == 0:
        elem = mindis - 1
    else:
        elem = mindis
    struct = np.ones([elem, elem])
    if np.nanmax(filt_image.flatten()) < minref:
        maxima = np.zeros(filt_image.shape)
    else:
        maxima = skim.h_maxima(filt_image, h=mindiff, footprint=struct)
    loc_max = np.where(maxima > 0)

    # Merge maxima closer than mindis and drop areas left without a maximum.
    loc_max = longdistance(loc_max, mindis)
    i_cell = labels[loc_max]
    n_cell = np.unique(labels)[1:]
    for n in n_cell:
        if n not in i_cell:
            binary[labels == n] = 0
            labels[labels == n] = 0

    maxima_dis = np.zeros(maxima.shape)
    maxima_dis[loc_max] = 1

    # Watershed segmentation assigning every pixel to one of the maxima.
    areas, lines = breakup(input_image, np.nanmin(input_image.flatten()), maxima_dis)

    cells_id, labels = get_profile(
        areas,
        binary,
        input_image,
        loc_max,
        time,
        minref,
        output_splits_merges=output_splits_merges,
    )

    # Rank cells by area (descending) when a cap on the cell count is set.
    if max_num_features is not None:
        idx = np.argsort(cells_id.area.to_numpy())[::-1]

    if not output_feat:
        if max_num_features is None:
            return cells_id, labels
        else:
            for i in idx[max_num_features:]:
                labels[labels == cells_id.ID[i]] = 0
            return cells_id.loc[idx[:max_num_features]], labels
    if output_feat:
        out = np.column_stack([np.array(cells_id.cen_x), np.array(cells_id.cen_y)])
        if max_num_features is not None:
            out = out[idx[:max_num_features], :]
        return out


def breakup(ref, minval, maxima):
    """
    This function segments the entire 2-D array into areas belonging to each
    identified maximum according to a watershed algorithm.
    """
    ref_t = np.zeros(ref.shape)
    ref_t[:] = minval
    ref_t[ref > minval] = ref[ref > minval]
    markers = ndi.label(maxima)[0]

    # Watershed on the inverted field so maxima become basins; "lines"
    # additionally carries the watershed boundaries.
    areas = skis.watershed(-ref_t, markers=markers)
    lines = skis.watershed(-ref_t, markers=markers, watershed_line=True)

    return areas, lines


def longdistance(loc_max, mindis):
    """
    This function computes the distance between all maxima and rejects maxima
    that are less than a minimum distance apart.
    """
    x_max = loc_max[1]
    y_max = loc_max[0]
    n = 0
    while n < len(y_max):
        disx = x_max[n] - x_max
        disy = y_max[n] - y_max
        dis = np.sqrt(disx * disx + disy * disy)
        # Remove only neighbours with a larger index so the current maximum
        # (and previously accepted ones) are kept.
        close = np.where(dis < mindis)[0]
        close = np.delete(close, np.where(close <= n))
        if len(close) > 0:
            x_max = np.delete(x_max, close)
            y_max = np.delete(y_max, close)
        n += 1
    new_max = y_max, x_max

    return new_max


def get_profile(areas, binary, ref, loc_max, time, minref, output_splits_merges=False):
    """
    This function returns the identified cells in a dataframe including their
    x,y locations, location of their maxima, maximum reflectivity and
    contours. Optionally, the dataframe can include columns for storing
    information regarding splitting and merging of cells.
""" cells = areas * binary cell_labels = cells[loc_max] labels = np.zeros(cells.shape) cells_id = [] for n, cell_label in enumerate(cell_labels): this_id = n + 1 x = np.where(cells == cell_label)[1] y = np.where(cells == cell_label)[0] cell_unique = np.zeros(cells.shape) cell_unique[cells == cell_label] = 1 maxref = np.nanmax(ref[y, x]) contours = skime.find_contours(cell_unique, 0.8) cells_id.append( { "ID": this_id, "time": time, "x": x, "y": y, "cen_x": np.round(np.nanmean(x)).astype(int), "cen_y": np.round(np.nanmean(y)).astype(int), "max_ref": maxref, "cont": contours, "area": len(x), } ) if output_splits_merges: cells_id[-1].update( { "splitted": None, "split_IDs": None, "merged": None, "merged_IDs": None, "results_from_split": None, "will_merge": None, } ) labels[cells == cell_labels[n]] = this_id columns = [ "ID", "time", "x", "y", "cen_x", "cen_y", "max_ref", "cont", "area", ] if output_splits_merges: columns.extend( [ "splitted", "split_IDs", "merged", "merged_IDs", "results_from_split", "will_merge", ] ) cells_id = pd.DataFrame( data=cells_id, index=range(len(cell_labels)), columns=columns, ) if output_splits_merges: cells_id["split_IDs"] = cells_id["split_IDs"].astype("object") cells_id["merged_IDs"] = cells_id["merged_IDs"].astype("object") return cells_id, labels ================================================ FILE: pysteps/io/__init__.py ================================================ # -*- coding: utf-8 -*- """ Methods for browsing data archives, reading 2d precipitation fields and writing forecasts into files. 
""" from .interface import get_method, discover_importers, importers_info from .archive import * from .exporters import * from .importers import * from .nowcast_importers import * from .readers import * ================================================ FILE: pysteps/io/archive.py ================================================ # -*- coding: utf-8 -*- """ pysteps.io.archive ================== Utilities for finding archived files that match the given criteria. .. autosummary:: :toctree: ../generated/ find_by_date """ from datetime import datetime, timedelta import fnmatch import os def find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep, num_prev_files=0, num_next_files=0, silent=False, ): """ List input files whose timestamp matches the given date. Parameters ---------- date: datetime.datetime The given date. root_path: str The root path to search the input files. path_fmt: str Path format. It may consist of directory names separated by '/', date/time specifiers beginning with '%' (e.g. %Y/%m/%d) and wildcards (?) that match any single character. fn_pattern: str The name pattern of the input files without extension. The pattern can contain time specifiers (e.g. %H, %M and %S). fn_ext: str Extension of the input files. timestep: float Time step between consecutive input files (minutes). num_prev_files: int Optional, number of previous files to find before the given timestamp. num_next_files: int Optional, number of future files to find after the given timestamp. silent: bool Optional, whether to suppress all messages from the method. Returns ------- out: tuple If num_prev_files=0 and num_next_files=0, return a pair containing the found file name and the corresponding timestamp as a datetime.datetime object. Otherwise, return a tuple of two lists, the first one for the file names and the second one for the corresponding timestemps. The lists are sorted in ascending order with respect to timestamp. 
A None value is assigned if a file name corresponding to a given timestamp is not found. """ filenames = [] timestamps = [] for i in range(num_prev_files + num_next_files + 1): curdate = ( date + timedelta(minutes=num_next_files * timestep) - timedelta(minutes=i * timestep) ) fn = _find_matching_filename( curdate, root_path, path_fmt, fn_pattern, fn_ext, silent ) filenames.append(fn) timestamps.append(curdate) if all(filename is None for filename in filenames): raise IOError("no input data found in %s" % root_path) if (num_prev_files + num_next_files) > 0: return filenames[::-1], timestamps[::-1] else: return filenames, timestamps def _find_matching_filename( date, root_path, path_fmt, fn_pattern, fn_ext, silent=False ): path = _generate_path(date, root_path, path_fmt) if os.path.exists(path): fn = datetime.strftime(date, fn_pattern) + "." + fn_ext # test for wildcars if "?" in fn: filenames = os.listdir(path) if len(filenames) > 0: for filename in filenames: if fnmatch.fnmatch(filename, fn): fn = filename break fn = os.path.join(path, fn) if os.path.exists(fn): return fn else: if not silent: print(f"file not found: {fn}") return None else: if not silent: print(f"path not found: {path}") return None def _generate_path(date, root_path, path_format): """Generate file path.""" if not isinstance(date, datetime): raise TypeError("The input 'date' argument must be a datetime object") if path_format != "": sub_path = date.strftime(path_format) return os.path.join(root_path, sub_path) else: return root_path ================================================ FILE: pysteps/io/exporters.py ================================================ # -*- coding: utf-8 -*- """ pysteps.io.exporters ==================== Methods for exporting forecasts of 2d precipitation fields into various file formats. 
Each exporter method in this module has its own initialization function that
implements the following interface::

    initialize_forecast_exporter_xxx(outpath, outfnprefix, startdate,
                                     timestep, n_timesteps, shape, metadata,
                                     n_ens_members=1, incremental=None,
                                     **kwargs)

where xxx specifies the file format.

This function creates the output files and writes the metadata. See the
documentation of the initialization methods for the format of the output files
and their names.

The datasets are written by calling
:py:func:`pysteps.io.exporters.export_forecast_dataset`, and the files are
closed by calling :py:func:`pysteps.io.exporters.close_forecast_files`.

The arguments of initialize_forecast_exporter_xxx are described in the
following table:

.. tabularcolumns:: |p{2cm}|p{2cm}|L|

+---------------+-------------------+-----------------------------------------+
|   Argument    |   Type/values     |             Description                 |
+===============+===================+=========================================+
| outpath       | str               | output path                             |
+---------------+-------------------+-----------------------------------------+
| outfnprefix   | str               | prefix of output file names             |
+---------------+-------------------+-----------------------------------------+
| startdate     | datetime.datetime | start date of the forecast              |
+---------------+-------------------+-----------------------------------------+
| timestep      | int               | length of the forecast time step        |
|               |                   | (minutes)                               |
+---------------+-------------------+-----------------------------------------+
| n_timesteps   | int               | number of time steps in the forecast    |
|               |                   | this argument is ignored if             |
|               |                   | incremental is set to 'timestep'.       |
+---------------+-------------------+-----------------------------------------+
| shape         | tuple             | two-element tuple defining the shape    |
|               |                   | (height,width) of the forecast grids    |
+---------------+-------------------+-----------------------------------------+
| metadata      | dict              | metadata dictionary containing the      |
|               |                   | projection,x1,x2,y1,y2 and unit         |
|               |                   | attributes described in the             |
|               |                   | documentation of pysteps.io.importers   |
+---------------+-------------------+-----------------------------------------+
| n_ens_members | int               | number of ensemble members in the       |
|               |                   | forecast                                |
|               |                   | this argument is ignored if incremental |
|               |                   | is set to 'member'                      |
+---------------+-------------------+-----------------------------------------+
| incremental   | {None, 'timestep',| allow incremental writing of datasets   |
|               | 'member'}         | the available options are:              |
|               |                   | 'timestep' = write a forecast or a      |
|               |                   | forecast ensemble for a given           |
|               |                   | time step                               |
|               |                   | 'member' = write a forecast sequence    |
|               |                   | for a given ensemble member             |
+---------------+-------------------+-----------------------------------------+

Optional exporter-specific arguments are passed with ``kwargs``. The return
value is a dictionary containing an exporter object. This can be used with
:py:func:`pysteps.io.exporters.export_forecast_dataset` to write the datasets
to the output files.

Available Exporters
-------------------

.. autosummary::
    :toctree: ../generated/

    initialize_forecast_exporter_geotiff
    initialize_forecast_exporter_kineros
    initialize_forecast_exporter_netcdf

Generic functions
-----------------

.. autosummary::
    :toctree: ../generated/

    export_forecast_dataset
    close_forecast_files
"""

import os
from datetime import datetime

import numpy as np

from pysteps.exceptions import MissingOptionalDependency

# Optional dependencies: each guard records a module-level flag that the
# exporter initializers check before using the library.
try:
    from osgeo import gdal, osr

    # Preserve current behavior explicitly (no GDAL exceptions) and avoid the
    # GDAL 4.0 future-warning emitted when neither mode is selected.
    if hasattr(gdal, "DontUseExceptions"):
        gdal.DontUseExceptions()

    GDAL_IMPORTED = True
except ImportError:
    GDAL_IMPORTED = False

try:
    import netCDF4

    NETCDF4_IMPORTED = True
except ImportError:
    NETCDF4_IMPORTED = False

try:
    import pyproj

    PYPROJ_IMPORTED = True
except ImportError:
    PYPROJ_IMPORTED = False


def initialize_forecast_exporter_geotiff(
    outpath,
    outfnprefix,
    startdate,
    timestep,
    n_timesteps,
    shape,
    metadata,
    n_ens_members=1,
    incremental=None,
    **kwargs,
):
    """
    Initialize a GeoTIFF forecast exporter.

    The output files are named as '<outfnprefix>_<startdate>_<t>.tif', where
    startdate is in YYmmddHHMM format and t is lead time (minutes). GDAL needs
    to be installed to use this exporter.

    Parameters
    ----------
    outpath: str
        Output path.
    outfnprefix: str
        Prefix for output file names.
    startdate: datetime.datetime
        Start date of the forecast.
    timestep: int
        Time step of the forecast (minutes).
    n_timesteps: int
        Number of time steps in the forecast. This argument is ignored if
        incremental is set to 'timestep'.
    shape: tuple of int
        Two-element tuple defining the shape (height,width) of the forecast
        grids.
    metadata: dict
        Metadata dictionary containing the projection,x1,x2,y1,y2 and unit
        attributes described in the documentation of
        :py:mod:`pysteps.io.importers`.
    n_ens_members: int
        Number of ensemble members in the forecast.
    incremental: {None,'timestep'}, optional
        Allow incremental writing of datasets into the GeoTIFF files. Set to
        'timestep' to enable writing forecasts or forecast ensembles
        separately for each time step. If set to None, incremental writing is
        disabled and the whole forecast is written in a single function call.
        The 'member' option is not currently implemented.

    Returns
    -------
    exporter: dict
        The return value is a dictionary containing an exporter object. This
        can be used with
        :py:func:`pysteps.io.exporters.export_forecast_dataset` to write the
        datasets.
""" if len(shape) != 2: raise ValueError("shape has %d elements, 2 expected" % len(shape)) del kwargs # kwargs not used if not GDAL_IMPORTED: raise MissingOptionalDependency( "gdal package is required for GeoTIFF " "exporters but it is not installed" ) if incremental == "member": raise ValueError( "incremental writing of GeoTIFF files with" + " the 'member' option is not supported" ) exporter = dict( method="geotiff", outfnprefix=outfnprefix, startdate=startdate, timestep=timestep, num_timesteps=n_timesteps, shape=shape, metadata=metadata, num_ens_members=n_ens_members, incremental=incremental, dst=[], ) driver = gdal.GetDriverByName("GTiff") exporter["driver"] = driver if incremental != "timestep": for i in range(n_timesteps): outfn = _get_geotiff_filename( outfnprefix, startdate, n_timesteps, timestep, i ) outfn = os.path.join(outpath, outfn) dst = _create_geotiff_file(outfn, driver, shape, metadata, n_ens_members) exporter["dst"].append(dst) else: exporter["num_files_written"] = 0 return exporter # TODO(exporters): This is a draft version of the kineros exporter. # Revise the variable names and # the structure of the file if necessary. def initialize_forecast_exporter_kineros( outpath, outfnprefix, startdate, timestep, n_timesteps, shape, metadata, n_ens_members=1, incremental=None, **kwargs, ): """ Initialize a KINEROS2 format exporter for the rainfall ".pre" files specified in https://www.tucson.ars.ag.gov/kineros/. Grid points are treated as individual rain gauges and a separate file is produced for each ensemble member. The output files are named as _N.pre, where is the index of ensemble member starting from zero. Parameters ---------- outpath: str Output path. outfnprefix: str Prefix for output file names. startdate: datetime.datetime Start date of the forecast. timestep: int Time step of the forecast (minutes). n_timesteps: int Number of time steps in the forecast this argument is ignored if incremental is set to 'timestep'. 
shape: tuple of int Two-element tuple defining the shape (height,width) of the forecast grids. metadata: dict Metadata dictionary containing the projection,x1,x2,y1,y2 and unit attributes described in the documentation of :py:mod:`pysteps.io.importers`. n_ens_members: int Number of ensemble members in the forecast. This argument is ignored if incremental is set to 'member'. incremental: {None}, optional Currently not implemented for this method. Returns ------- exporter: dict The return value is a dictionary containing an exporter object. This c an be used with :py:func:`pysteps.io.exporters.export_forecast_dataset` to write datasets into the given file format. """ if incremental is not None: raise ValueError( "unknown option %s: " + "incremental writing is not supported" % incremental ) exporter = {} # one file for each member n_ens_members = np.min((99, n_ens_members)) fns = [] for i in range(n_ens_members): outfn = "%s_N%02d%s" % (outfnprefix, i, ".pre") outfn = os.path.join(outpath, outfn) with open(outfn, "w") as fd: # write header fd.writelines("! pysteps-generated nowcast.\n") fd.writelines("! created the %s.\n" % datetime.now().strftime("%c")) # TODO(exporters): Add pySTEPS version here fd.writelines("! Member = %02d.\n" % i) fd.writelines("! 
Startdate = %s.\n" % startdate.strftime("%c")) fns.append(outfn) fd.close() h, w = shape if metadata["unit"] == "mm/h": var_name = "Intensity" var_long_name = "Intensity in mm/hr" var_unit = "mm/hr" elif metadata["unit"] == "mm": var_name = "Depth" var_long_name = "Accumulated depth in mm" var_unit = "mm" else: raise ValueError("unsupported unit %s" % metadata["unit"]) xr = np.linspace(metadata["x1"], metadata["x2"], w + 1)[:-1] xr += 0.5 * (xr[1] - xr[0]) yr = np.linspace(metadata["y1"], metadata["y2"], h + 1)[:-1] yr += 0.5 * (yr[1] - yr[0]) xy_coords = np.stack(np.meshgrid(xr, yr)) exporter["method"] = "kineros" exporter["ncfile"] = fns exporter["XY_coords"] = xy_coords exporter["var_name"] = var_name exporter["var_long_name"] = var_long_name exporter["var_unit"] = var_unit exporter["startdate"] = startdate exporter["timestep"] = timestep exporter["metadata"] = metadata exporter["incremental"] = incremental exporter["num_timesteps"] = n_timesteps exporter["num_ens_members"] = n_ens_members exporter["shape"] = shape return exporter # TODO(exporters): This is a draft version of the netcdf exporter. # Revise the variable names and # the structure of the file if necessary. def initialize_forecast_exporter_netcdf( outpath, outfnprefix, startdate, timestep, n_timesteps, shape, metadata, n_ens_members=1, datatype=np.float32, incremental=None, fill_value=None, scale_factor=None, offset=None, **kwargs, ): """ Initialize a netCDF forecast exporter. All outputs are written to a single file named as '_.nc'. Parameters ---------- outpath: str Output path. outfnprefix: str Prefix for output file names. startdate: datetime.datetime Start date of the forecast. timestep: int Time step of the forecast (minutes). n_timesteps: int or list of integers Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements of the list are required to be in ascending order. 
shape: tuple of int Two-element tuple defining the shape (height,width) of the forecast grids. metadata: dict Metadata dictionary containing the projection, x1, x2, y1, y2, unit attributes (projection and variable units) described in the documentation of :py:mod:`pysteps.io.importers`. n_ens_members: int Number of ensemble members in the forecast. This argument is ignored if incremental is set to 'member'. datatype: np.dtype, optional The datatype of the output values. Defaults to np.float32. incremental: {None,'timestep','member'}, optional Allow incremental writing of datasets into the netCDF files.\n The available options are: 'timestep' = write a forecast or a forecast ensemble for a given time step; 'member' = write a forecast sequence for a given ensemble member. If set to None, incremental writing is disabled. fill_value: int, optional Fill_value for missing data. Defaults to None, which means that the standard netCDF4 fill_value is used. scale_factor: float, optional The scale factor to scale the data as: store_value = scale_factor * precipitation_value + offset. Defaults to None. The scale_factor can be used to reduce data storage. offset: float, optional The offset to offset the data as: store_value = scale_factor * precipitation_value + offset. Defaults to None. Other Parameters ---------------- institution: str The instute, company or community that has created the nowcast. Default: the pySTEPS community (https://pysteps.github.io) references: str Any references to be included in the netCDF file. Defaults to " ". comment: str Any comments about the data or storage protocol that should be included in the netCDF file. Defaults to " ". Returns ------- exporter: dict The return value is a dictionary containing an exporter object. This c an be used with :py:func:`pysteps.io.exporters.export_forecast_dataset` to write datasets into the given file format. 
""" if not NETCDF4_IMPORTED: raise MissingOptionalDependency( "netCDF4 package is required for netcdf " "exporters but it is not installed" ) if not PYPROJ_IMPORTED: raise MissingOptionalDependency( "pyproj package is required for netcdf " "exporters but it is not installed" ) if incremental not in [None, "timestep", "member"]: raise ValueError( f"unknown option {incremental}: incremental must be " + "'timestep' or 'member'" ) n_timesteps_is_list = isinstance(n_timesteps, list) if n_timesteps_is_list: num_timesteps = len(n_timesteps) else: num_timesteps = n_timesteps if incremental == "timestep": num_timesteps = None elif incremental == "member": n_ens_members = None elif incremental is not None: raise ValueError( f"unknown argument value incremental='{str(incremental)}': " + "must be 'timestep' or 'member'" ) n_ens_gt_one = False if n_ens_members is not None: if n_ens_members > 1: n_ens_gt_one = True # Kwargs to be used as description strings in the netCDF institution = kwargs.get( "institution", "the pySTEPS community (https://pysteps.github.io)" ) references = kwargs.get("references", "") comment = kwargs.get("comment", "") exporter = {} outfn = os.path.join(outpath, outfnprefix + ".nc") ncf = netCDF4.Dataset(outfn, "w", format="NETCDF4") ncf.Conventions = "CF-1.7" ncf.title = "pysteps-generated nowcast" ncf.institution = institution ncf.source = "pysteps" # TODO(exporters): Add pySTEPS version here ncf.history = "" ncf.references = references ncf.comment = comment h, w = shape ncf.createDimension("ens_number", size=n_ens_members) ncf.createDimension("time", size=num_timesteps) ncf.createDimension("y", size=h) ncf.createDimension("x", size=w) if metadata["unit"] == "mm/h": var_name = "precip_intensity" var_standard_name = None var_long_name = "instantaneous precipitation rate" var_unit = "mm h-1" elif metadata["unit"] == "mm": var_name = "precip_accum" var_standard_name = None var_long_name = "accumulated precipitation" var_unit = "mm" elif metadata["unit"] == 
"dBZ": var_name = "reflectivity" var_long_name = "equivalent reflectivity factor" var_standard_name = "equivalent_reflectivity_factor" var_unit = "dBZ" else: raise ValueError("unknown unit %s" % metadata["unit"]) xr = np.linspace(metadata["x1"], metadata["x2"], w + 1)[:-1] xr += 0.5 * (xr[1] - xr[0]) yr = np.linspace(metadata["y1"], metadata["y2"], h + 1)[:-1] yr += 0.5 * (yr[1] - yr[0]) # flip yr vector if yorigin is upper if metadata["yorigin"] == "upper": yr = np.flip(yr) var_xc = ncf.createVariable("x", np.float32, dimensions=("x",)) var_xc[:] = xr var_xc.axis = "X" var_xc.standard_name = "projection_x_coordinate" var_xc.long_name = "x-coordinate in Cartesian system" var_xc.units = metadata["cartesian_unit"] var_yc = ncf.createVariable("y", np.float32, dimensions=("y",)) var_yc[:] = yr var_yc.axis = "Y" var_yc.standard_name = "projection_y_coordinate" var_yc.long_name = "y-coordinate in Cartesian system" var_yc.units = metadata["cartesian_unit"] x_2d, y_2d = np.meshgrid(xr, yr) pr = pyproj.Proj(metadata["projection"]) lon, lat = pr(x_2d.flatten(), y_2d.flatten(), inverse=True) var_lon = ncf.createVariable("lon", float, dimensions=("y", "x")) var_lon[:] = lon.reshape(shape) var_lon.standard_name = "longitude" var_lon.long_name = "longitude coordinate" # TODO(exporters): Don't hard-code the unit. var_lon.units = "degrees_east" var_lat = ncf.createVariable("lat", float, dimensions=("y", "x")) var_lat[:] = lat.reshape(shape) var_lat.standard_name = "latitude" var_lat.long_name = "latitude coordinate" # TODO(exporters): Don't hard-code the unit. 
var_lat.units = "degrees_north" ncf.projection = metadata["projection"] ( grid_mapping_var_name, grid_mapping_name, grid_mapping_params, ) = _convert_proj4_to_grid_mapping(metadata["projection"]) # skip writing the grid mapping if a matching name was not found if grid_mapping_var_name is not None: var_gm = ncf.createVariable(grid_mapping_var_name, int, dimensions=()) var_gm.grid_mapping_name = grid_mapping_name for i in grid_mapping_params.items(): var_gm.setncattr(i[0], i[1]) if incremental == "member" or n_ens_gt_one: var_ens_num = ncf.createVariable("ens_number", int, dimensions=("ens_number",)) if incremental != "member": var_ens_num[:] = list(range(1, n_ens_members + 1)) var_ens_num.long_name = "ensemble member" var_ens_num.standard_name = "realization" var_ens_num.units = "" var_time = ncf.createVariable("time", int, dimensions=("time",)) if incremental != "timestep": if n_timesteps_is_list: var_time[:] = np.array(n_timesteps) * timestep * 60 else: var_time[:] = [i * timestep * 60 for i in range(1, n_timesteps + 1)] var_time.long_name = "forecast time" startdate_str = datetime.strftime(startdate, "%Y-%m-%d %H:%M:%S") var_time.units = "seconds since %s" % startdate_str if incremental == "member" or n_ens_gt_one: var_f = ncf.createVariable( var_name, datatype=datatype, dimensions=("ens_number", "time", "y", "x"), compression="zlib", zlib=True, complevel=9, fill_value=fill_value, ) else: var_f = ncf.createVariable( var_name, datatype=datatype, dimensions=("time", "y", "x"), compression="zlib", zlib=True, complevel=9, fill_value=fill_value, ) if var_standard_name is not None: var_f.standard_name = var_standard_name var_f.long_name = var_long_name var_f.coordinates = "y x" var_f.units = var_unit if grid_mapping_var_name is not None: var_f.grid_mapping = grid_mapping_var_name # Add gain and offset if scale_factor is not None: var_f.scale_factor = scale_factor if offset is not None: var_f.add_offset = offset exporter["method"] = "netcdf" exporter["ncfile"] = ncf 
exporter["var_F"] = var_f if incremental == "member" or n_ens_gt_one: exporter["var_ens_num"] = var_ens_num exporter["var_time"] = var_time exporter["var_name"] = var_name exporter["startdate"] = startdate exporter["timestep"] = timestep exporter["metadata"] = metadata exporter["incremental"] = incremental exporter["num_timesteps"] = num_timesteps exporter["timesteps"] = n_timesteps exporter["num_ens_members"] = n_ens_members exporter["shape"] = shape return exporter def export_forecast_dataset(field, exporter): """Write a forecast array into a file. If the exporter was initialized with n_ens_members>1, the written dataset has dimensions (n_ens_members,num_timesteps,shape[0],shape[1]), where shape refers to the shape of the two-dimensional forecast grids. Otherwise, the dimensions are (num_timesteps,shape[0],shape[1]). If the exporter was initialized with incremental!=None, the array is appended to the existing dataset either along the ensemble member or time axis. Parameters ---------- exporter: dict An exporter object created with any initialization method implemented in :py:mod:`pysteps.io.exporters`. field: array_like The array to write. The required shape depends on the choice of the 'incremental' parameter the exporter was initialized with: +-----------------+---------------------------------------------------+ | incremental | required shape | +=================+===================================================+ | None | (num_ens_members,num_timesteps,shape[0],shape[1]) | +-----------------+---------------------------------------------------+ | 'timestep' | (num_ens_members,shape[0],shape[1]) | +-----------------+---------------------------------------------------+ | 'member' | (num_timesteps,shape[0],shape[1]) | +-----------------+---------------------------------------------------+ If the exporter was initialized with num_ens_members=1, the num_ens_members dimension is dropped. 
""" if exporter["method"] == "netcdf" and not NETCDF4_IMPORTED: raise MissingOptionalDependency( "netCDF4 package is required for netcdf " "exporters but it is not installed" ) if exporter["incremental"] is None: if exporter["num_ens_members"] > 1: shp = ( exporter["num_ens_members"], exporter["num_timesteps"], exporter["shape"][0], exporter["shape"][1], ) else: shp = ( exporter["num_timesteps"], exporter["shape"][0], exporter["shape"][1], ) if field.shape != shp: raise ValueError( "field has invalid shape: %s != %s" % (str(field.shape), str(shp)) ) elif exporter["incremental"] == "timestep": if exporter["num_ens_members"] > 1: shp = ( exporter["num_ens_members"], exporter["shape"][0], exporter["shape"][1], ) else: shp = exporter["shape"] if field.shape != shp: raise ValueError( "field has invalid shape: %s != %s" % (str(field.shape), str(shp)) ) elif exporter["incremental"] == "member": shp = (exporter["num_timesteps"], exporter["shape"][0], exporter["shape"][1]) if field.shape != shp: raise ValueError( "field has invalid shape: %s != %s" % (str(field.shape), str(shp)) ) if exporter["method"] == "geotiff": _export_geotiff(field, exporter) elif exporter["method"] == "netcdf": _export_netcdf(field, exporter) elif exporter["method"] == "kineros": _export_kineros(field, exporter) else: raise ValueError("unknown exporter method %s" % exporter["method"]) def close_forecast_files(exporter): """ Close the files associated with a forecast exporter. Finish writing forecasts and close the output files opened by a forecast exporter. Parameters ---------- exporter: dict An exporter object created with any initialization method implemented in :py:mod:`pysteps.io.exporters`. """ if exporter["method"] == "geotiff": pass # NOTE: There is no explicit "close" method in GDAL. # The files are closed when all objects referencing to the GDAL # datasets are deleted (i.e. when the exporter object is deleted). 
if exporter["method"] == "kineros": pass # no need to close the file else: exporter["ncfile"].close() def _export_geotiff(F, exporter): def init_band(band): band.SetScale(1.0) band.SetOffset(0.0) band.SetUnitType(exporter["metadata"]["unit"]) if exporter["incremental"] is None: for i in range(exporter["num_timesteps"]): if exporter["num_ens_members"] == 1: band = exporter["dst"][i].GetRasterBand(1) init_band(band) band.WriteArray(F[i, :, :]) else: for j in range(exporter["num_ens_members"]): band = exporter["dst"][i].GetRasterBand(j + 1) init_band(band) band.WriteArray(F[j, i, :, :]) elif exporter["incremental"] == "timestep": i = exporter["num_files_written"] outfn = _get_geotiff_filename( exporter["outfnprefix"], exporter["startdate"], exporter["num_timesteps"], exporter["timestep"], i, ) dst = _create_geotiff_file( outfn, exporter["driver"], exporter["shape"], exporter["metadata"], exporter["num_ens_members"], ) for j in range(exporter["num_ens_members"]): band = dst.GetRasterBand(j + 1) init_band(band) if exporter["num_ens_members"] > 1: band.WriteArray(F[j, :, :]) else: band.WriteArray(F) exporter["num_files_written"] += 1 elif exporter["incremental"] == "member": for i in range(exporter["num_timesteps"]): # NOTE: This does not work because the GeoTIFF driver does not # support adding bands. An alternative solution needs to be # implemented. 
exporter["dst"][i].AddBand(gdal.GDT_Float32) band = exporter["dst"][i].GetRasterBand(exporter["dst"][i].RasterCount) init_band(band) band.WriteArray(F[i, :, :]) def _export_kineros(field, exporter): num_timesteps = exporter["num_timesteps"] num_ens_members = exporter["num_ens_members"] timestep = exporter["timestep"] xgrid = exporter["XY_coords"][0, :, :].flatten() ygrid = exporter["XY_coords"][1, :, :].flatten() timemin = [(t + 1) * timestep for t in range(num_timesteps)] if field.ndim == 3: field = field.reshape((1,) + field.shape) for n in range(num_ens_members): file_name = exporter["ncfile"][n] field_tmp = field[n, :, :, :].reshape((num_timesteps, -1)) if exporter["var_name"] == "Depth": field_tmp = np.cumsum(field_tmp, axis=0) with open(file_name, "a") as fd: for m in range(field_tmp.shape[1]): fd.writelines("BEGIN RG%03d\n" % (m + 1)) fd.writelines(" X = %.2f, Y = %.2f\n" % (xgrid[m], ygrid[m])) fd.writelines(" N = %i\n" % num_timesteps) fd.writelines(" TIME %s\n" % exporter["var_name"].upper()) fd.writelines("! 
(min) (%s)\n" % exporter["var_unit"]) for t in range(num_timesteps): line_new = "{:6.1f} {:11.2f}\n".format(timemin[t], field_tmp[t, m]) fd.writelines(line_new) fd.writelines("END\n\n") def _export_netcdf(field, exporter): var_f = exporter["var_F"] if exporter["incremental"] is None: var_f[:] = field elif exporter["incremental"] == "timestep": if exporter["num_ens_members"] > 1: var_f[:, var_f.shape[1], :, :] = field else: var_f[var_f.shape[0], :, :] = field var_time = exporter["var_time"] if isinstance(exporter["timesteps"], list): var_time[len(var_time) - 1] = ( exporter["timesteps"][len(var_time) - 1] * exporter["timestep"] * 60 ) else: var_time[len(var_time) - 1] = len(var_time) * exporter["timestep"] * 60 else: var_f[var_f.shape[0], :, :, :] = field var_ens_num = exporter["var_ens_num"] var_ens_num[len(var_ens_num) - 1] = len(var_ens_num) # TODO(exporters): Write methods for converting Proj.4 projection definitions # into CF grid mapping attributes. Currently this has been implemented for # the stereographic projection. 
# The conversions implemented here are taken from:
# https://github.com/cf-convention/cf-convention.github.io/blob/master/wkt-proj-4.md
def _convert_proj4_to_grid_mapping(proj4str):
    """Convert a PROJ.4 projection string into CF grid mapping attributes.

    Returns a (grid_mapping_var_name, grid_mapping_name, params) tuple, or
    (None, None, None) if the projection type is not supported.
    """
    # Parse the "+key=value" tokens into a dictionary.
    tokens = proj4str.split("+")

    d = {}
    for t in tokens[1:]:
        t = t.split("=")
        if len(t) > 1:
            d[t[0]] = t[1].strip()

    params = {}
    # TODO(exporters): implement more projection types here
    if d["proj"] == "stere":
        grid_mapping_var_name = "polar_stereographic"
        grid_mapping_name = "polar_stereographic"
        # Strip an optional hemisphere suffix (E/W, N/S) before conversion.
        v = d["lon_0"] if d["lon_0"][-1] not in ["E", "W"] else d["lon_0"][:-1]
        params["straight_vertical_longitude_from_pole"] = float(v)
        v = d["lat_0"] if d["lat_0"][-1] not in ["N", "S"] else d["lat_0"][:-1]
        params["latitude_of_projection_origin"] = float(v)
        if "lat_ts" in list(d.keys()):
            params["standard_parallel"] = float(d["lat_ts"])
        elif "k_0" in list(d.keys()):
            params["scale_factor_at_projection_origin"] = float(d["k_0"])
        params["false_easting"] = float(d["x_0"])
        params["false_northing"] = float(d["y_0"])
    elif d["proj"] == "aea":  # Albers Conical Equal Area
        grid_mapping_var_name = "proj"
        grid_mapping_name = "albers_conical_equal_area"
        params["false_easting"] = float(d["x_0"]) if "x_0" in d else float(0)
        params["false_northing"] = float(d["y_0"]) if "y_0" in d else float(0)
        v = d["lon_0"] if "lon_0" in d else float(0)
        params["longitude_of_central_meridian"] = float(v)
        v = d["lat_0"] if "lat_0" in d else float(0)
        params["latitude_of_projection_origin"] = float(v)
        v1 = d["lat_1"] if "lat_1" in d else float(0)
        v2 = d["lat_2"] if "lat_2" in d else float(0)
        params["standard_parallel"] = (float(v1), float(v2))
    elif d["proj"] == "lcc":
        grid_mapping_var_name = "lcc"
        grid_mapping_name = "lambert_conformal_conic"
        params["false_easting"] = float(d["x_0"]) if "x_0" in d else float(0)
        params["false_northing"] = float(d["y_0"]) if "y_0" in d else float(0)
        v = d["lon_0"] if "lon_0" in d else float(0)
        params["longitude_of_central_meridian"] = float(v)
        v = d["lat_0"] if "lat_0" in d else float(0)
        params["latitude_of_projection_origin"] = float(v)
        v1 = d["lat_1"] if "lat_1" in d else float(0)
        v2 = d["lat_2"] if "lat_2" in d else float(0)
        params["standard_parallel"] = (float(v1), float(v2))
        v = d["ellps"] if "ellps" in d else ""
        if len(v):
            params["reference_ellipsoid_name"] = v
        v = d["towgs84"] if "towgs84" in d else ""
        if len(v):
            params["towgs84"] = v
    else:
        print("unknown projection", d["proj"])
        return None, None, None

    return grid_mapping_var_name, grid_mapping_name, params


def _create_geotiff_file(outfn, driver, shape, metadata, num_bands):
    """Create a GeoTIFF dataset with one float32 band per ensemble member."""
    dst = driver.Create(
        outfn,
        shape[1],
        shape[0],
        num_bands,
        gdal.GDT_Float32,
        ["COMPRESS=DEFLATE", "PREDICTOR=3"],
    )

    # Geotransform: origin at the upper-left corner, north-up (negative sy).
    sx = (metadata["x2"] - metadata["x1"]) / shape[1]
    sy = (metadata["y2"] - metadata["y1"]) / shape[0]
    dst.SetGeoTransform([metadata["x1"], sx, 0.0, metadata["y2"], 0.0, -sy])

    sr = osr.SpatialReference()
    sr.ImportFromProj4(metadata["projection"])
    dst.SetProjection(sr.ExportToWkt())

    return dst


def _get_geotiff_filename(prefix, startdate, n_timesteps, timestep, timestep_index):
    """Generate a zero-padded '<prefix>_<startdate>_<leadtime>.tif' name."""
    if n_timesteps * timestep == 0:
        raise ValueError("n_timesteps x timestep can't be 0.")
    # Pad the lead time to the number of digits of the maximum lead time.
    timestep_format_str = (
        f"{{time_str:0{int(np.floor(np.log10(n_timesteps * timestep))) + 1}d}}"
    )

    startdate_str = datetime.strftime(startdate, "%Y%m%d%H%M")
    timestep_str = timestep_format_str.format(time_str=(timestep_index + 1) * timestep)

    return f"{prefix}_{startdate_str}_{timestep_str}.tif"


================================================ FILE: pysteps/io/importers.py ================================================
"""
pysteps.io.importers
====================

Methods for importing files containing two-dimensional radar mosaics.

The methods in this module implement the following interface::

    import_xxx(filename, optional arguments)

where **xxx** is the name (or abbreviation) of the file format and filename
is the name of the input file.
The output of each method is a three-element tuple containing a two-dimensional radar mosaic, the corresponding quality field and a metadata dictionary. If the file contains no quality information, the quality field is set to None. Pixels containing missing data are set to nan. The metadata dictionary contains the following recommended key-value pairs: .. tabularcolumns:: |p{2cm}|L| +------------------+----------------------------------------------------------+ | Key | Value | +==================+==========================================================+ | projection | PROJ.4-compatible projection definition | +------------------+----------------------------------------------------------+ | x1 | x-coordinate of the lower-left corner of the data raster | +------------------+----------------------------------------------------------+ | y1 | y-coordinate of the lower-left corner of the data raster | +------------------+----------------------------------------------------------+ | x2 | x-coordinate of the upper-right corner of the data raster| +------------------+----------------------------------------------------------+ | y2 | y-coordinate of the upper-right corner of the data raster| +------------------+----------------------------------------------------------+ | xpixelsize | grid resolution in x-direction | +------------------+----------------------------------------------------------+ | ypixelsize | grid resolution in y-direction | +------------------+----------------------------------------------------------+ | cartesian_unit | the physical unit of the cartesian x- and y-coordinates: | | | e.g. 'm' or 'km' | +------------------+----------------------------------------------------------+ | yorigin | a string specifying the location of the first element in | | | the data raster w.r.t. 
y-axis: | | | 'upper' = upper border | | | 'lower' = lower border | +------------------+----------------------------------------------------------+ | institution | name of the institution who provides the data | +------------------+----------------------------------------------------------+ | unit | the physical unit of the data: 'mm/h', 'mm' or 'dBZ' | +------------------+----------------------------------------------------------+ | transform | the transformation of the data: None, 'dB', 'Box-Cox' or | | | others | +------------------+----------------------------------------------------------+ | accutime | the accumulation time in minutes of the data, float | +------------------+----------------------------------------------------------+ | threshold | the rain/no rain threshold with the same unit, | | | transformation and accutime of the data. | +------------------+----------------------------------------------------------+ | zerovalue | the value assigned to the no rain pixels with the same | | | unit, transformation and accutime of the data. | +------------------+----------------------------------------------------------+ | zr_a | the Z-R constant a in Z = a*R**b | +------------------+----------------------------------------------------------+ | zr_b | the Z-R exponent b in Z = a*R**b | +------------------+----------------------------------------------------------+ Available Importers ------------------- .. 
autosummary:: :toctree: ../generated/ import_bom_rf3 import_fmi_geotiff import_fmi_pgm import_knmi_hdf5 import_mch_gif import_mch_hdf5 import_mch_metranet import_mrms_grib import_odim_hdf5 import_opera_hdf5 import_saf_crri import_dwd_hdf5 import_dwd_radolan """ import gzip import os import array import datetime from functools import partial import numpy as np from matplotlib.pyplot import imread from pysteps.decorators import postprocess_import from pysteps.exceptions import DataModelError from pysteps.exceptions import MissingOptionalDependency from pysteps.utils import aggregate_fields try: from osgeo import gdal, gdalconst, osr # Preserve current behavior explicitly (no GDAL exceptions) and avoid the # GDAL 4.0 future-warning emitted when neither mode is selected. if hasattr(gdal, "DontUseExceptions"): gdal.DontUseExceptions() GDAL_IMPORTED = True except ImportError: GDAL_IMPORTED = False try: import h5py H5PY_IMPORTED = True except ImportError: H5PY_IMPORTED = False try: import metranet METRANET_IMPORTED = True except ImportError: METRANET_IMPORTED = False try: import netCDF4 NETCDF4_IMPORTED = True except ImportError: NETCDF4_IMPORTED = False try: from PIL import Image PIL_IMPORTED = True except ImportError: PIL_IMPORTED = False try: import pyproj PYPROJ_IMPORTED = True except ImportError: PYPROJ_IMPORTED = False try: import pygrib PYGRIB_IMPORTED = True except ImportError: PYGRIB_IMPORTED = False def _check_coords_range(selected_range, coordinate, full_range): """ Check that the coordinates range arguments follow the expected pattern in the **import_mrms_grib** function.""" if selected_range is None: return sorted(full_range) if not isinstance(selected_range, (list, tuple)): if len(selected_range) != 2: raise ValueError( f"The {coordinate} range must be None or a two-element tuple or list" ) selected_range = list(selected_range) # Make mutable for i in range(2): if selected_range[i] is None: selected_range[i] = full_range selected_range.sort() return 
tuple(selected_range) def _get_grib_projection(grib_msg): """Get the projection parameters from the grib file.""" projparams = grib_msg.projparams # Some versions of pygrib defines the regular lat/lon projections as "cyl", # which causes errors in pyproj and cartopy. Here we replace it for "longlat". if projparams["proj"] == "cyl": projparams["proj"] = "longlat" # Grib C tables (3-2) # https://apps.ecmwf.int/codes/grib/format/grib2/ctables/3/2 # https://en.wikibooks.org/wiki/PROJ.4 _grib_shapes_of_earth = dict() _grib_shapes_of_earth[0] = {"R": 6367470} _grib_shapes_of_earth[1] = {"R": 6367470} _grib_shapes_of_earth[2] = {"ellps": "IAU76"} _grib_shapes_of_earth[4] = {"ellps": "GRS80"} _grib_shapes_of_earth[5] = {"ellps": "WGS84"} _grib_shapes_of_earth[6] = {"R": 6371229} _grib_shapes_of_earth[8] = { "datum": "WGS84", "R": 6371200, } _grib_shapes_of_earth[9] = {"datum": "OSGB36"} # pygrib defines the ellipsoids using "a" and "b" only. # Here we replace the for the PROJ.4 SpheroidCodes if they are available. if grib_msg["shapeOfTheEarth"] in _grib_shapes_of_earth: keys_to_remove = ["a", "b"] for key in keys_to_remove: if key in projparams: del projparams[key] projparams.update(_grib_shapes_of_earth[grib_msg["shapeOfTheEarth"]]) return projparams def _get_threshold_value(precip): """ Get the the rain/no rain threshold with the same unit, transformation and accutime of the data. If all the values are NaNs, the returned value is `np.nan`. Otherwise, np.min(precip[precip > precip.min()]) is returned. 
Returns ------- threshold: float """ valid_mask = np.isfinite(precip) if valid_mask.any(): _precip = precip[valid_mask] min_precip = _precip.min() above_min_mask = _precip > min_precip if above_min_mask.any(): return np.min(_precip[above_min_mask]) else: return min_precip else: return np.nan @postprocess_import(dtype="float32") def import_mrms_grib(filename, extent=None, window_size=4, **kwargs): """ Importer for NSSL's Multi-Radar/Multi-Sensor System ([MRMS](https://www.nssl.noaa.gov/projects/mrms/)) rainrate product (grib format). The rainrate values are expressed in mm/h, and the dimensions of the data array are [latitude, longitude]. The first grid point (0,0) corresponds to the upper left corner of the domain, while (last i, last j) denote the lower right corner. Due to the large size of the dataset (3500 x 7000), a float32 type is used by default to reduce the memory footprint. However, be aware that when this array is passed to a pystep function, it may be converted to double precision, doubling the memory footprint. To change the precision of the data, use the ``dtype`` keyword. Also, by default, the original data is downscaled by 4 (resulting in a ~4 km grid spacing). In case that the original grid spacing is needed, use ``window_size=1``. But be aware that a single composite in double precipitation will require 186 Mb of memory. Finally, if desired, the precipitation data can be extracted over a sub region of the full domain using the `extent` keyword. By default, the entire domain is returned. Notes ----- In the MRMS grib files, "-3" is used to represent "No Coverage" or "Missing data". However, in this reader replace those values by the value specified in the `fillna` argument (NaN by default). Note that "missing values" are not the same as "no precipitation" values. Missing values indicates regions with no valid measures. While zero precipitation indicates regions with valid measurements, but with no precipitation detected. 
@postprocess_import(dtype="float32")
def import_mrms_grib(filename, extent=None, window_size=4, **kwargs):
    """
    Importer for NSSL's Multi-Radar/Multi-Sensor System
    ([MRMS](https://www.nssl.noaa.gov/projects/mrms/)) rainrate product
    (grib format).

    The rainrate values are expressed in mm/h, and the dimensions of the data
    array are [latitude, longitude]. The first grid point (0,0) corresponds to
    the upper left corner of the domain, while (last i, last j) denote the
    lower right corner.

    Due to the large size of the dataset (3500 x 7000), a float32 type is used
    by default to reduce the memory footprint. However, be aware that when this
    array is passed to a pystep function, it may be converted to double
    precision, doubling the memory footprint. To change the precision of the
    data, use the ``dtype`` keyword.

    Also, by default, the original data is downscaled by 4 (resulting in a ~4
    km grid spacing). In case that the original grid spacing is needed, use
    ``window_size=1``. But be aware that a single composite in double
    precipitation will require 186 Mb of memory.

    Finally, if desired, the precipitation data can be extracted over a
    sub region of the full domain using the `extent` keyword. By default, the
    entire domain is returned.

    Notes
    -----
    In the MRMS grib files, "-3" is used to represent "No Coverage" or
    "Missing data". However, in this reader replace those values by the value
    specified in the `fillna` argument (NaN by default). Note that "missing
    values" are not the same as "no precipitation" values. Missing values
    indicates regions with no valid measures. While zero precipitation
    indicates regions with valid measurements, but with no precipitation
    detected.

    Parameters
    ----------
    filename: str
        Name of the file to import.
    extent: None or array-like
        Longitude and latitude range (in degrees) of the data to be retrieved.
        (min_lon, max_lon, min_lat, max_lat). By default (None), the entire
        domain is retrieved. The extent can be in any form that can be
        converted to a flat array of 4 elements array (e.g., lists or tuples).
    window_size: array_like or int
        Array containing down-sampling integer factor along each axis. If an
        integer value is given, the same block shape is used for all the image
        dimensions. Default: window_size=4.

    {extra_kwargs_doc}

    Returns
    -------
    precipitation: 2D array, float32
        Precipitation field in mm/h. The dimensions are [latitude, longitude].
        The first grid point (0,0) corresponds to the upper left corner of the
        domain, while (last i, last j) denote the lower right corner.
    quality: None
        Not implemented.
    metadata: dict
        Associated metadata (pixel sizes, map projections, etc.).
    """
    del kwargs

    if not PYGRIB_IMPORTED:
        raise MissingOptionalDependency(
            "pygrib package is required to import NCEP's MRMS products but it is not installed"
        )

    try:
        grib_file = pygrib.open(filename)
    except OSError:
        # Bug fix: the message previously contained a literal placeholder
        # instead of interpolating the offending file name.
        raise OSError(f"Error opening NCEP's MRMS file. File Not Found: {filename}")

    if isinstance(window_size, int):
        window_size = (window_size, window_size)

    if extent is not None:
        extent = np.asarray(extent)
        if (extent.ndim != 1) or (extent.size != 4):
            raise ValueError(
                "The extent must be None or a flat array with 4 elements.\n"
                f"Received: extent.shape = {str(extent.shape)}"
            )

    # The MRMS grib file contain one message with the precipitation intensity
    grib_file.rewind()
    grib_msg = grib_file.read(1)[0]  # Read the only message

    # -------------------------
    # Read the grid information

    lr_lon = grib_msg["longitudeOfLastGridPointInDegrees"]
    lr_lat = grib_msg["latitudeOfLastGridPointInDegrees"]

    ul_lon = grib_msg["longitudeOfFirstGridPointInDegrees"]
    ul_lat = grib_msg["latitudeOfFirstGridPointInDegrees"]

    # Ni - Number of points along a latitude circle (west-east)
    # Nj - Number of points along a longitude meridian (south-north)
    # The lat/lon grid has a 0.01 degrees spacing.
    lats = np.linspace(ul_lat, lr_lat, grib_msg["Nj"])
    lons = np.linspace(ul_lon, lr_lon, grib_msg["Ni"])

    precip = grib_msg.values
    no_data_mask = precip == -3  # Missing values

    # Create a function with default arguments for aggregate_fields
    block_reduce = partial(aggregate_fields, method="mean", trim=True)

    if window_size != (1, 1):
        # Downscale data
        lats = block_reduce(lats, window_size[0])
        lons = block_reduce(lons, window_size[1])

        # Update the limits
        ul_lat, lr_lat = lats[0], lats[-1]  # Lat from North to south!
        ul_lon, lr_lon = lons[0], lons[-1]

        precip[no_data_mask] = 0  # block_reduce does not handle nan values
        precip = block_reduce(precip, window_size, axis=(0, 1))

        # Consider that if a single invalid observation is located in the block,
        # then mark that value as invalid.
        no_data_mask = block_reduce(
            no_data_mask.astype("int"),
            window_size,
            axis=(0, 1),
        ).astype(bool)

    lons, lats = np.meshgrid(lons, lats)
    precip[no_data_mask] = np.nan

    if extent is not None:
        # clip domain
        ul_lon, lr_lon = _check_coords_range(
            (extent[0], extent[1]),
            "longitude",
            (ul_lon, lr_lon),
        )

        lr_lat, ul_lat = _check_coords_range(
            (extent[2], extent[3]),
            "latitude",
            (ul_lat, lr_lat),
        )

        mask_lat = (lats >= lr_lat) & (lats <= ul_lat)
        mask_lon = (lons >= ul_lon) & (lons <= lr_lon)

        nlats = np.count_nonzero(mask_lat[:, 0])
        nlons = np.count_nonzero(mask_lon[0, :])

        precip = precip[mask_lon & mask_lat].reshape(nlats, nlons)

    proj_params = _get_grib_projection(grib_msg)
    pr = pyproj.Proj(proj_params)
    proj_def = " ".join([f"+{key}={value} " for key, value in proj_params.items()])

    # NOTE(review): the i-direction (longitude) increment is scaled by
    # window_size[0] although the longitudes were reduced with window_size[1]
    # above; harmless for the symmetric default, confirm for asymmetric
    # window sizes.
    xsize = grib_msg["iDirectionIncrementInDegrees"] * window_size[0]
    ysize = grib_msg["jDirectionIncrementInDegrees"] * window_size[1]

    x1, y1 = pr(ul_lon, lr_lat)
    x2, y2 = pr(lr_lon, ul_lat)

    metadata = dict(
        institution="NOAA National Severe Storms Laboratory",
        xpixelsize=xsize,
        ypixelsize=ysize,
        unit="mm/h",
        accutime=2.0,
        transform=None,
        zerovalue=0,
        projection=proj_def.strip(),
        yorigin="upper",
        threshold=_get_threshold_value(precip),
        x1=x1 - xsize / 2,
        x2=x2 + xsize / 2,
        y1=y1 - ysize / 2,
        y2=y2 + ysize / 2,
        cartesian_unit="degrees",
    )

    return precip, None, metadata
@postprocess_import()
def import_bom_rf3(filename, **kwargs):
    """
    Import a NetCDF radar rainfall product from the BoM Rainfields3.

    Parameters
    ----------
    filename: str
        Name of the file to import.

    {extra_kwargs_doc}

    Returns
    -------
    out: tuple
        A three-element tuple containing the rainfall field in mm/h imported
        from the Bureau RF3 netcdf, the quality field and the metadata. The
        quality field is currently set to None.
    """
    if not NETCDF4_IMPORTED:
        raise MissingOptionalDependency(
            "netCDF4 package is required to import BoM Rainfields3 products "
            "but it is not installed"
        )

    precip, geodata = _import_bom_rf3_data(filename)

    # The geodata dict doubles as the metadata container.
    metadata = geodata
    metadata.update(
        transform=None,
        zerovalue=np.nanmin(precip),
        threshold=_get_threshold_value(precip),
    )

    return precip, None, metadata
""" if not NETCDF4_IMPORTED: raise MissingOptionalDependency( "netCDF4 package is required to import BoM Rainfields3 products " "but it is not installed" ) precip, geodata = _import_bom_rf3_data(filename) metadata = geodata metadata["transform"] = None metadata["zerovalue"] = np.nanmin(precip) metadata["threshold"] = _get_threshold_value(precip) return precip, None, metadata def _import_bom_rf3_data(filename): ds_rainfall = netCDF4.Dataset(filename) geodata = _import_bom_rf3_geodata(ds_rainfall) if "precipitation" in ds_rainfall.variables.keys(): precipitation = ds_rainfall.variables["precipitation"][:] else: precipitation = None ds_rainfall.close() return precipitation, geodata def _import_bom_rf3_geodata(ds_rainfall): geodata = {} if "proj" in ds_rainfall.variables.keys(): projection = ds_rainfall.variables["proj"] if getattr(projection, "grid_mapping_name") == "albers_conical_equal_area": projdef = "+proj=aea " lon_0 = getattr(projection, "longitude_of_central_meridian") projdef += " +lon_0=" + f"{lon_0:.3f}" lat_0 = getattr(projection, "latitude_of_projection_origin") projdef += " +lat_0=" + f"{lat_0:.3f}" standard_parallels = getattr(projection, "standard_parallel") projdef += " +lat_1=" + f"{standard_parallels[0]:.3f}" projdef += " +lat_2=" + f"{standard_parallels[1]:.3f}" else: projdef = None geodata["projection"] = projdef if "valid_min" in ds_rainfall.variables["x"].ncattrs(): xmin = getattr(ds_rainfall.variables["x"], "valid_min") xmax = getattr(ds_rainfall.variables["x"], "valid_max") ymin = getattr(ds_rainfall.variables["y"], "valid_min") ymax = getattr(ds_rainfall.variables["y"], "valid_max") else: xmin = min(ds_rainfall.variables["x"]) xmax = max(ds_rainfall.variables["x"]) ymin = min(ds_rainfall.variables["y"]) ymax = max(ds_rainfall.variables["y"]) xpixelsize = abs(ds_rainfall.variables["x"][1] - ds_rainfall.variables["x"][0]) ypixelsize = abs(ds_rainfall.variables["y"][1] - ds_rainfall.variables["y"][0]) factor_scale = 1.0 if "units" in 
def _import_bom_rf3_geodata(ds_rainfall):
    """
    Collect georeferencing, timing and unit information from an open BoM
    Rainfields3 NetCDF dataset.

    Only the Albers conical equal-area grid mapping is translated to a PROJ.4
    string; for any other mapping the projection is set to None.
    """
    geodata = {}

    variables = ds_rainfall.variables

    if "proj" in variables.keys():
        projection = variables["proj"]
        if projection.grid_mapping_name == "albers_conical_equal_area":
            lon_0 = projection.longitude_of_central_meridian
            lat_0 = projection.latitude_of_projection_origin
            parallels = projection.standard_parallel
            projdef = (
                "+proj=aea "
                + " +lon_0="
                + f"{lon_0:.3f}"
                + " +lat_0="
                + f"{lat_0:.3f}"
                + " +lat_1="
                + f"{parallels[0]:.3f}"
                + " +lat_2="
                + f"{parallels[1]:.3f}"
            )
        else:
            projdef = None
        geodata["projection"] = projdef

    x_var = variables["x"]
    y_var = variables["y"]
    if "valid_min" in x_var.ncattrs():
        xmin, xmax = x_var.valid_min, x_var.valid_max
        ymin, ymax = y_var.valid_min, y_var.valid_max
    else:
        xmin, xmax = min(x_var), max(x_var)
        ymin, ymax = min(y_var), max(y_var)

    xpixelsize = abs(x_var[1] - x_var[0])
    ypixelsize = abs(y_var[1] - y_var[0])

    # Coordinates may be given in km; convert everything to meters.
    factor_scale = 1.0
    if "units" in x_var.ncattrs():
        if x_var.units == "km":
            factor_scale = 1000.0

    geodata["x1"] = xmin * factor_scale
    geodata["y1"] = ymin * factor_scale
    geodata["x2"] = xmax * factor_scale
    geodata["y2"] = ymax * factor_scale
    geodata["xpixelsize"] = xpixelsize * factor_scale
    geodata["ypixelsize"] = ypixelsize * factor_scale
    geodata["cartesian_unit"] = "m"
    geodata["yorigin"] = "upper"

    # get the accumulation period: valid_time - start_time, in minutes
    def _read_time(var_name):
        # Decode a CF time variable, defaulting to the standard calendar.
        if var_name not in variables.keys():
            return None
        times = variables[var_name]
        calendar = times.calendar if "calendar" in times.ncattrs() else "standard"
        return netCDF4.num2date(times[:], units=times.units, calendar=calendar)

    valid_time = _read_time("valid_time")
    start_time = _read_time("start_time")

    time_step = None
    if start_time is not None and valid_time is not None:
        time_step = (valid_time - start_time).seconds // 60
    geodata["accutime"] = time_step

    # get the unit of precipitation
    precip_var = variables["precipitation"]
    if "units" in precip_var.ncattrs():
        if precip_var.units in ("kg m-2", "mm"):
            geodata["unit"] = "mm"

    geodata["institution"] = "Commonwealth of Australia, Bureau of Meteorology"

    return geodata
""" if not GDAL_IMPORTED: raise MissingOptionalDependency( "gdal package is required to import " "FMI's radar reflectivity composite in GeoTIFF format " "but it is not installed" ) f = gdal.Open(filename, gdalconst.GA_ReadOnly) rb = f.GetRasterBand(1) precip = rb.ReadAsArray().astype(float) mask = precip == 255 precip = (precip - 64.0) / 2.0 precip[mask] = np.nan sr = osr.SpatialReference() pr = f.GetProjection() sr.ImportFromWkt(pr) projdef = sr.ExportToProj4() gt = f.GetGeoTransform() metadata = {} metadata["projection"] = projdef metadata["x1"] = gt[0] metadata["y1"] = gt[3] + gt[5] * f.RasterYSize metadata["x2"] = metadata["x1"] + gt[1] * f.RasterXSize metadata["y2"] = gt[3] metadata["xpixelsize"] = abs(gt[1]) metadata["ypixelsize"] = abs(gt[5]) if gt[5] < 0: metadata["yorigin"] = "upper" else: metadata["yorigin"] = "lower" metadata["institution"] = "Finnish Meteorological Institute" metadata["unit"] = "dBZ" metadata["transform"] = "dB" metadata["accutime"] = 5.0 metadata["threshold"] = _get_threshold_value(precip) metadata["zerovalue"] = np.nanmin(precip) metadata["cartesian_unit"] = "m" metadata["zr_a"] = 223.0 metadata["zr_b"] = 1.53 return precip, None, metadata @postprocess_import() def import_fmi_pgm(filename, gzipped=False, **kwargs): """ Import a 8-bit PGM radar reflectivity composite from the FMI archive. Parameters ---------- filename: str Name of the file to import. gzipped: bool If True, the input file is treated as a compressed gzip file. {extra_kwargs_doc} Returns ------- out: tuple A three-element tuple containing the reflectivity composite in dBZ and the associated quality field and metadata. The quality field is currently set to None. Notes ----- Reading georeferencing metadata is supported only for stereographic projection. For other projections, the keys related to georeferencing are not set. 
""" if not PYPROJ_IMPORTED: raise MissingOptionalDependency( "pyproj package is required to import " "FMI's radar reflectivity composite " "but it is not installed" ) if gzipped is False: precip = imread(filename) else: precip = imread(gzip.open(filename, "r")) pgm_metadata = _import_fmi_pgm_metadata(filename, gzipped=gzipped) geodata = _import_fmi_pgm_geodata(pgm_metadata) mask = precip == pgm_metadata["missingval"] precip = precip.astype(float) precip[mask] = np.nan precip = (precip - 64.0) / 2.0 metadata = geodata metadata["institution"] = "Finnish Meteorological Institute" metadata["accutime"] = 5.0 metadata["unit"] = "dBZ" metadata["transform"] = "dB" metadata["zerovalue"] = np.nanmin(precip) metadata["threshold"] = _get_threshold_value(precip) metadata["zr_a"] = 223.0 metadata["zr_b"] = 1.53 return precip, None, metadata def _import_fmi_pgm_geodata(metadata): geodata = {} projdef = "" if "type" in metadata.keys() and metadata["type"][0] == "stereographic": projdef += "+proj=stere " projdef += " +lon_0=" + metadata["centrallongitude"][0] + "E" projdef += " +lat_0=" + metadata["centrallatitude"][0] + "N" projdef += " +lat_ts=" + metadata["truelatitude"][0] # These are hard-coded because the projection definition # is missing from the PGM files. 
projdef += " +a=6371288" projdef += " +x_0=380886.310" projdef += " +y_0=3395677.920" projdef += " +no_defs" # geodata["projection"] = projdef ll_lon, ll_lat = [float(v) for v in metadata["bottomleft"]] ur_lon, ur_lat = [float(v) for v in metadata["topright"]] pr = pyproj.Proj(projdef) x1, y1 = pr(ll_lon, ll_lat) x2, y2 = pr(ur_lon, ur_lat) geodata["x1"] = x1 geodata["y1"] = y1 geodata["x2"] = x2 geodata["y2"] = y2 geodata["cartesian_unit"] = "m" geodata["xpixelsize"] = float(metadata["metersperpixel_x"][0]) geodata["ypixelsize"] = float(metadata["metersperpixel_y"][0]) geodata["yorigin"] = "upper" return geodata def _import_fmi_pgm_metadata(filename, gzipped=False): metadata = {} if not gzipped: f = open(filename, "rb") else: f = gzip.open(filename, "rb") file_line = f.readline() while not file_line.startswith(b"#"): file_line = f.readline() while file_line.startswith(b"#"): x = file_line.decode() x = x[1:].strip().split(" ") if len(x) >= 2: k = x[0] v = x[1:] metadata[k] = v else: file_line = f.readline() continue file_line = f.readline() file_line = f.readline().decode() metadata["missingval"] = int(file_line) f.close() return metadata @postprocess_import() def import_knmi_hdf5( filename, qty="ACRR", accutime=5.0, pixelsize=1000.0, **kwargs, ): """ Import a precipitation or reflectivity field (and optionally the quality field) from a HDF5 file conforming to the KNMI Data Centre specification. Parameters ---------- filename: str Name of the file to import. qty: {'ACRR', 'DBZH'} The quantity to read from the file. The currently supported identifiers are: 'ACRR'=hourly rainfall accumulation (mm) and 'DBZH'=max-reflectivity (dBZ). The default value is 'ACRR'. accutime: float The accumulation time of the dataset in minutes. A 5 min accumulation is used as default, but hourly, daily and monthly accumulations are also available. pixelsize: float The pixel size of a raster cell in meters. 
@postprocess_import()
def import_knmi_hdf5(filename, qty="ACRR", accutime=5.0, pixelsize=1000.0, **kwargs):
    """
    Import a precipitation or reflectivity field (and optionally the quality
    field) from a HDF5 file conforming to the KNMI Data Centre specification.

    Parameters
    ----------
    filename: str
        Name of the file to import.
    qty: {'ACRR', 'DBZH'}
        The quantity to read from the file. The currently supported
        identifiers are: 'ACRR'=hourly rainfall accumulation (mm) and
        'DBZH'=max-reflectivity (dBZ). The default value is 'ACRR'.
    accutime: float
        The accumulation time of the dataset in minutes. A 5 min accumulation
        is used as default, but hourly, daily and monthly accumulations are
        also available.
    pixelsize: float
        The pixel size of a raster cell in meters. The default value for the
        KNMI datasets is a 1000 m grid cell size, but datasets with 2400 m
        pixel size are also available.

    {extra_kwargs_doc}

    Returns
    -------
    out: tuple
        A three-element tuple containing precipitation accumulation [mm] /
        reflectivity [dBZ] of the KNMI product, the associated quality field
        and metadata. The quality field is currently set to None.

    Notes
    -----
    Every KNMI data type has a slightly different naming convention. The
    standard setup is based on the accumulated rainfall product on 1 km2
    spatial and 5 min temporal resolution.
    See https://data.knmi.nl/datasets?q=radar for a list of all available
    KNMI radar data.
    """
    # TODO: Add quality field.

    if not H5PY_IMPORTED:
        raise MissingOptionalDependency(
            "h5py package is required to import "
            "KNMI's radar datasets "
            "but it is not installed"
        )

    if qty not in ["ACRR", "DBZH"]:
        raise ValueError(
            "unknown quantity %s: the available options are 'ACRR' and 'DBZH' "
        )

    ####
    # Precipitation fields
    ####
    hdf = h5py.File(filename, "r")
    raw = np.copy(hdf["image1"]["image_data"])  # copy the dataset content

    precip = None
    if qty == "ACRR":
        # ACRR is stored as hundreds of mm (integers, 0.01 mm precision);
        # 65535 is the no-data value.
        precip = np.where(raw == 65535, np.nan, raw / 100.0)
    if qty == "DBZH":
        # Reflectivity is stored as integers with dBZ = 0.5 * value - 32.0
        # (this used to be 31.5); 255 is the no-data value.
        precip = np.where(raw == 255, np.nan, raw * 0.5 - 32.0)

    if precip is None:
        raise IOError("requested quantity not found")

    ####
    # Meta data
    ####
    if qty == "ACRR":
        unit, transform = "mm", None
    elif qty == "DBZH":
        unit, transform = "dBZ", "dB"

    # The 'where' group of mch- and Opera-data is called 'geographic' in the
    # KNMI data.
    geographic = hdf["geographic"]
    proj4str = "+proj=stere +lat_0=90 +lon_0=0.0 +lat_ts=60.0 +a=6378137 +b=6356752 +x_0=0 +y_0=0"
    proj = pyproj.Proj(proj4str)

    # Corner coordinates: (lon, lat) pairs in LL, UL, UR, LR order.
    corners = geographic.attrs["geo_product_corners"]
    ll_x, ll_y = proj(corners[0], corners[1])
    ul_x, ul_y = proj(corners[2], corners[3])
    ur_x, ur_y = proj(corners[4], corners[5])
    lr_x, lr_y = proj(corners[6], corners[7])

    metadata = {
        "projection": proj4str,
        "x1": min(ll_x, ul_x),
        "y1": min(ll_y, lr_y),
        "x2": max(lr_x, ur_x),
        "y2": max(ul_y, ur_y),
        "xpixelsize": pixelsize,
        "ypixelsize": pixelsize,
        "cartesian_unit": "m",
        "yorigin": "upper",
        "institution": "KNMI - Royal Netherlands Meteorological Institute",
        "accutime": accutime,
        "unit": unit,
        "transform": transform,
        "zerovalue": 0.0,
        "threshold": _get_threshold_value(precip),
        "zr_a": 200.0,
        "zr_b": 1.6,
    }

    hdf.close()

    return precip, None, metadata
product: {"AQC", "CPC", "RZC", "AZC"} The name of the MeteoSwiss QPE product.\n Currently supported prducts: +------+----------------------------+ | Name | Product | +======+============================+ | AQC | Acquire | +------+----------------------------+ | CPC | CombiPrecip | +------+----------------------------+ | RZC | Precip | +------+----------------------------+ | AZC | RZC accumulation | +------+----------------------------+ unit: {"mm/h", "mm", "dBZ"} the physical unit of the data accutime: float the accumulation time in minutes of the data {extra_kwargs_doc} Returns ------- out: tuple A three-element tuple containing the precipitation field in mm/h imported from a MeteoSwiss gif file and the associated quality field and metadata. The quality field is currently set to None. """ if not PIL_IMPORTED: raise MissingOptionalDependency( "PIL package is required to import " "radar reflectivity composite from MeteoSwiss" "but it is not installed" ) geodata = _import_mch_geodata() metadata = geodata # import gif file img = Image.open(filename) if product.lower() in ["azc", "rzc", "precip"]: # convert 8-bit GIF colortable to RGB values img_rgb = img.convert("RGB") # load lookup table if product.lower() == "azc": lut_filename = os.path.join( os.path.dirname(__file__), "mch_lut_8bit_Metranet_AZC_V104.txt", ) else: lut_filename = os.path.join( os.path.dirname(__file__), "mch_lut_8bit_Metranet_v103.txt", ) lut = np.genfromtxt(lut_filename, skip_header=1) lut = dict( zip( zip(lut[:, 1], lut[:, 2], lut[:, 3]), lut[:, -1], ) ) # apply lookup table conversion precip = np.zeros(len(img_rgb.getdata())) for i, dn in enumerate(img_rgb.getdata()): precip[i] = lut.get(dn, np.nan) # convert to original shape width, height = img.size precip = precip.reshape(height, width) # set values outside observational range to NaN, # and values in non-precipitating areas to zero. 
precip[precip < 0] = 0 precip[precip > 9999] = np.nan elif product.lower() in [ "aqc", "cpc", "acquire ", "combiprecip", ]: # convert digital numbers to physical values img = np.array(img).astype(int) # build lookup table [mm/5min] lut = np.zeros(256) a = 316.0 b = 1.5 for i in range(256): if (i < 2) or (i > 250 and i < 255): lut[i] = 0.0 elif i == 255: lut[i] = np.nan else: lut[i] = (10.0 ** ((i - 71.5) / 20.0) / a) ** (1.0 / b) # apply lookup table precip = lut[img] else: raise ValueError("unknown product %s" % product) metadata["accutime"] = accutime metadata["unit"] = unit metadata["transform"] = None metadata["zerovalue"] = np.nanmin(precip) metadata["threshold"] = _get_threshold_value(precip) metadata["institution"] = "MeteoSwiss" metadata["product"] = product metadata["zr_a"] = 316.0 metadata["zr_b"] = 1.5 return precip, None, metadata @postprocess_import() def import_mch_hdf5(filename, qty="RATE", **kwargs): """ Import a precipitation field (and optionally the quality field) from a MeteoSwiss HDF5 file conforming to the ODIM specification. Parameters ---------- filename: str Name of the file to import. qty: {'RATE', 'ACRR', 'DBZH'} The quantity to read from the file. The currently supported identitiers are: 'RATE'=instantaneous rain rate (mm/h), 'ACRR'=hourly rainfall accumulation (mm) and 'DBZH'=max-reflectivity (dBZ). The default value is 'RATE'. {extra_kwargs_doc} Returns ------- out: tuple A three-element tuple containing the OPERA product for the requested quantity and the associated quality field and metadata. The quality field is read from the file if it contains a dataset whose quantity identifier is 'QIND'. 
""" if not H5PY_IMPORTED: raise MissingOptionalDependency( "h5py package is required to import " "radar reflectivity composites using ODIM HDF5 specification " "but it is not installed" ) if qty not in ["ACRR", "DBZH", "RATE"]: raise ValueError( "unknown quantity %s: the available options are 'ACRR', 'DBZH' and 'RATE'" ) f = h5py.File(filename, "r") precip = None quality = None for dsg in f.items(): if dsg[0].startswith("dataset"): what_grp_found = False # check if the "what" group is in the "dataset" group if "what" in list(dsg[1].keys()): qty_, gain, offset, nodata, undetect = _read_mch_hdf5_what_group( dsg[1]["what"] ) what_grp_found = True for dg in dsg[1].items(): if dg[0][0:4] == "data": # check if the "what" group is in the "data" group if "what" in list(dg[1].keys()): ( qty_, gain, offset, nodata, undetect, ) = _read_mch_hdf5_what_group(dg[1]["what"]) elif not what_grp_found: raise DataModelError( "Non ODIM compliant file: " "no what group found from {} " "or its subgroups".format(dg[0]) ) if qty_.decode() in [qty, "QIND"]: arr = dg[1]["data"][...] 
mask_n = arr == nodata mask_u = arr == undetect mask = np.logical_and(~mask_u, ~mask_n) if qty_.decode() == qty: precip = np.empty(arr.shape) precip[mask] = arr[mask] * gain + offset precip[mask_u] = np.nan precip[mask_n] = np.nan elif qty_.decode() == "QIND": quality = np.empty(arr.shape, dtype=float) quality[mask] = arr[mask] quality[~mask] = np.nan if precip is None: raise IOError("requested quantity %s not found" % qty) where = f["where"] geodata = _import_mch_geodata() metadata = geodata # TODO: use those from the hdf5 file instead # xpixelsize = where.attrs["xscale"] * 1000.0 # ypixelsize = where.attrs["yscale"] * 1000.0 # xsize = where.attrs["xsize"] # ysize = where.attrs["ysize"] if qty == "ACRR": unit = "mm" transform = None elif qty == "DBZH": unit = "dBZ" transform = "dB" else: unit = "mm/h" transform = None if np.any(np.isfinite(precip)): thr = np.nanmin(precip[precip > np.nanmin(precip)]) else: thr = np.nan metadata.update( { "yorigin": "upper", "institution": "MeteoSwiss", "accutime": 5.0, "unit": unit, "transform": transform, "zerovalue": np.nanmin(precip), "threshold": thr, "zr_a": 316.0, "zr_b": 1.5, } ) f.close() return precip, quality, metadata def _read_mch_hdf5_what_group(whatgrp): qty = whatgrp.attrs["quantity"] if "quantity" in whatgrp.attrs.keys() else "RATE" gain = whatgrp.attrs["gain"] if "gain" in whatgrp.attrs.keys() else 1.0 offset = whatgrp.attrs["offset"] if "offset" in whatgrp.attrs.keys() else 0.0 nodata = whatgrp.attrs["nodata"] if "nodata" in whatgrp.attrs.keys() else 0 undetect = whatgrp.attrs["undetect"] if "undetect" in whatgrp.attrs.keys() else -1.0 return qty, gain, offset, nodata, undetect @postprocess_import() def import_mch_metranet(filename, product, unit, accutime): """ Import a 8-bit bin radar reflectivity composite from the MeteoSwiss archive. Parameters ---------- filename: str Name of the file to import. 
product: {"AQC", "CPC", "RZC", "AZC"} The name of the MeteoSwiss QPE product.\n Currently supported prducts: +------+----------------------------+ | Name | Product | +======+============================+ | AQC | Acquire | +------+----------------------------+ | CPC | CombiPrecip | +------+----------------------------+ | RZC | Precip | +------+----------------------------+ | AZC | RZC accumulation | +------+----------------------------+ unit: {"mm/h", "mm", "dBZ"} the physical unit of the data accutime: float the accumulation time in minutes of the data {extra_kwargs_doc} Returns ------- out: tuple A three-element tuple containing the precipitation field in mm/h imported from a MeteoSwiss gif file and the associated quality field and metadata. The quality field is currently set to None. """ if not METRANET_IMPORTED: raise MissingOptionalDependency( "metranet package needed for importing MeteoSwiss " "radar composites but it is not installed" ) ret = metranet.read_file(filename, physic_value=True, verbose=False) precip = ret.data geodata = _import_mch_geodata() # read metranet metadata = geodata metadata["institution"] = "MeteoSwiss" metadata["accutime"] = accutime metadata["unit"] = unit metadata["transform"] = None metadata["zerovalue"] = np.nanmin(precip) metadata["threshold"] = _get_threshold_value(precip) metadata["zr_a"] = 316.0 metadata["zr_b"] = 1.5 return precip, None, metadata def _import_mch_geodata(): """ Swiss radar domain CCS4 These are all hard-coded because the georeferencing is missing from the gif files. 
""" geodata = {} # LV03 Swiss projection definition in Proj4 projdef = "" projdef += "+proj=somerc " projdef += " +lon_0=7.43958333333333" projdef += " +lat_0=46.9524055555556" projdef += " +k_0=1" projdef += " +x_0=600000" projdef += " +y_0=200000" projdef += " +ellps=bessel" projdef += " +towgs84=674.374,15.056,405.346,0,0,0,0" projdef += " +units=m" projdef += " +no_defs" geodata["projection"] = projdef geodata["x1"] = 255000.0 geodata["y1"] = -160000.0 geodata["x2"] = 965000.0 geodata["y2"] = 480000.0 geodata["xpixelsize"] = 1000.0 geodata["ypixelsize"] = 1000.0 geodata["cartesian_unit"] = "m" geodata["yorigin"] = "upper" return geodata @postprocess_import() def import_odim_hdf5(filename, qty="RATE", **kwargs): """ Import a precipitation field (and optionally the quality field) from a HDF5 file conforming to the ODIM specification. **Important:** Currently, only the Pan-European (OPERA) and the Dipartimento della Protezione Civile (DPC) radar composites are correctly supported. Other ODIM-compliant files may not be read correctly. Parameters ---------- filename: str Name of the file to import. qty: {'RATE', 'ACRR', 'DBZH'} The quantity to read from the file. The currently supported identitiers are: 'RATE'=instantaneous rain rate (mm/h), 'ACRR'=hourly rainfall accumulation (mm) and 'DBZH'=max-reflectivity (dBZ). The default value is 'RATE'. {extra_kwargs_doc} Returns ------- out: tuple A three-element tuple containing the OPERA product for the requested quantity and the associated quality field and metadata. The quality field is read from the file if it contains a dataset whose quantity identifier is 'QIND'. 
@postprocess_import()
def import_odim_hdf5(filename, qty="RATE", **kwargs):
    """
    Import a precipitation field (and optionally the quality field) from a
    HDF5 file conforming to the ODIM specification.

    **Important:** Currently, only the Pan-European (OPERA) and the
    Dipartimento della Protezione Civile (DPC) radar composites are correctly
    supported. Other ODIM-compliant files may not be read correctly.

    Parameters
    ----------
    filename: str
        Name of the file to import.
    qty: {'RATE', 'ACRR', 'DBZH'}
        The quantity to read from the file. The currently supported
        identifiers are: 'RATE'=instantaneous rain rate (mm/h),
        'ACRR'=hourly rainfall accumulation (mm) and
        'DBZH'=max-reflectivity (dBZ). The default value is 'RATE'.

    {extra_kwargs_doc}

    Returns
    -------
    out: tuple
        A three-element tuple containing the OPERA product for the requested
        quantity and the associated quality field and metadata. The quality
        field is read from the file if it contains a dataset whose quantity
        identifier is 'QIND'.

    Raises
    ------
    MissingOptionalDependency
        If h5py is not installed.
    ValueError
        If ``qty`` is not one of 'ACRR', 'DBZH' or 'RATE'.
    IOError
        If the requested quantity is not found in the file.
    """
    if not H5PY_IMPORTED:
        raise MissingOptionalDependency(
            "h5py package is required to import "
            "radar reflectivity composites using ODIM HDF5 specification "
            "but it is not installed"
        )

    if qty not in ["ACRR", "DBZH", "RATE"]:
        # BUG FIX: the offending quantity was never interpolated into the
        # message (the "%s" placeholder was left unformatted).
        raise ValueError(
            "unknown quantity %s: the available options are 'ACRR', 'DBZH' and 'RATE'"
            % qty
        )

    f = h5py.File(filename, "r")
    try:
        precip = None
        quality = None

        for dsg in f.items():
            if dsg[0].startswith("dataset"):
                what_grp_found = False
                # check if the "what" group is in the "dataset" group
                if "what" in list(dsg[1].keys()):
                    if "quantity" in dsg[1]["what"].attrs.keys():
                        try:
                            (
                                qty_,
                                gain,
                                offset,
                                nodata,
                                undetect,
                            ) = _read_opera_hdf5_what_group(dsg[1]["what"])
                            what_grp_found = True
                        except KeyError:
                            pass

                for dg in dsg[1].items():
                    if dg[0][0:4] == "data":
                        # check if the "what" group is in the "data" group
                        if "what" in list(dg[1].keys()):
                            (
                                qty_,
                                gain,
                                offset,
                                nodata,
                                undetect,
                            ) = _read_opera_hdf5_what_group(dg[1]["what"])
                        elif not what_grp_found:
                            raise DataModelError(
                                "Non ODIM compliant file: "
                                "no what group found from {} "
                                "or its subgroups".format(dg[0])
                            )

                        if qty_.decode() in [qty, "QIND"]:
                            arr = dg[1]["data"][...]
                            mask_n = arr == nodata
                            mask_u = arr == undetect
                            mask = np.logical_and(~mask_u, ~mask_n)

                            if qty_.decode() == qty:
                                precip = np.empty(arr.shape)
                                precip[mask] = arr[mask] * gain + offset
                                if qty != "DBZH":
                                    precip[mask_u] = offset
                                else:
                                    # no-echo value for reflectivity fields
                                    precip[mask_u] = -30.0
                                precip[mask_n] = np.nan
                            elif qty_.decode() == "QIND":
                                quality = np.empty(arr.shape, dtype=float)
                                quality[mask] = arr[mask]
                                quality[~mask] = np.nan

                        if quality is None:
                            # Fall back to a nested "quality*" subgroup
                            # (used e.g. by the DPC composites).
                            for dgg in dg[1].items():
                                if dgg[0][0:7] == "quality":
                                    quality_keys = list(dgg[1].keys())
                                    if "what" in quality_keys:
                                        (
                                            qty_,
                                            gain,
                                            offset,
                                            nodata,
                                            undetect,
                                        ) = _read_opera_hdf5_what_group(
                                            dgg[1]["what"]
                                        )
                                        if qty_.decode() == "QIND":
                                            arr = dgg[1]["data"][...]
                                            mask_n = arr == nodata
                                            mask_u = arr == undetect
                                            mask = np.logical_and(~mask_u, ~mask_n)
                                            quality = np.empty(arr.shape)
                                            quality[mask] = arr[mask] * gain + offset
                                            quality[~mask] = np.nan

        if precip is None:
            raise IOError("requested quantity %s not found" % qty)

        where = f["where"]
        if isinstance(where.attrs["projdef"], str):
            proj4str = where.attrs["projdef"]
        else:
            proj4str = where.attrs["projdef"].decode()
        pr = pyproj.Proj(proj4str)

        ll_lat = where.attrs["LL_lat"]
        ll_lon = where.attrs["LL_lon"]
        ur_lat = where.attrs["UR_lat"]
        ur_lon = where.attrs["UR_lon"]
        if (
            "LR_lat" in where.attrs.keys()
            and "LR_lon" in where.attrs.keys()
            and "UL_lat" in where.attrs.keys()
            and "UL_lon" in where.attrs.keys()
        ):
            lr_lat = float(where.attrs["LR_lat"])
            lr_lon = float(where.attrs["LR_lon"])
            ul_lat = float(where.attrs["UL_lat"])
            ul_lon = float(where.attrs["UL_lon"])
            full_cornerpts = True
        else:
            full_cornerpts = False

        ll_x, ll_y = pr(ll_lon, ll_lat)
        ur_x, ur_y = pr(ur_lon, ur_lat)
        if full_cornerpts:
            # Use all four corners to get the tightest bounding box.
            lr_x, lr_y = pr(lr_lon, lr_lat)
            ul_x, ul_y = pr(ul_lon, ul_lat)
            x1 = min(ll_x, ul_x)
            y1 = min(ll_y, lr_y)
            x2 = max(lr_x, ur_x)
            y2 = max(ul_y, ur_y)
        else:
            x1 = ll_x
            y1 = ll_y
            x2 = ur_x
            y2 = ur_y

        dataset1 = f["dataset1"]
        if "xscale" in where.attrs.keys() and "yscale" in where.attrs.keys():
            xpixelsize = where.attrs["xscale"]
            ypixelsize = where.attrs["yscale"]
        elif (
            "xscale" in dataset1["where"].attrs.keys()
            and "yscale" in dataset1["where"].attrs.keys()
        ):
            where = dataset1["where"]
            xpixelsize = where.attrs["xscale"]
            ypixelsize = where.attrs["yscale"]
        else:
            xpixelsize = None
            ypixelsize = None

        if qty == "ACRR":
            unit = "mm"
            transform = None
        elif qty == "DBZH":
            unit = "dBZ"
            transform = "dB"
        else:
            unit = "mm/h"
            transform = None

        metadata = {
            "projection": proj4str,
            "ll_lon": ll_lon,
            "ll_lat": ll_lat,
            "ur_lon": ur_lon,
            "ur_lat": ur_lat,
            "x1": x1,
            "y1": y1,
            "x2": x2,
            "y2": y2,
            "xpixelsize": xpixelsize,
            "ypixelsize": ypixelsize,
            "cartesian_unit": "m",
            "yorigin": "upper",
            "institution": "Odyssey datacentre",
            "accutime": 15.0,
            "unit": unit,
            "transform": transform,
            "zerovalue": np.nanmin(precip),
            "threshold": _get_threshold_value(precip),
        }
        metadata.update(kwargs)
    finally:
        # BUG FIX: previously the file was left open whenever an exception
        # (DataModelError, IOError, ...) was raised above.
        f.close()

    return precip, quality, metadata


def import_opera_hdf5(filename, qty="RATE", **kwargs):
    """
    Wrapper to :py:func:`pysteps.io.importers.import_odim_hdf5` to maintain
    backward compatibility with previous pysteps versions.

    **Important:** Use :py:func:`~pysteps.io.importers.import_odim_hdf5`
    instead.
    """
    return import_odim_hdf5(filename, qty=qty, **kwargs)


def _read_opera_hdf5_what_group(whatgrp):
    # Read the scaling attributes of an ODIM "what" group, falling back to
    # ODIM defaults when an attribute is missing.
    qty = whatgrp.attrs["quantity"] if "quantity" in whatgrp.attrs.keys() else b"QIND"
    gain = whatgrp.attrs["gain"] if "gain" in whatgrp.attrs.keys() else 1.0
    offset = whatgrp.attrs["offset"] if "offset" in whatgrp.attrs.keys() else 0.0
    nodata = whatgrp.attrs["nodata"] if "nodata" in whatgrp.attrs.keys() else np.nan
    undetect = whatgrp.attrs["undetect"] if "undetect" in whatgrp.attrs.keys() else 0.0

    return qty, gain, offset, nodata, undetect
@postprocess_import()
def import_saf_crri(filename, extent=None, **kwargs):
    """
    Read a NetCDF radar rainfall product from the Convective Rainfall Rate
    Intensity (CRRI) product from the Satellite Application Facilities (SAF).

    Product description available on http://www.nwcsaf.org/crr_description
    (last visited Jan 26, 2020).

    Parameters
    ----------
    filename: str
        Name of the file to import.
    extent: scalars (left, right, bottom, top), optional
        The spatial extent specified in data coordinates. If None, the full
        extent is imported.

    {extra_kwargs_doc}

    Returns
    -------
    out: tuple
        A three-element tuple containing the rainfall field in mm/h, the
        quality field and the metadata imported from the CRRI SAF netcdf file.
        The quality field includes values [1, 2, 4, 8, 16, 24, 32] meaning
        "nodata", "internal_consistency", "temporal_consistency", "good",
        "questionable", "bad", and "interpolated", respectively.
    """
    if not NETCDF4_IMPORTED:
        raise MissingOptionalDependency(
            "netCDF4 package is required to import CRRI SAF products "
            "but it is not installed"
        )

    metadata = _import_saf_crri_geodata(filename)

    if extent:
        half_dx = metadata["xpixelsize"] / 2
        half_dy = metadata["ypixelsize"] / 2

        # Cell-center coordinates; the y axis is reversed (yorigin = "upper").
        x_centers = (
            np.arange(metadata["x1"], metadata["x2"], metadata["xpixelsize"])
            + half_dx
        )
        y_centers = (
            np.arange(metadata["y1"], metadata["y2"], metadata["ypixelsize"])
            + half_dy
        )[::-1]

        idx_x = np.logical_and(x_centers < extent[1], x_centers > extent[0])
        idx_y = np.logical_and(y_centers < extent[3], y_centers > extent[2])

        # Shrink the bounding box to the selected cells.
        metadata["x1"] = x_centers[idx_x].min() - half_dx
        metadata["x2"] = x_centers[idx_x].max() + half_dx
        metadata["y1"] = y_centers[idx_y].min() - half_dy
        metadata["y2"] = y_centers[idx_y].max() + half_dy
    else:
        idx_x = None
        idx_y = None

    precip, quality = _import_saf_crri_data(filename, idx_x, idx_y)

    metadata["transform"] = None
    metadata["zerovalue"] = np.nanmin(precip)
    metadata["threshold"] = _get_threshold_value(precip)

    return precip, quality, metadata


def _import_saf_crri_data(filename, idx_x=None, idx_y=None):
    # Read the rain-rate and quality variables, optionally cropped with the
    # boolean index arrays idx_y/idx_x.
    dataset = netCDF4.Dataset(filename)

    if "crr_intensity" in dataset.variables.keys():
        if idx_x is not None:
            raw = np.asarray(dataset.variables["crr_intensity"][idx_y, idx_x])
            quality = np.asarray(dataset.variables["crr_quality"][idx_y, idx_x])
        else:
            raw = np.asarray(dataset.variables["crr_intensity"][:])
            quality = np.asarray(dataset.variables["crr_quality"][:])
        # 65535 is the fill value of the product
        precipitation = np.where(raw == 65535, np.nan, raw)
    else:
        precipitation = None
        quality = None

    dataset.close()

    return precipitation, quality


def _import_saf_crri_geodata(filename):
    # Build the geodata dictionary from the GDAL attributes stored in the
    # netCDF file.
    dataset = netCDF4.Dataset(filename)

    geotable = dataset.getncattr("gdal_geotransform_table")

    geodata = {
        "projection": dataset.getncattr("gdal_projection"),
        "x1": dataset.getncattr("gdal_xgeo_up_left"),
        "y1": dataset.getncattr("gdal_ygeo_low_right"),
        "x2": dataset.getncattr("gdal_xgeo_low_right"),
        "y2": dataset.getncattr("gdal_ygeo_up_left"),
        "xpixelsize": abs(geotable[1]),
        "ypixelsize": abs(geotable[5]),
        "cartesian_unit": "m",
        "yorigin": "upper",
        # the accumulation period is not available in the file
        "accutime": None,
        "unit": dataset.variables["crr_intensity"].units,
        "institution": dataset.getncattr("institution"),
    }

    dataset.close()

    return geodata
@postprocess_import()
def import_dwd_hdf5(filename, qty="RATE", **kwargs):
    """
    Import a DWD precipitation product field (and optionally the quality
    field) from an HDF5 file conforming to the ODIM specification.

    Parameters
    ----------
    filename : str
        Name of the file to import.
    qty : {'RATE', 'ACRR', 'DBZH'}, optional
        Quantity to read from the file. The currently supported identifiers
        are:

        - 'RATE': instantaneous rain rate (mm/h)
        - 'ACRR': hourly rainfall accumulation (mm)
        - 'DBZH': maximum reflectivity (dBZ)

        The default is 'RATE'.

    {extra_kwargs_doc}

    Returns
    -------
    data : np.ndarray
        The requested precipitation product imported from the HDF5 file.
    quality : np.ndarray or None
        Quality field, filled only when the file carries a 'QIND' dataset.
    metadata : dict
        Dictionary containing geospatial metadata ('projection', corner
        coordinates 'll_lon'/'ll_lat'/'ur_lon'/'ur_lat', bounding box
        'x1'/'y1'/'x2'/'y2', 'xpixelsize'/'ypixelsize', 'cartesian_unit',
        'yorigin' (set to 'upper'), 'institution', 'accutime', 'unit',
        'transform', 'zerovalue' and 'threshold').

    Raises
    ------
    MissingOptionalDependency
        If h5py or pyproj is not installed.
    ValueError
        If ``qty`` is not one of 'ACRR', 'DBZH' or 'RATE'.
    IOError
        If the requested quantity is not found in the file.
    """
    if not H5PY_IMPORTED:
        raise MissingOptionalDependency(
            "h5py package is required to import "
            "radar reflectivity composites using ODIM HDF5 specification "
            "but it is not installed"
        )
    if not PYPROJ_IMPORTED:
        raise MissingOptionalDependency(
            "pyproj package is required to import "
            "DWD's radar reflectivity composite "
            "but it is not installed"
        )
    if qty not in ["ACRR", "DBZH", "RATE"]:
        # BUG FIX: the offending quantity was never interpolated into the
        # message (the "%s" placeholder was left unformatted).
        raise ValueError(
            "unknown quantity %s: the available options are 'ACRR', 'DBZH' and 'RATE'"
            % qty
        )

    precip = None
    quality = None

    # Read the whole file content recursively into nested dictionaries,
    # then release the file handle.
    f = h5py.File(filename, "r")
    try:
        file_content = {}
        _read_hdf5_cont(f, file_content)
    finally:
        f.close()

    # Read the scaling attributes from the relevant "what" group
    data_prop = {}
    _get_whatgrp(file_content, data_prop)

    # Get data as well as the no-data and no-echo masks
    arr = file_content["dataset1"]["data1"]["data"]
    mask_n = arr == data_prop["nodata"]
    mask_u = arr == data_prop["undetect"]
    mask = np.logical_and(~mask_u, ~mask_n)

    # If the requested quantity is in the file,
    # transform the data by gain and offset
    if data_prop["quantity"] == qty:
        precip = np.empty(arr.shape)
        precip[mask] = arr[mask] * data_prop["gain"] + data_prop["offset"]
        if qty != "DBZH":
            precip[mask_u] = data_prop["offset"]
        else:
            # Set the no-echo value manually to -32.5
            # if the file contains horizontal reflectivity
            precip[mask_u] = -32.5
        precip[mask_n] = np.nan
    # Get possible information about data quality
    elif data_prop["quantity"] == "QIND":
        quality = np.empty(arr.shape, dtype=float)
        quality[mask] = arr[mask]
        quality[~mask] = np.nan

    if precip is None:
        raise IOError("requested quantity %s not found" % qty)

    # Get the projection and grid information from the HDF5 file
    pr = pyproj.Proj(file_content["where"]["projdef"])
    ll_x, ll_y = pr(
        file_content["where"]["LL_lon"],
        file_content["where"]["LL_lat"],
    )
    ur_x, ur_y = pr(
        file_content["where"]["UR_lon"],
        file_content["where"]["UR_lat"],
    )

    # Determine domain corners in geographic and cartesian coordinates;
    # use all four corners when they are available.
    if len([k for k in file_content["where"].keys() if "_lat" in k]) == 4:
        lr_x, lr_y = pr(
            file_content["where"]["LR_lon"],
            file_content["where"]["LR_lat"],
        )
        ul_x, ul_y = pr(
            file_content["where"]["UL_lon"],
            file_content["where"]["UL_lat"],
        )
        x1 = min(ll_x, ul_x)
        y1 = min(ll_y, lr_y)
        x2 = max(lr_x, ur_x)
        y2 = max(ul_y, ur_y)
    else:
        x1 = ll_x
        y1 = ll_y
        x2 = ur_x
        y2 = ur_y

    # Get the grid cell size
    if (
        "where" in file_content["dataset1"].keys()
        and "xscale" in file_content["dataset1"]["where"].keys()
    ):
        xpixelsize = file_content["dataset1"]["where"]["xscale"]
        ypixelsize = file_content["dataset1"]["where"]["yscale"]
    elif "xscale" in file_content["where"].keys():
        xpixelsize = file_content["where"]["xscale"]
        ypixelsize = file_content["where"]["yscale"]
    else:
        xpixelsize = None
        ypixelsize = None

    # Get the unit and transform
    if qty == "ACRR":
        unit = "mm"
        transform = None
    elif qty == "DBZH":
        unit = "dBZ"
        transform = "dB"
    else:
        unit = "mm/h"
        transform = None

    # Extract the accumulation period (minutes) from the timestamps
    startdate = datetime.datetime.strptime(
        file_content["dataset1"]["what"]["startdate"]
        + file_content["dataset1"]["what"]["starttime"],
        "%Y%m%d%H%M%S",
    )
    enddate = datetime.datetime.strptime(
        file_content["dataset1"]["what"]["enddate"]
        + file_content["dataset1"]["what"]["endtime"],
        "%Y%m%d%H%M%S",
    )
    accutime = (enddate - startdate).total_seconds() / 60.0

    # Finally, fill out the metadata
    metadata = {
        "projection": file_content["where"]["projdef"],
        "ll_lon": file_content["where"]["LL_lon"],
        "ll_lat": file_content["where"]["LL_lat"],
        "ur_lon": file_content["where"]["UR_lon"],
        "ur_lat": file_content["where"]["UR_lat"],
        "x1": x1,
        "y1": y1,
        "x2": x2,
        "y2": y2,
        "xpixelsize": xpixelsize,
        "ypixelsize": ypixelsize,
        "cartesian_unit": "m",
        "yorigin": "upper",
        "institution": file_content["what"]["source"],
        "accutime": accutime,
        "unit": unit,
        "transform": transform,
        "zerovalue": np.nanmin(precip),
        "threshold": _get_threshold_value(precip),
    }
    metadata.update(kwargs)

    # NOTE: the redundant second f.close() was removed; the file is already
    # closed right after _read_hdf5_cont.
    return precip, quality, metadata


def _read_hdf5_cont(f, d):
    """
    Recursively read nested dictionaries from a HDF5 file.

    Parameters
    ----------
    f : h5py.Group or h5py.File
        The current group or file object from which to read data.
    d : dict
        The dictionary to populate with the contents of the HDF5 group.

    Returns
    -------
    None.
    """
    # Set simple types of hdf content
    group_type = h5py._hl.group.Group
    for key, value in f.items():
        if isinstance(value, group_type):
            d[key] = {}
            if len(list(value.items())) > 0:
                # Recurse into non-empty group
                _read_hdf5_cont(value, d[key])
            else:
                # Handle empty group with attributes; decode byte strings
                d[key] = {attr: value.attrs[attr] for attr in value.attrs}
                d[key] = {
                    k: (v.decode() if isinstance(v, np.bytes_) else v)
                    for k, v in d[key].items()
                }
        else:
            # Save h5py.Dataset by group name
            d[key] = np.array(value)
    return


def _get_whatgrp(d, g):
    """
    Recursively get the attributes of the "what" group containing the
    scaling properties.

    Parameters
    ----------
    d : dict
        Dictionary including content of an ODIM compliant HDF5 file.
    g : dict
        Dictionary to fill with the attributes of the what group.

    Raises
    ------
    DataModelError
        If no "what" group is found in ``d`` or its subgroups.

    Returns
    -------
    None.
    """
    if "what" in d.keys():
        # Searching for the corresponding what group
        # that contains the scaling properties
        if "gain" in d["what"].keys():
            g.update(d["what"])
        else:
            k = [k for k in d.keys() if "data" in k][0]
            _get_whatgrp(d[k], g)
    else:
        # BUG FIX: dict_keys is not subscriptable, so d.keys()[0] raised a
        # TypeError instead of the intended DataModelError.
        raise DataModelError(
            "Non ODIM compliant file: "
            "no what group found from {} "
            "or its subgroups".format(next(iter(d), None))
        )
    return
@postprocess_import()
def import_dwd_radolan(filename, product_name):
    """
    Import a RADOLAN precipitation product from a binary file.

    Parameters
    ----------
    filename: str
        Name of the file to import.
    product_name: {'WX','RX','EX','RY','RW','AY','RS','YW','WN'}
        The specific product to read from the file. Please see
        https://www.dwd.de/DE/leistungen/radolan/radolan_info/
        radolan_radvor_op_komposit_format_pdf.pdf
        for a detailed description.

    {extra_kwargs_doc}

    Returns
    -------
    tuple
        A tuple containing:

        - data : np.ndarray
            The desired precipitation product in mm/h imported from a
            RADOLAN file
        - quality : None
        - metadata : dict
            Dictionary containing geospatial metadata such as:

            - 'projection': PROJ.4 string defining the stereographic
              projection.
            - 'xpixelsize', 'ypixelsize': Pixel size in meters.
            - 'cartesian_unit': Unit of the coordinate system (meters).
            - 'yorigin': Origin of the y-axis ('upper').
            - 'x1', 'y1': Coordinates of the lower-left corner.
            - 'x2', 'y2': Coordinates of the upper-right corner.
    """
    # The data block is padded to a multiple of 1000 bytes; everything
    # before it is the ASCII header.
    size_file = os.path.getsize(filename)
    size_data = np.round(size_file, -3)
    size_header = size_file - size_data

    # BUG FIX: use a context manager so the file is closed even when the
    # parsing below raises.
    with open(filename, "rb") as f:
        header = f.read(size_header).decode("utf-8")

        # Get product name from header
        product = header[:2]

        # Check if it is the desired product
        # NOTE: kept as an assert for backward compatibility (AssertionError);
        # beware that asserts are stripped under "python -O".
        assert product == product_name, "Product not in File!"

        # Distinguish between products saved with 8 bit (cat1) or 16 bit
        product_cat1 = np.array(["WX", "RX"])
        product_cat2 = np.array(["RY", "RW", "YW"])

        # Determine byte size and data type
        nbyte = 1 if product in product_cat1 else 2
        signed = "B" if product in product_cat1 else "H"

        # Extract the scaling factor and grid dimensions from the header
        fac = int(header.split("E-")[1].split("INT")[0])
        dimsplit = header.split("x")
        dims = np.array((dimsplit[0][-4:], dimsplit[1][:4]), dtype=int)[::-1]

        # Read binary data
        data = array.array(signed)
        data.fromfile(f, size_data // nbyte)

    # Reshape and transpose data to match grid layout
    data = np.array(np.reshape(data, dims, order="F"), dtype=float).T

    # Define no-echo values based on product type
    if product == "SF":
        no_echo_value = 0.0
    elif product in product_cat2:
        no_echo_value = -0.01
    else:
        no_echo_value = -32.5

    # Apply scaling and handle missing data
    if product in product_cat1:
        data[data >= 249] = np.nan
        data = data / 2.0 + no_echo_value
        # BUG FIX: no_data_mask was previously left undefined for 8-bit
        # products, raising a NameError below. The invalid cells are
        # already NaN at this point.
        no_data_mask = np.isnan(data)
    elif product in product_cat2:
        data, no_data_mask = _identify_info_bits(data)
        if product == "AY":
            data = (10 ** (-fac)) * data / 2.0 + no_echo_value
        else:
            data = (10 ** (-fac)) * data + no_echo_value
    else:
        data, no_data_mask = _identify_info_bits(data)
        data = (10 ** (-fac)) * data / 2.0 + no_echo_value

    # Mask out no-data values
    data[no_data_mask] = np.nan

    # Load geospatial metadata (hard-coded, the file has none)
    metadata = _import_dwd_geodata(product_name, dims)

    return data, None, metadata
- no_data_mask : np.ndarray A boolean mask indicating positions of no-data values. """ # Identify and remove clutter (bit 16) clutter_mask = data - 2**15 >= 0.0 data[clutter_mask] = 0 # Identify and convert negative values (bit 15) mask = data - 2**14 >= 0.0 data[mask] -= 2**14 # Identify no-data values (bit 14) no_data_mask = data - 2**13 == 2500.0 if np.sum(no_data_mask) == 0.0: no_data_mask = data - 2**13 == 0.0 data[no_data_mask] = 0.0 # Identify and remove secondary data flag (bit 13) data[data - 2**12 > 0.0] -= 2**12 # Apply negative sign to previously marked negative values data[mask] *= -1 return data, no_data_mask def _import_dwd_geodata(product_name, dims): """ Generate geospatial metadata for RADOLAN precipitation products. Since RADOLAN binary files contain only limited projection metadata, this function provides hard-coded geospatial definitions and calculates the bounding box of the data grid based on the product type and dimensions. Parameters ---------- product : str The RADOLAN product code (e.g., 'RX', 'WX', 'WN', etc.). dims : tuple of int The dimensions of the data grid (rows, columns). Returns ------- geodata : dict A dictionary containing: - 'projection': PROJ.4 string defining the stereographic projection. - 'xpixelsize', 'ypixelsize': Pixel size in meters. - 'cartesian_unit': Unit of the coordinate system (meters). - 'yorigin': Origin of the y-axis ('upper'). - 'x1', 'y1': Coordinates of the lower-left corner. - 'x2', 'y2': Coordinates of the upper-right corner. 
""" geodata = {} # Define stereographic projection used by RADOLAN projdef = ( "+a=6378137.0 +b=6356752.0 +proj=stere +lat_ts=60.0 " "+lat_0=90.0 +lon_0=10.0 +x_0=0 +y_0=0" ) geodata["projection"] = projdef # Spatial resolution of 1km geodata["xpixelsize"] = 1000.0 geodata["ypixelsize"] = 1000.0 geodata["cartesian_unit"] = "m" geodata["yorigin"] = "upper" # Define product categories product_cat1 = ["RX", "RY", "RW"] product_cat2 = ["WN"] product_cat3 = ["WX", "YW"] # Assign reference coordinates based on product type if product_name in product_cat1: lon, lat = 3.604382995, 46.95361536 elif product_name in product_cat2: lon, lat = 3.566994635, 45.69642538 elif product_name in product_cat3: lon, lat = 9.0, 51.0 # Project reference coordinates to Cartesian system pr = pyproj.Proj(projdef) x1, y1 = pr(lon, lat) # Adjust origin for center-based products if product_name in product_cat3: x1 -= dims[0] * 1000 // 2 y1 -= dims[1] * 1000 // 2 - 80000 # Calculate bounding box x2 = x1 + dims[0] * 1000 y2 = y1 + dims[1] * 1000 geodata["x1"] = x1 geodata["y1"] = y1 geodata["x2"] = x2 geodata["y2"] = y2 return geodata ================================================ FILE: pysteps/io/interface.py ================================================ # -*- coding: utf-8 -*- """ pysteps.io.interface ==================== Interface for the io module. .. currentmodule:: pysteps.io.interface .. 
def discover_importers():
    """
    Search for installed importer plugins in the entrypoints
    'pysteps.plugins.importer' and 'pysteps.plugins.importers'.

    The importers found are added to the
    `pysteps.io.interface._importer_methods` dictionary containing the
    available importers.
    """
    import warnings

    # Backward compatibility with previous entry point
    # 'pysteps.plugins.importers' next to 'pysteps.plugins.importer'
    for entry_point in list(entry_points(group="pysteps.plugins.importer")) + list(
        entry_points(group="pysteps.plugins.importers")
    ):
        _importer = entry_point.load()
        importer_function_name = _importer.__name__
        importer_short_name = importer_function_name.replace("import_", "")

        _postprocess_kws = getattr(_importer, "postprocess_kws", dict())
        _importer = postprocess_import(**_postprocess_kws)(_importer)

        if importer_short_name not in _importer_methods:
            _importer_methods[importer_short_name] = _importer
        else:
            # BUG FIX: the warning object was instantiated but never
            # emitted; use warnings.warn so the user actually sees it.
            warnings.warn(
                f"The importer identifier '{importer_short_name}' is already available in"
                "'pysteps.io.interface._importer_methods'.\n"
                f"Skipping {entry_point.module}:{entry_point.attr}",
                RuntimeWarning,
            )

        if hasattr(importers, importer_function_name):
            # BUG FIX: same as above, emit the warning instead of
            # constructing and discarding it.
            warnings.warn(
                f"The importer function '{importer_function_name}' is already an attribute"
                "of 'pysteps.io.importers`.\n"
                f"Skipping {entry_point.module}:{entry_point.attr}",
                RuntimeWarning,
            )
        else:
            setattr(importers, importer_function_name, _importer)


def importers_info():
    """
    Print all the available importers.

    Returns
    -------
    available_importers: set
        Importer function names found in the pysteps.io.importers module.
    importers_in_the_interface: set
        Importer function names declared in the pysteps.io.get_method
        interface.
    """
    # Importers available in the `io.importers` module
    available_importers = [
        attr for attr in dir(importers) if attr.startswith("import_")
    ]
    print("\nImporters available in the pysteps.io.importers module")
    pprint(available_importers)

    # Importers declared in the pysteps.io.get_method interface
    importers_in_the_interface = [
        f.__name__ for f in interface._importer_methods.values()
    ]
    print("\nImporters available in the pysteps.io.get_method interface")
    pprint(
        [
            (short_name, f.__name__)
            for short_name, f in interface._importer_methods.items()
        ]
    )

    # Let's use sets to find out if there are importers present in the
    # importer module but not declared in the interface, and viceversa.
    available_importers = set(available_importers)
    importers_in_the_interface = set(importers_in_the_interface)

    difference = available_importers ^ importers_in_the_interface
    if len(difference) > 0:
        _diff = available_importers - importers_in_the_interface
        if len(_diff) > 0:
            print(
                "\nIMPORTANT:\nThe following importers are available in pysteps.io.importers module "
                "but not in the pysteps.io.get_method interface"
            )
            pprint(_diff)
        _diff = importers_in_the_interface - available_importers
        if len(_diff) > 0:
            print(
                "\nWARNING:\n"
                "The following importers are available in the pysteps.io.get_method "
                "interface but not in the pysteps.io.importers module"
            )
            pprint(_diff)

    return available_importers, importers_in_the_interface
def get_method(name, method_type):
    """
    Return a callable function for the method corresponding to the given
    name.

    Parameters
    ----------
    name: str
        Name of the method. The available options are:

        Importers:

        - bom_rf3: NetCDF files used in the Bureau of Meteorology archive
          containing precipitation intensity composites.
        - dwd_hdf5: HDF5 file format used by DWD.
        - dwd_radolan: binary RADOLAN files used by DWD.
        - fmi_geotiff: GeoTIFF files used in the Finnish Meteorological
          Institute (FMI) archive, containing reflectivity composites (dBZ).
        - fmi_pgm: PGM files used in the FMI archive, containing
          reflectivity composites (dBZ).
        - knmi_hdf5: HDF5 file format used by KNMI.
        - mch_gif: GIF files in the MeteoSwiss (MCH) archive containing
          precipitation composites.
        - mch_hdf5: HDF5 file format used by MeteoSwiss (MCH).
        - mch_metranet: metranet files in the MeteoSwiss (MCH) archive
          containing precipitation composites.
        - mrms_grib: Grib2 files used by the NSSL's MRMS product.
        - odim_hdf5: HDF5 file conforming to the ODIM specification.
        - opera_hdf5: wrapper to "odim_hdf5" to maintain backward
          compatibility with previous pysteps versions.
        - saf_crri: NetCDF SAF CRRI files containing convective rain rate
          intensity and other fields.

        Exporters:

        - geotiff: export as GeoTIFF files.
        - kineros: KINEROS2 rainfall file as specified in
          https://www.tucson.ars.ag.gov/kineros/. Grid points are treated
          as individual rain gauges; a separate file is produced for each
          ensemble member.
        - netcdf: NetCDF files conforming to the CF 1.7 specification.

    method_type: {'importer', 'exporter'}
        Type of the method (see lists above).

    Raises
    ------
    TypeError
        If ``name`` or ``method_type`` is not a string.
    ValueError
        If ``method_type`` or ``name`` is not a known identifier.
    """
    if isinstance(method_type, str):
        method_type = method_type.lower()
    else:
        # BUG FIX: "for for" typo removed from the error message.
        raise TypeError(
            "Only strings supported for the method_type"
            + " argument\n"
            + "The available types are: 'importer' and 'exporter'"
        ) from None

    if isinstance(name, str):
        name = name.lower()
    else:
        raise TypeError(
            "Only strings supported for the method's names.\n"
            + "Available importers names:"
            + str(list(_importer_methods.keys()))
            + "\nAvailable exporters names:"
            + str(list(_exporter_methods.keys()))
        ) from None

    if method_type == "importer":
        methods_dict = _importer_methods
    elif method_type == "exporter":
        methods_dict = _exporter_methods
    else:
        # BUG FIX: report the offending method_type, not the method name.
        raise ValueError(
            "Unknown method type {}\n".format(method_type)
            + "The available types are: 'importer' and 'exporter'"
        ) from None

    try:
        return methods_dict[name]
    except KeyError:
        raise ValueError(
            "Unknown {} method {}\n".format(method_type, name)
            + "The available methods are:"
            + str(list(methods_dict.keys()))
        ) from None
92 245 3.92 47 52 92 247 4.10 48 49 91 249 4.28 49 46 91 251 4.46 50 43 91 253 4.66 51 40 91 255 4.86 52 40 91 255 5.06 53 40 95 253 5.28 54 40 100 252 5.50 55 40 104 250 5.73 56 40 109 249 5.96 57 40 114 247 6.21 58 40 118 246 6.46 59 40 123 244 6.73 60 40 128 243 7.00 61 40 132 242 7.28 62 40 137 240 7.57 63 40 141 239 7.88 64 40 146 237 8.19 65 40 151 236 8.51 66 40 155 234 8.85 67 40 160 233 9.20 68 40 165 232 9.56 69 40 165 232 9.93 70 40 166 230 10.31 71 41 168 228 10.71 72 42 169 226 11.13 73 43 171 225 11.55 74 44 173 223 12.00 75 44 174 221 12.45 76 45 176 219 12.93 77 46 178 218 13.42 78 47 179 216 13.93 79 48 181 214 14.45 80 48 182 212 15.00 81 49 184 211 15.56 82 50 186 209 16.15 83 51 187 207 16.75 84 52 189 205 17.38 85 53 191 204 18.03 86 53 191 204 18.70 87 54 192 202 19.39 88 55 193 201 20.11 89 57 195 200 20.86 90 58 196 198 21.63 91 60 198 197 22.43 92 61 199 196 23.25 93 63 201 194 24.11 94 64 202 193 24.99 95 65 203 192 25.91 96 67 205 190 26.86 97 68 206 189 27.84 98 70 208 188 28.86 99 71 209 186 29.91 100 73 211 185 31.00 101 74 212 184 32.13 102 76 214 183 33.30 103 76 214 183 34.51 104 77 215 181 35.76 105 78 216 179 37.05 106 80 217 178 38.40 107 81 219 176 39.79 108 83 220 175 41.22 109 84 221 173 42.71 110 86 222 172 44.25 111 87 224 170 45.85 112 88 225 168 47.50 113 90 226 167 49.21 114 91 227 165 50.98 115 93 229 164 52.82 116 94 230 162 54.72 117 96 231 161 56.68 118 97 232 159 58.71 119 99 234 158 60.82 120 99 234 158 63.00 121 102 234 156 65.26 122 105 235 154 67.59 123 109 235 153 70.01 124 112 236 151 72.52 125 115 237 149 75.11 126 119 237 148 77.79 127 122 238 146 80.57 128 126 239 145 83.45 129 129 239 143 86.43 130 132 240 141 89.51 131 136 240 140 92.70 132 139 241 138 96.01 133 142 242 136 99.43 134 146 242 135 102.97 135 149 243 133 106.63 136 153 244 132 110.43 137 153 244 132 114.36 138 155 244 129 118.43 139 157 245 127 122.64 140 159 246 125 127.00 141 161 246 123 131.51 142 163 247 121 136.19 143 166 248 119 141.02 
144 168 248 117 146.03 145 170 249 115 151.22 146 172 250 113 156.59 147 174 250 111 162.14 148 177 251 109 167.90 149 179 252 107 173.85 150 181 252 105 180.02 151 183 253 103 186.40 152 185 254 101 193.01 153 188 255 99 199.85 154 188 255 99 206.94 155 190 255 97 214.27 156 192 255 95 221.86 157 195 255 94 229.72 158 197 255 92 237.86 159 199 255 90 246.28 160 202 255 89 255.00 161 204 255 87 264.03 162 207 255 86 273.37 163 209 255 84 283.05 164 211 255 82 293.07 165 214 255 81 303.44 166 216 255 79 314.17 167 218 255 77 325.29 168 221 255 76 336.79 169 223 255 74 348.71 170 226 255 73 361.04 171 226 255 73 373.81 172 226 253 69 387.02 173 227 251 65 400.71 174 227 249 62 414.87 175 228 247 58 429.54 176 229 245 55 444.72 177 229 243 51 460.44 178 230 241 48 476.71 179 231 240 44 493.56 180 231 238 40 511.00 181 232 236 37 529.06 182 232 234 33 547.75 183 233 232 30 567.10 184 234 230 26 587.13 185 234 228 23 607.87 186 235 226 19 629.35 187 236 225 16 651.58 188 236 225 16 674.59 189 237 223 15 698.41 190 238 221 14 723.08 191 239 219 13 748.61 192 240 217 12 775.05 193 241 215 11 802.41 194 243 214 10 830.75 195 244 212 9 860.08 196 245 210 9 890.44 197 246 208 8 921.88 198 247 206 7 954.43 199 249 205 6 988.12 200 250 203 5 1023.00 201 251 201 4 1059.11 202 252 199 3 1096.50 203 253 197 2 1135.20 204 255 196 2 1175.27 205 255 196 2 1216.75 206 255 192 5 1259.69 207 255 189 8 1304.15 208 255 186 11 1350.18 209 255 183 14 1397.83 210 255 180 17 1447.16 211 255 176 20 1498.22 212 255 173 23 1551.09 213 255 170 26 1605.83 214 255 167 29 1662.49 215 255 164 32 1721.16 216 255 160 35 1781.89 217 255 157 38 1844.76 218 255 154 41 1909.85 219 255 151 44 1977.24 220 255 148 47 2047.00 221 255 145 50 2119.22 222 255 145 50 2193.99 223 254 140 49 2271.40 224 253 136 48 2351.54 225 252 132 47 2434.50 226 252 127 47 2520.39 227 251 123 46 2609.30 228 250 119 45 2701.35 229 250 114 44 2796.65 230 249 110 44 2895.31 231 248 106 43 2997.45 232 248 101 42 3103.19 233 247 97 
41 3212.66 234 246 93 41 3325.99 235 246 88 40 3443.31 236 245 84 39 3564.78 237 244 80 38 3690.52 238 244 76 38 3820.71 239 244 76 38 3955.48 240 244 73 39 4095.00 241 245 70 40 4239.45 242 246 67 41 4388.99 243 246 65 42 4543.80 244 247 62 43 4704.07 245 248 59 44 4870.00 246 248 57 45 5041.77 247 249 54 47 5219.60 248 250 51 48 5403.71 249 250 49 49 5594.30 250 251 46 50 5791.61 251 255 0 0 9999.9 252 0 255 0 9999.9 253 0 0 255 9999.9 254 255 255 255 9999.9 255 255 33 56 9999.9 ================================================ FILE: pysteps/io/mch_lut_8bit_Metranet_v103.txt ================================================ Index R G B mm/h 0 255 255 255 -10.0 1 235 235 235 0.0001 2 145 161 161 0.10 3 143 159 163 0.15 4 141 157 165 0.20 5 140 156 167 0.25 6 138 154 168 0.30 7 136 152 170 0.35 8 134 150 172 0.40 9 133 149 174 0.45 10 131 147 175 0.50 11 129 145 177 0.55 12 127 143 179 0.60 13 126 142 181 0.65 14 124 140 182 0.70 15 122 138 184 0.75 16 120 136 186 0.80 17 119 135 187 0.85 18 119 134 188 0.90 19 117 132 190 0.95 20 115 130 192 1.00 21 113 127 195 1.1 22 111 125 197 1.25 23 109 122 199 1.35 24 107 120 202 1.45 25 105 117 204 1.55 26 104 115 207 1.65 27 102 113 209 1.75 28 100 110 211 1.85 29 98 108 214 1.95 30 96 105 216 2.00 31 94 103 218 3.05 32 92 100 221 4.05 33 90 98 223 5.05 34 89 96 225 6.05 35 89 96 226 7.05 36 85 95 227 8.05 37 82 95 229 9.05 38 79 95 231 10.05 39 76 94 233 11.05 40 73 94 235 12.05 41 70 94 236 13.05 42 67 93 238 14.05 43 64 93 240 15.05 44 61 93 242 16.05 45 58 92 244 17.05 46 55 92 245 18.05 47 52 92 247 19.05 48 49 91 249 20.05 49 46 91 251 21.05 50 43 91 253 22.05 51 40 91 254 23.05 52 40 94 255 24.05 53 40 95 253 25.05 54 40 100 252 26.05 55 40 104 250 27.05 56 40 109 249 28.05 57 40 114 247 29.05 58 40 118 246 30.05 59 40 123 244 31.05 60 40 128 243 32.05 61 40 132 242 33.05 62 40 137 240 34.05 63 40 141 239 35.05 64 40 146 237 36.05 65 40 151 236 37.05 66 40 155 234 38.05 67 40 160 233 39.05 68 40 163 232 40.05 69 40 
165 231 41.05 70 40 166 230 42.05 71 41 168 228 43.05 72 42 169 226 44.05 73 43 171 225 45.05 74 44 173 223 46.05 75 44 174 221 47.05 76 45 176 219 48.05 77 46 178 218 49.05 78 47 179 216 50.05 79 48 181 214 51.05 80 48 182 212 52.05 81 49 184 211 53.05 82 50 186 209 54.05 83 51 187 207 55.05 84 52 189 205 56.05 85 53 190 204 57.05 86 53 191 203 58.05 87 54 192 202 59.05 88 55 193 201 60.05 89 57 195 200 61.05 90 58 196 198 62.05 91 60 198 197 63.05 92 61 199 196 64.05 93 63 201 194 65.05 94 64 202 193 66.05 95 65 203 192 67.05 96 67 205 190 68.05 97 68 206 189 69.05 98 70 208 188 70.05 99 71 209 186 71.05 100 73 211 185 72.05 101 74 212 184 73.05 102 76 213 183 74.05 103 76 214 182 75.05 104 77 215 181 76.05 105 78 216 179 77.05 106 80 217 178 78.05 107 81 219 176 79.05 108 83 220 175 80.05 109 84 221 173 81.05 110 86 222 172 82.05 111 87 224 170 83.05 112 88 225 168 84.05 113 90 226 167 85.05 114 91 227 165 86.05 115 93 229 164 87.05 116 94 230 162 88.05 117 96 231 161 89.05 118 97 232 159 90.05 119 99 233 158 91.05 120 99 234 157 92.05 121 102 234 156 93.05 122 105 235 154 94.05 123 109 235 153 95.05 124 112 236 151 96.05 125 115 237 149 97.05 126 119 237 148 98.05 127 122 238 146 99.05 128 126 239 145 100.05 129 129 239 143 101.05 130 132 240 141 102.05 131 136 240 140 103.05 132 139 241 138 104.05 133 142 242 136 105.05 134 146 242 135 106.05 135 149 243 133 107.05 136 151 244 132 108.05 137 153 244 131 109.05 138 155 244 129 110.05 139 157 245 127 111.05 140 159 246 125 112.05 141 161 246 123 113.05 142 163 247 121 114.05 143 166 248 119 115.05 144 168 248 117 116.05 145 170 249 115 117.05 146 172 250 113 118.05 147 174 250 111 119.05 148 177 251 109 120.05 149 179 252 107 121.05 150 181 252 105 122.05 151 183 253 103 123.05 152 185 254 101 124.05 153 186 255 99 125.05 154 188 255 98 126.05 155 190 255 97 127.05 156 192 255 95 128.05 157 195 255 94 129.05 158 197 255 92 130.05 159 199 255 90 131.05 160 202 255 89 132.05 161 204 255 87 133.05 162 207 255 86 
134.05 163 209 255 84 135.05 164 211 255 82 136.05 165 214 255 81 137.05 166 216 255 79 138.05 167 218 255 77 139.05 168 221 255 76 141.05 169 223 255 74 142.05 170 224 255 73 143.05 171 226 255 71 144.05 172 226 253 69 145.05 173 227 251 65 146.05 174 227 249 62 147.05 175 228 247 58 148.05 176 229 245 55 149.05 177 229 243 51 150.05 178 230 241 48 151.05 179 231 240 44 152.05 180 231 238 40 153.05 181 232 236 37 154.05 182 232 234 33 155.05 183 233 232 30 156.05 184 234 230 26 157.05 185 234 228 23 158.05 186 235 226 19 159.05 187 236 225 17 160.05 188 236 224 16 161.05 189 237 223 15 162.05 190 238 221 14 163.05 191 239 219 13 164.05 192 240 217 12 165.05 193 241 215 11 166.05 194 243 214 10 167.05 195 244 212 9 168.05 196 245 210 9 169.05 197 246 208 8 170.05 198 247 206 7 171.05 199 249 205 6 172.05 200 250 203 5 173.05 201 251 201 4 174.05 202 252 199 3 175.05 203 253 197 2 176.05 204 254 196 2 177.05 205 255 196 3 178.05 206 255 192 5 179.05 207 255 189 8 180.05 208 255 186 11 181.05 209 255 183 14 182.05 210 255 180 17 183.05 211 255 176 20 184.05 212 255 173 23 185.05 213 255 170 26 186.05 214 255 167 29 187.05 215 255 164 32 188.05 216 255 160 35 189.05 217 255 157 38 190.05 218 255 154 41 191.05 219 255 151 44 192.05 220 255 148 47 193.05 221 255 147 48 194.05 222 255 145 50 195.05 223 254 140 49 196.05 224 253 136 48 197.05 225 252 132 47 198.05 226 252 127 47 199.05 227 251 123 46 200.05 228 250 119 45 201.05 229 250 114 44 202.05 230 249 110 44 203.05 231 248 106 43 204.05 232 248 101 42 205.05 233 247 97 41 206.05 234 246 93 41 207.05 235 246 88 40 208.05 236 245 84 39 209.05 237 244 80 38 210.05 238 244 78 38 220.05 239 244 76 38 230.05 240 244 73 39 240.05 241 245 70 40 250.05 242 246 67 41 260.05 243 246 65 42 270.05 244 247 62 43 280.05 245 248 59 44 290.05 246 248 57 45 300.05 247 249 54 47 310.05 248 250 51 48 320.05 249 250 49 49 330.05 250 251 46 50 340.05 251 255 0 0 9999.9 252 0 255 0 9999.9 253 0 0 255 9999.9 254 255 255 255 9999.9 255 0 0 
55 9999.9 ================================================ FILE: pysteps/io/nowcast_importers.py ================================================ # -*- coding: utf-8 -*- """ pysteps.io.nowcast_importers ============================ Methods for importing nowcast files. The methods in this module implement the following interface:: import_xxx(filename, optional arguments) where xxx is the name (or abbreviation) of the file format and filename is the name of the input file. The output of each method is a two-element tuple containing the nowcast array and a metadata dictionary. The metadata dictionary contains the following mandatory key-value pairs: .. tabularcolumns:: |p{2cm}|L| +------------------+----------------------------------------------------------+ | Key | Value | +==================+==========================================================+ | projection | PROJ.4-compatible projection definition | +------------------+----------------------------------------------------------+ | x1 | x-coordinate of the lower-left corner of the data raster | +------------------+----------------------------------------------------------+ | y1 | y-coordinate of the lower-left corner of the data raster | +------------------+----------------------------------------------------------+ | x2 | x-coordinate of the upper-right corner of the data raster| +------------------+----------------------------------------------------------+ | y2 | y-coordinate of the upper-right corner of the data raster| +------------------+----------------------------------------------------------+ | xpixelsize | grid resolution in x-direction | +------------------+----------------------------------------------------------+ | ypixelsize | grid resolution in y-direction | +------------------+----------------------------------------------------------+ | yorigin | a string specifying the location of the first element in | | | the data raster w.r.t. 
y-axis: | | | 'upper' = upper border | | | 'lower' = lower border | +------------------+----------------------------------------------------------+ | institution | name of the institution who provides the data | +------------------+----------------------------------------------------------+ | timestep | time step of the input data (minutes) | +------------------+----------------------------------------------------------+ | unit | the physical unit of the data: 'mm/h', 'mm' or 'dBZ' | +------------------+----------------------------------------------------------+ | transform | the transformation of the data: None, 'dB', 'Box-Cox' or | | | others | +------------------+----------------------------------------------------------+ | accutime | the accumulation time in minutes of the data, float | +------------------+----------------------------------------------------------+ | threshold | the rain/no rain threshold with the same unit, | | | transformation and accutime of the data. | +------------------+----------------------------------------------------------+ | zerovalue | it is the value assigned to the no rain pixels with the | | | same unit, transformation and accutime of the data. | +------------------+----------------------------------------------------------+ Available Nowcast Importers --------------------------- .. autosummary:: :toctree: ../generated/ import_netcdf_pysteps """ import numpy as np from pysteps.decorators import postprocess_import from pysteps.exceptions import MissingOptionalDependency, DataModelError try: import netCDF4 NETCDF4_IMPORTED = True except ImportError: NETCDF4_IMPORTED = False @postprocess_import(dtype="single") def import_netcdf_pysteps(filename, onerror="warn", **kwargs): """ Read a nowcast or an ensemble of nowcasts from a NetCDF file conforming to the CF 1.7 specification. If an error occurs during the import, the corresponding error message is shown, and ( None, None ) is returned. 
Parameters ---------- filename: str Name of the file to import. onerror: str Define the behavior if an exception is raised during the import. - "warn": Print an error message and return (None, None) - "raise": Raise an exception {extra_kwargs_doc} Returns ------- precipitation: 2D array, float32 Precipitation field in mm/h. The dimensions are [latitude, longitude]. The first grid point (0,0) corresponds to the upper left corner of the domain, while (last i, last j) denote the lower right corner. metadata: dict Associated metadata (pixel sizes, map projections, etc.). """ if not NETCDF4_IMPORTED: raise MissingOptionalDependency( "netCDF4 package is required to import pysteps netcdf " "nowcasts but it is not installed" ) onerror = onerror.lower() if onerror not in ["warn", "raise"]: raise ValueError("'onerror' keyword must be 'warn' or 'raise'.") try: ds = netCDF4.Dataset(filename, "r") var_names = list(ds.variables.keys()) if "precip_intensity" in var_names: precip = ds.variables["precip_intensity"] unit = "mm/h" accutime = None transform = None elif "precip_accum" in var_names: precip = ds.variables["precip_accum"] unit = "mm" accutime = None transform = None elif "hourly_precip_accum" in var_names: precip = ds.variables["hourly_precip_accum"] unit = "mm" accutime = 60.0 transform = None elif "reflectivity" in var_names: precip = ds.variables["reflectivity"] unit = "dBZ" accutime = None transform = "dB" else: raise DataModelError( "Non CF compilant file: " "the netCDF file does not contain any " "supported variable name.\n" "Supported names: 'precip_intensity', 'hourly_precip_accum', " "or 'reflectivity'\n" "file: " + filename ) precip = precip[...].squeeze().astype(float) if isinstance(precip, np.ma.MaskedArray): invalid_mask = np.ma.getmaskarray(precip) precip = precip.data precip[invalid_mask] = np.nan metadata = {} time_var = ds.variables["time"] leadtimes = time_var[:] / 60.0 # minutes leadtime metadata["leadtimes"] = leadtimes timestamps = 
netCDF4.num2date(time_var[:], time_var.units) metadata["timestamps"] = timestamps if "polar_stereographic" in var_names: vn = "polar_stereographic" attr_dict = {} for attr_name in ds.variables[vn].ncattrs(): attr_dict[attr_name] = ds[vn].getncattr(attr_name) proj_str = _convert_grid_mapping_to_proj4(attr_dict) metadata["projection"] = proj_str # geodata metadata["xpixelsize"] = abs(ds.variables["x"][1] - ds.variables["x"][0]) metadata["ypixelsize"] = abs(ds.variables["y"][1] - ds.variables["y"][0]) xmin = np.min(ds.variables["x"]) - 0.5 * metadata["xpixelsize"] xmax = np.max(ds.variables["x"]) + 0.5 * metadata["xpixelsize"] ymin = np.min(ds.variables["y"]) - 0.5 * metadata["ypixelsize"] ymax = np.max(ds.variables["y"]) + 0.5 * metadata["ypixelsize"] # TODO: this is only a quick solution metadata["x1"] = xmin metadata["y1"] = ymin metadata["x2"] = xmax metadata["y2"] = ymax metadata["yorigin"] = "upper" # TODO: check this # TODO: Read the metadata to the dictionary. if (accutime is None) and (leadtimes.size > 1): accutime = leadtimes[1] - leadtimes[0] metadata["accutime"] = accutime metadata["unit"] = unit metadata["transform"] = transform metadata["zerovalue"] = np.nanmin(precip) if metadata["zerovalue"] == np.nanmax(precip): metadata["threshold"] = metadata["zerovalue"] else: metadata["threshold"] = np.nanmin(precip[precip > metadata["zerovalue"]]) ds.close() return precip, metadata except Exception as er: if onerror == "warn": print("There was an error processing the file", er) return None, None else: raise er def _convert_grid_mapping_to_proj4(grid_mapping): gm_keys = list(grid_mapping.keys()) # TODO: implement more projection types here if grid_mapping["grid_mapping_name"] == "polar_stereographic": proj_str = "+proj=stere" proj_str += " +lon_0=%s" % grid_mapping["straight_vertical_longitude_from_pole"] proj_str += " +lat_0=%s" % grid_mapping["latitude_of_projection_origin"] if "standard_parallel" in gm_keys: proj_str += " +lat_ts=%s" % 
grid_mapping["standard_parallel"] if "scale_factor_at_projection_origin" in gm_keys: proj_str += " +k_0=%s" % grid_mapping["scale_factor_at_projection_origin"] proj_str += " +x_0=%s" % grid_mapping["false_easting"] proj_str += " +y_0=%s" % grid_mapping["false_northing"] return proj_str else: return None ================================================ FILE: pysteps/io/readers.py ================================================ # -*- coding: utf-8 -*- """ pysteps.io.readers ================== Module with the reader functions. .. autosummary:: :toctree: ../generated/ read_timeseries """ import numpy as np def read_timeseries(inputfns, importer, **kwargs): """ Read a time series of input files using the methods implemented in the :py:mod:`pysteps.io.importers` module and stack them into a 3d array of shape (num_timesteps, height, width). Parameters ---------- inputfns: tuple Input files returned by a function implemented in the :py:mod:`pysteps.io.archive` module. importer: function A function implemented in the :py:mod:`pysteps.io.importers` module. kwargs: dict Optional keyword arguments for the importer. Returns ------- out: tuple A three-element tuple containing the read data and quality rasters and associated metadata. If an input file name is None, the corresponding precipitation and quality fields are filled with nan values. If all input file names are None or if the length of the file name list is zero, a three-element tuple containing None values is returned. 
""" # check for missing data precip_ref = None if all(ifn is None for ifn in inputfns): return None, None, None else: if len(inputfns[0]) == 0: return None, None, None for ifn in inputfns[0]: if ifn is not None: precip_ref, quality_ref, metadata = importer(ifn, **kwargs) break if precip_ref is None: return None, None, None precip = [] quality = [] timestamps = [] for i, ifn in enumerate(inputfns[0]): if ifn is not None: precip_, quality_, _ = importer(ifn, **kwargs) precip.append(precip_) quality.append(quality_) timestamps.append(inputfns[1][i]) else: precip.append(precip_ref * np.nan) if quality_ref is not None: quality.append(quality_ref * np.nan) else: quality.append(None) timestamps.append(inputfns[1][i]) # Replace this with stack? precip = np.concatenate([precip_[None, :, :] for precip_ in precip]) # TODO: Q should be organized as R, but this is not trivial as Q_ can be also None or a scalar metadata["timestamps"] = np.array(timestamps) return precip, quality, metadata ================================================ FILE: pysteps/motion/__init__.py ================================================ # -*- coding: utf-8 -*- """ Implementations of optical flow methods.""" from .interface import get_method ================================================ FILE: pysteps/motion/_proesmans.pyx ================================================ # -*- coding: utf-8 -*- """ Cython module for the Proesmans optical flow algorithm """ #from cython.parallel import parallel, prange import numpy as np from scipy.ndimage import convolve cimport cython cimport numpy as np ctypedef np.float64_t float64 ctypedef np.intp_t intp from libc.math cimport floor, sqrt cdef float64 _INTENSITY_SCALE = 1.0 / 255.0 def _compute_advection_field(float64 [:, :, :] R, lam, intp num_iter, intp n_levels): R_p = [_construct_image_pyramid(R[0, :, :], n_levels), _construct_image_pyramid(R[1, :, :], n_levels)] cdef intp m = R_p[0][-1].shape[0] cdef intp n = R_p[0][-1].shape[1] cdef np.ndarray[float64, 
ndim=4] V_cur = np.zeros((2, 2, m, n))
    cdef np.ndarray[float64, ndim=4] V_next
    cdef np.ndarray[float64, ndim=3] GAMMA = np.empty((2, R.shape[1], R.shape[2]))

    # Coarse-to-fine: solve at the coarsest pyramid level first, then
    # upsample the motion field to initialize the next finer level.
    for i in range(n_levels-1, -1, -1):
        _proesmans(np.stack([R_p[0][i], R_p[1][i]]), V_cur, num_iter, lam)
        # NOTE(review): at i == 0, R_p[0][i-1] wraps to the coarsest level,
        # so this allocation is discarded — indentation reconstructed from a
        # flattened dump; confirm against the upstream file.
        m = R_p[0][i-1].shape[0]
        n = R_p[0][i-1].shape[1]
        V_next = np.zeros((2, 2, m, n))
        if i > 0:
            _initialize_next_level(V_cur, V_next)
            V_cur = V_next

    # Final forward/backward consistency (quality) maps at full resolution.
    _compute_consistency_maps(V_cur, GAMMA)

    return V_cur, GAMMA

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True)
cdef _compute_next_pyramid_level(float64 [:, :] I_src, float64 [:, :] I_dest):
    # Downsample by a factor of two using a 2x2 box average.
    cdef intp dh = I_dest.shape[0]
    cdef intp dw = I_dest.shape[1]

    cdef intp x, y

    for y in range(dh):
        for x in range(dw):
            I_dest[y, x] = (I_src[2*y, 2*x] + I_src[2*y, 2*x+1] + \
                            I_src[2*y+1, 2*x] + I_src[2*y+1, 2*x+1]) / 4.0

cdef _construct_image_pyramid(float64 [:, :] R, intp n_levels):
    # Build an n_levels-deep image pyramid; level 0 is the input image and
    # each subsequent level halves both dimensions.
    cdef intp m = R.shape[0]
    cdef intp n = R.shape[1]

    cdef np.ndarray[float64, ndim=2] R_next

    R_out = [R]
    cdef float64 [:, :] R_cur = R
    for i in range(1, n_levels):
        R_next = np.zeros((int(m/2), int(n/2)))
        _compute_next_pyramid_level(R_cur, R_next)
        R_cur = R_next
        R_out.append(R_cur)

        m = int(m / 2)
        n = int(n / 2)

    return R_out

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True)
cdef _proesmans(float64 [:, :, :] R, float64 [:, :, :, :] V, intp num_iter,
                float64 lam):
    # Iterative solver for the forward (j=0) and backward (j=1) motion
    # fields between the two images in R; lam weights the data term.
    cdef intp x, y
    cdef intp i, j
    cdef float64 xd, yd
    cdef float64 It
    cdef float64 ic
    cdef float64 gx, gy

    cdef intp m = R.shape[1]
    cdef intp n = R.shape[2]

    # Sobel image gradients for both images.
    cdef np.ndarray[float64, ndim=4] G = np.zeros((2, 2, R.shape[1], R.shape[2]))
    G[0, :, :, :] = _compute_gradients(R[0, :, :])
    G[1, :, :, :] = _compute_gradients(R[1, :, :])

    cdef np.ndarray[float64, ndim=3] GAMMA = np.zeros((2, R.shape[1], R.shape[2]))

    cdef float64 v_avg_1, v_avg_2
    cdef float64 v_next_1, v_next_2

    cdef float64 [:, :] R_j_1
    cdef float64 [:, :] R_j_2
    cdef float64 [:, :] G_j_1
    cdef float64 [:, :] G_j_2
    cdef float64 [:, :, :] V_j
    cdef float64 [:, :] GAMMA_j

    for i in range(num_iter):
        _compute_consistency_maps(V, GAMMA)

        for j in range(2):
            R_j_1 = R[j, :, :]
            R_j_2 = R[1-j, :, :]
            G_j_1 = G[j, 0, :, :]
            G_j_2 = G[j, 1, :, :]
            V_j = V[j, :, :, :]
            GAMMA_j = GAMMA[j, :, :]

            for y in range(1, m-1):
            #for y in prange(1, m - 1, schedule='static', nogil=True):
                for x in range(1, n-1):
                    # Consistency-weighted average of the neighboring vectors.
                    v_avg_1 = _compute_laplacian(GAMMA_j, V_j, x, y, 0)
                    v_avg_2 = _compute_laplacian(GAMMA_j, V_j, x, y, 1)

                    xd = x + v_avg_1
                    yd = y + v_avg_2

                    if xd >= 0 and xd < n - 1 and yd >= 0 and yd < m - 1:
                        # Temporal intensity difference at the displaced point.
                        It = (_linear_interpolate(R_j_2, xd, yd) - \
                              R_j_1[y, x]) * _INTENSITY_SCALE
                        gx = G_j_1[y, x]
                        gy = G_j_2[y, x]
                        ic = lam * It / (1.0 + lam * (gx * gx + gy * gy))

                        v_next_1 = v_avg_1 - gx * ic
                        v_next_2 = v_avg_2 - gy * ic
                    else:
                        # use consistency-weighted average as the next value
                        # if (xd,yd) is outside the image
                        v_next_1 = v_avg_1
                        v_next_2 = v_avg_2

                    V_j[0, y, x] = v_next_1
                    V_j[1, y, x] = v_next_2

            # NOTE(review): indentation reconstructed from a flattened dump —
            # this call uses j, so it is assumed to be inside the j-loop.
            _fill_edges(V[j, :, :, :])

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True)
cdef float64 _compute_laplacian(float64 [:, :] gi, float64 [:, :, :] Vi,
                                intp x, intp y, intp j): #nogil:
    # Weighted 8-neighbor average of the velocity component j, with the
    # consistency map gi as weights (4-neighbors weighted 1/6, diagonals 1/12).
    cdef float64 v
    cdef float64 sumWeights = (gi[y-1, x] + gi[y, x-1] + \
                               gi[y, x+1] + gi[y+1, x]) / 6.0 + \
                              (gi[y-1, x-1] + gi[y-1, x+1] + \
                               gi[y+1, x-1] + gi[y+1, x+1]) / 12.0

    if sumWeights > 1e-8:
        v = (gi[y-1, x] * Vi[j, y-1, x] + gi[y, x-1] * Vi[j, y, x-1] + \
             gi[y, x+1] * Vi[j, y, x+1] + gi[y+1, x] * Vi[j, y+1, x]) / 6.0 + \
            (gi[y-1, x-1] * Vi[j, y-1, x-1] + gi[y-1, x+1] * Vi[j, y-1, x+1] + \
             gi[y+1, x-1] * Vi[j, y+1, x-1] + gi[y+1, x+1] * Vi[j, y+1, x+1]) / 12.0

        return v / sumWeights
    else:
        return 0.0

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True)
cdef void _compute_consistency_maps(float64 [:, :, :, :] V,
                                    float64 [:, :, :] GAMMA):
    # For each direction i, measure how well the field agrees with the
    # opposite-direction field sampled at the displaced location, and map
    # that residual into a [0, 1] consistency weight.
    cdef intp x, y
    cdef intp i
    cdef intp m, n
    cdef float64 xd, yd
    cdef float64 ub, vb
    cdef float64 uDiff, vDiff
    cdef float64 c
    cdef float64 c_sum
    cdef intp c_count
    cdef float64 K
    cdef float64 g
    cdef float64 [:, :] V11, V12, V21, V22

    m = V.shape[2]
    n = V.shape[3]

    for i in range(2):
        c_sum = 0.0
        c_count = 0

        V11 = V[i, 0, :, :]
        V12 = V[i, 1, :, :]
        V21 = V[1-i, 0, :, :]
        V22 = V[1-i, 1, :, :]

        #for y in prange(m, schedule='guided', nogil=True):
        for y in range(m):
            for x in range(n):
                xd = x + V[i, 0, y, x]
                yd = y + V[i, 1, y, x]

                if xd >= 0 and yd >= 0 and xd < n and yd < m:
                    ub = _linear_interpolate(V21, xd, yd)
                    vb = _linear_interpolate(V22, xd, yd)
                    # Forward + backward vectors should cancel; the norm of
                    # their sum is the consistency residual.
                    uDiff = V11[y, x] + ub
                    vDiff = V12[y, x] + vb
                    c = sqrt(uDiff * uDiff + vDiff * vDiff)
                    GAMMA[i, y, x] = c
                    c_sum += c
                    c_count += 1
                else:
                    # Displaced point outside the image: flag with -1.
                    GAMMA[i, y, x] = -1.0

        # Normalization constant: fraction of the mean residual.
        if c_count > 0:
            K = 0.9 * c_sum / c_count
        else:
            K = 0.0

        #for y in prange(m, schedule='guided', nogil=True):
        for y in range(m):
            for x in range(n):
                if K > 1e-8:
                    if GAMMA[i, y, x] >= 0.0:
                        # Lorentzian-style mapping of residual to weight.
                        g = GAMMA[i, y, x]
                        GAMMA[i, y, x] = 1.0 / (1.0 + (g / K) * (g / K))
                    else:
                        GAMMA[i, y, x] = 1.0
                else:
                    GAMMA[i, y, x] = 1.0

cdef np.ndarray[float64, ndim=3] _compute_gradients(float64 [:, :] I):
    # use 3x3 Sobel kernels for computing partial derivatives
    cdef np.ndarray[float64, ndim=2] Kx = np.zeros((3, 3))
    cdef np.ndarray[float64, ndim=2] Ky = np.zeros((3, 3))

    Kx[0, 0] = 1.0 / 8.0 * _INTENSITY_SCALE
    Kx[0, 1] = 0.0
    Kx[0, 2] = -1.0 / 8.0 * _INTENSITY_SCALE
    Kx[1, 0] = 2.0 / 8.0 * _INTENSITY_SCALE
    Kx[1, 1] = 0.0
    Kx[1, 2] = -2.0 / 8.0 * _INTENSITY_SCALE
    Kx[2, 0] = 1.0 / 8.0 * _INTENSITY_SCALE
    Kx[2, 1] = 0.0
    Kx[2, 2] = -1.0 / 8.0 * _INTENSITY_SCALE

    Ky[0, 0] = 1.0 / 8.0 * _INTENSITY_SCALE
    Ky[0, 1] = 2.0 / 8.0 * _INTENSITY_SCALE
    Ky[0, 2] = 1.0 / 8.0 * _INTENSITY_SCALE
    Ky[1, 0] = 0.0
    Ky[1, 1] = 0.0
    Ky[1, 2] = 0.0
    Ky[2, 0] = -1.0 / 8.0 * _INTENSITY_SCALE
    Ky[2, 1] = -2.0 / 8.0 * _INTENSITY_SCALE
    Ky[2, 2] = -1.0 / 8.0 * _INTENSITY_SCALE

    cdef np.ndarray[float64, ndim=3] G = np.zeros((2, I.shape[0], I.shape[1]))
    G[0, :, :] = convolve(I, Kx, mode="constant", cval=0.0)
    G[1, :, :] = convolve(I, Ky, mode="constant", cval=0.0)

    return G

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True)
cdef void _fill_edges(float64 [:, :, :] V): #nogil:
    # Replicate the values one pixel inside the border to the border itself
    # (the solver only updates the interior).
    cdef intp x, y
    cdef intp i

    cdef intp m = V.shape[1]
    cdef intp n = V.shape[2]

    for i in range(2):
        # top and bottom edges
        for x in range(1, n-1):
            V[i, 0, x] = V[i, 1, x]
            V[i, m - 1, x] = V[i, m - 2, x]
        # left and right edges
        for y in range(1, m-1):
            V[i, y, 0] = V[i, y, 1]
            V[i, y, n-1] = V[i, y, n-2]
        # corners
        V[i, 0, 0] = V[i, 1, 1]
        V[i, 0, n - 1] = V[i, 1, n - 2]
        V[i, m - 1, 0] = V[i, m - 2, 1]
        V[i, m - 1, n - 1] = V[i, m - 2, n - 2]

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True)
cdef _initialize_next_level(float64 [:, :, :, :] V_prev,
                            float64 [:, :, :, :] V_next):
    # Upsample the motion field from the previous (coarser) pyramid level:
    # interpolate at half-pixel positions and double the vector magnitudes.
    cdef intp m_prev = V_prev.shape[2]
    cdef intp n_prev = V_prev.shape[3]
    cdef intp m_next = V_next.shape[2]
    cdef intp n_next = V_next.shape[3]

    cdef float64 vxf, vyf
    cdef float64 vxb, vyb
    cdef float64 xc, yc
    cdef intp xci, yci
    cdef intp xn, yn

    cdef float64 [:, :] V_prev_1 = V_prev[0, 0, :, :]
    cdef float64 [:, :] V_prev_2 = V_prev[0, 1, :, :]
    cdef float64 [:, :] V_prev_3 = V_prev[1, 0, :, :]
    cdef float64 [:, :] V_prev_4 = V_prev[1, 1, :, :]

    for yn in range(m_next):
        yc = yn / 2.0
        yci = yn / 2
        for xn in range(n_next):
            xc = xn / 2.0
            xci = xn / 2

            if xn % 2 != 0 or yn % 2 != 0:
                # Off-grid position: bilinear interpolation.
                vxf = _linear_interpolate(V_prev_1, xc, yc)
                vyf = _linear_interpolate(V_prev_2, xc, yc)
                vxb = _linear_interpolate(V_prev_3, xc, yc)
                vyb = _linear_interpolate(V_prev_4, xc, yc)
            else:
                # On-grid position: direct copy (clamped to valid indices).
                if xci > n_prev - 1:
                    xci = n_prev - 1
                if yci > m_prev - 1:
                    yci = m_prev - 1

                vxf = V_prev[0, 0, yci, xci]
                vyf = V_prev[0, 1, yci, xci]
                vxb = V_prev[1, 0, yci, xci]
                vyb = V_prev[1, 1, yci, xci]

            # Scale by two: one coarse pixel spans two fine pixels.
            V_next[0, 0, yn, xn] = 2.0 * vxf
            V_next[0, 1, yn, xn] = 2.0 * vyf
            V_next[1, 0, yn, xn] = 2.0 * vxb
            V_next[1, 1, yn, xn] = 2.0 * vyb

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True) cdef float64 _linear_interpolate(float64 [:, :] I, float64 x, float64 y): #nogil: cdef intp x0 = int(x) cdef intp x1 = x0 + 1 cdef intp y0 = int(y) cdef intp y1 = y0 + 1 if x0 < 0: x0 = 0 if x0 > I.shape[1] - 1: x0 = I.shape[1]-1 if x1 < 0: x1 = 0 if x1 > I.shape[1] - 1: x1 = I.shape[1]-1 if y0 < 0: y0 = 0 if y0 > I.shape[0] - 1: y0 = I.shape[0]-1 if y1 < 0: y1 = 0 if y1 > I.shape[0] - 1: y1 = I.shape[0]-1 cdef float64 I_a = I[y0, x0] cdef float64 I_b = I[y1, x0] cdef float64 I_c = I[y0, x1] cdef float64 I_d = I[y1, x1] cdef float64 w_a = (x1-x) * (y1-y) cdef float64 w_b = (x1-x) * (y-y0) cdef float64 w_c = (x-x0) * (y1-y) cdef float64 w_d = (x-x0) * (y-y0) return w_a*I_a + w_b*I_b + w_c*I_c + w_d*I_d ================================================ FILE: pysteps/motion/_vet.pyx ================================================ # -*- coding: utf-8 -*- """ Cython module for morphing and cost functions implementations used in in the Variation Echo Tracking Algorithm """ from cython.parallel import prange, parallel import numpy as np cimport numpy as np cimport cython ctypedef np.float64_t float64 ctypedef np.int8_t int8 ctypedef np.intp_t intp from libc.math cimport floor, round cdef inline float64 float_abs(float64 a) nogil: return a if a > 0. else -a """ Return the absolute value of a float """ cdef inline intp int_min(intp a, intp b) nogil: return a if a < b else b cdef inline intp int_max(intp a, intp b) nogil: return a if a > b else b @cython.cdivision(True) cdef inline float64 _linear_interpolation(float64 x, float64 x1, float64 x2, float64 y1, float64 y2) nogil: """ Linear interpolation at x. 
    y(x) = y1 + (x-x1) * (y2-y1) / (x2-x1)
    """
    # degenerate interval: return the left value to avoid division by ~0
    if float_abs(x1 - x2) < 1e-6:
        return y1

    return y1 + (x - x1) * (y2 - y1) / (x2 - x1)


@cython.cdivision(True)
cdef inline float64 _bilinear_interpolation(float64 x,
                                            float64 y,
                                            float64 x1,
                                            float64 x2,
                                            float64 y1,
                                            float64 y2,
                                            float64 q11,
                                            float64 q12,
                                            float64 q21,
                                            float64 q22) nogil:
    """https://en.wikipedia.org/wiki/Bilinear_interpolation"""
    cdef float64 f_x_y1, f_x_y2
    # interpolate along x at the two y levels, then along y
    f_x_y1 = _linear_interpolation(x, x1, x2, q11, q21)
    f_x_y2 = _linear_interpolation(x, x1, x2, q12, q22)
    return _linear_interpolation(y, y1, y2, f_x_y1, f_x_y2)


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True)
def _warp(np.ndarray[float64, ndim=2] image,
          np.ndarray[int8, ndim=2] mask,
          np.ndarray[float64, ndim=3] displacement,
          bint gradient=False):
    """
    Morph image by applying a displacement field (Warping).

    The new image is created by selecting for each position the values of
    the input image at the positions given by the x and y displacements.
    The routine works in a backward sense.
    The displacement vectors have to refer to their destination.

    For more information in Morphing functions see Section 3 in
    `Beezley and Mandel (2008)`_.

    Beezley, J. D., & Mandel, J. (2008).
    Morphing ensemble Kalman filters. Tellus A, 60(1), 131-140.

    .. _`Beezley and Mandel (2008)`: http://dx.doi.org/10.1111/\
    j.1600-0870.2007.00275.x

    The displacement field in x and y directions and the image must have the
    same dimensions.

    The morphing is executed in parallel over x axis.

    The value of displaced pixels that fall outside the limits takes the
    value of the nearest edge. Those pixels are indicated by values greater
    than 1 in the output mask.

    Parameters
    ----------
    image : ndarray (ndim = 2)
        Image to morph

    displacement : ndarray (ndim = 3)
        Displacement field to be applied (Warping).
        The dimensions are:
        displacement [ x (0) or y (1) ,
                      i index of pixel, j index of pixel ]

    gradient : bool, optional
        If True, the gradient of the morphing function is returned.

    Returns
    -------
    image : ndarray (float64 ,ndim = 2)
        Morphed image.

    mask : ndarray (int8 ,ndim = 2)
        Invalid values mask. Points outside the boundaries are masked.
        Values greater than 1, indicate masked values.

    gradient_values : ndarray (float64 ,ndim = 3), optional
        If gradient keyword is True, the gradient of the function is also
        returned.
    """
    cdef intp nx = image.shape[0]
    cdef intp ny = image.shape[1]

    cdef np.ndarray[float64, ndim = 2] new_image = (
        np.zeros([nx, ny], dtype=np.float64))

    cdef np.ndarray[int8, ndim = 2] morphed_mask = (
        np.zeros([nx, ny], dtype=np.int8))

    # carry over the input invalid-data mask
    morphed_mask[mask > 0] = 1.0

    cdef np.ndarray[float64, ndim = 3] gradient_values = (
        np.zeros([2, nx, ny], dtype=np.float64))

    cdef intp x, y

    cdef intp x_max_int = nx - 1
    cdef intp y_max_int = ny - 1

    cdef float64 x_max_float = x_max_int
    cdef float64 y_max_float = y_max_int

    cdef float64 x_float, y_float, dx, dy

    cdef intp x_floor
    cdef intp x_ceil
    cdef intp y_floor
    cdef intp y_ceil

    cdef float64 f00, f10, f01, f11

    # backward mapping: for each destination pixel, sample the source image
    # at (x, y) minus the displacement; parallelized over rows
    for x in prange(nx, schedule='static', nogil=True):
        for y in range(ny):

            x_float = (x) - displacement[0, x, y]
            y_float = (y) - displacement[1, x, y]

            # clamp the source x coordinate to the grid, marking the pixel
            # as outside the boundaries in the mask
            if x_float < 0:
                morphed_mask[x, y] = 1
                x_float = 0
                x_floor = 0
                x_ceil = 0
            elif x_float > x_max_float:
                morphed_mask[x, y] = 1
                x_float = x_max_float
                x_floor = x_max_int
                x_ceil = x_max_int
            else:
                x_floor = floor(x_float)
                x_ceil = x_floor + 1
                if x_ceil > x_max_int:
                    x_ceil = x_max_int

            # same clamping for the source y coordinate
            if y_float < 0:
                morphed_mask[x, y] = 1
                y_float = 0
                y_floor = 0
                y_ceil = 0
            elif y_float > y_max_float:
                morphed_mask[x, y] = 1
                y_float = y_max_float
                y_floor = y_max_int
                y_ceil = y_max_int
            else:
                y_floor = floor(y_float)
                y_ceil = y_floor + 1
                if y_ceil > y_max_int:
                    y_ceil = y_max_int

            dx = x_float - x_floor
            dy = y_float - y_floor

            # This assumes that the spacing between grid points=1.
            # Bilinear interpolation coeficients
            f00 = image[x_floor, y_floor]
            f10 = image[x_ceil, y_floor] - image[x_floor, y_floor]
            f01 = image[x_floor, y_ceil] - image[x_floor, y_floor]
            f11 = (image[x_floor, y_floor] - image[x_ceil, y_floor]
                   - image[x_floor, y_ceil] + image[x_ceil, y_ceil])

            # Bilinear interpolation
            new_image[x, y] = f00 + dx * f10 + dy * f01 + dx * dy * f11

            if gradient:
                # partial derivatives of the interpolant w.r.t. dx and dy
                gradient_values[0, x, y] = f10 + dy * f11
                gradient_values[1, x, y] = f01 + dx * f11

            # interpolate the mask the same way so invalid data propagates
            f00 = mask[x_floor, y_floor]
            f10 = mask[x_ceil, y_floor] - mask[x_floor, y_floor]
            f01 = mask[x_floor, y_ceil] - mask[x_floor, y_floor]
            f11 = (mask[x_floor, y_floor] - mask[x_ceil, y_floor]
                   - mask[x_floor, y_ceil] + mask[x_ceil, y_ceil])

            morphed_mask[x, y] = (f00 + dx * f10 + dy * f01 + dx * dy * f11)

    morphed_mask[morphed_mask != 0] = 1

    if gradient:
        return new_image, morphed_mask, gradient_values
    else:
        return new_image, morphed_mask


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True)
def _cost_function(np.ndarray[float64, ndim=3] sector_displacement,
                   np.ndarray[float64, ndim=2] template_image,
                   np.ndarray[float64, ndim=2] input_image,
                   np.ndarray[int8, ndim=2] mask,
                   float smooth_gain,
                   bint gradient = False):
    """
    Variational Echo Tracking Cost function.

    This function computes the Variational Echo Tracking (VET)
    Cost function presented by `Laroche and Zawazdki (1995)`_ and used in the
    McGill Algorithm for Prediction by Lagrangian Extrapolation (MAPLE)
    described in `Germann and Zawadzki (2002)`_.

    .. _`Laroche and Zawazdki (1995)`: \
    http://dx.doi.org/10.1175/1520-0426(1995)012<0721:ROHWFS>2.0.CO;2

    .. _`Germann and Zawadzki (2002)`: \
    http://dx.doi.org/10.1175/1520-0493(2002)130<2859:SDOTPO>2.0.CO;2

    The cost function is a the sum of the residuals of the squared image
    differences along with a smoothness constrain.

    This cost function implementation, supports displacement vector
    sectorization.
    The displacement vector represent the displacement applied to the pixels
    in each individual sector.

    This help to reduce the number of degrees of freedom of the cost function
    when hierarchical approaches are used to obtain the minima of
    the cost function (from low resolution to full image resolution).
    For example, in the MAPLE algorithm an Scaling Guess procedure is used to
    find the displacement vectors.
    The echo motion field is retrieved in three runs with increasing
    resolution.
    The retrieval starts with (left) a uniform field, which is used as a first
    guess to retrieve (middle) the field on a 5 × 5 grid, which in turn is the
    first guess of (right) the final minimization with a 25 × 25 grid

    The shape of the sector is deduced from the image shape and the
    displacement vector shape.

    IMPORTANT: The number of sectors in each dimension (x and y) must be a
    factor full image size.

    The value of displaced pixels that fall outside the limits takes the
    value of the nearest edge.

    The cost function is computed in parallel over the x axis.

    .. _ndarray: \
    https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html

    Parameters
    ----------
    sector_displacement : ndarray_ (ndim=3)
        Array of displacements to apply to each sector. The dimensions are:
        sector_displacement [ x (0) or y (1) displacement,
                             i index of sector, j index of sector ]

    template_image : ndarray_ (ndim=2)
        Input image array where the sector displacement is applied.

    input_image : ndarray_
        Image array to be used as reference

    smooth_gain : float
        Smoothness constrain gain

    mask : ndarray_ (ndim=2)
        Data mask. If is True, the data is marked as not valid and is not
        used in the computations.

    gradient : bool, optional
        If True, the gradient of the morphing function is returned.

    Returns
    -------
    penalty or gradient values.

    penalty : float
        Value of the cost function

    gradient_values : ndarray (float64 ,ndim = 3), optional
        If gradient keyword is True, the gradient of the function is also
        returned.

    References
    ----------
    Laroche, S., and I. Zawadzki, 1995:
    Retrievals of horizontal winds from single-Doppler clear-air data by
    methods of cross-correlation and variational analysis.
    J. Atmos. Oceanic Technol., 12, 721–738.
    doi: http://dx.doi.org/10.1175/1520-0426(1995)012<0721:ROHWFS>2.0.CO;2

    Germann, U. and I. Zawadzki, 2002:
    Scale-Dependence of the Predictability of Precipitation from Continental
    Radar Images.  Part I: Description of the Methodology.
    Mon. Wea. Rev., 130, 2859–2873,
    doi: 10.1175/1520-0493(2002)130<2859:SDOTPO>2.0.CO;2.
    """
    cdef intp x_sectors = sector_displacement.shape[1]
    cdef intp y_sectors = sector_displacement.shape[2]

    cdef intp x_image_size = template_image.shape[0]
    cdef intp y_image_size = template_image.shape[1]

    # the sector grid must tile the image exactly
    if x_image_size % x_sectors != 0:
        raise ValueError("Error computing cost function.\n",
                         "The number of sectors in x axis (axis=0)"
                         + " don't divide the image size")

    if y_image_size % y_sectors != 0:
        raise ValueError("Error computing cost function.\n",
                         "The number of sectors in y axis (axis=1) don't"
                         + " divide the image size")

    cdef intp x_sector_size = (
        (round(x_image_size / x_sectors)))
    cdef intp y_sector_size = (
        (round(y_image_size / y_sectors)))

    # dense per-pixel displacement, interpolated from the sector values
    cdef np.ndarray[float64, ndim = 3] displacement = (
        np.zeros([2, x_image_size, y_image_size], dtype=np.float64))

    cdef intp i, j, xy, l, m, ll, mm, i_sec, j_sec
    cdef intp l0, m0, l1, m1, i_shift, j_shift, axis

    # half-sector offset: sector values are anchored at sector centers
    i_shift = (x_sector_size // 2)
    j_shift = (y_sector_size // 2)

    # Assume regular grid with constant grid spacing.
    cdef np.ndarray[float64, ndim = 1] x
    cdef np.ndarray[float64, ndim = 1] y

    x = np.arange(x_image_size, dtype='float64')
    y = np.arange(y_image_size, dtype='float64')

    cdef np.ndarray[float64, ndim = 1] x_guess
    cdef np.ndarray[float64, ndim = 1] y_guess

    # sector-center coordinates (mean pixel coordinate of each sector)
    x_guess = x.reshape((x_sectors, x_sector_size)).mean(axis=1)
    y_guess = y.reshape((y_sectors, y_sector_size)).mean(axis=1)

    cdef float64 sector_area

    cdef np.ndarray[float64, ndim = 3] interp_coef
    interp_coef = np.zeros([4, x_image_size, y_image_size], dtype=np.float64)

    cdef np.ndarray[intp, ndim = 1] l_i = np.zeros(x_image_size, dtype=np.intp)
    cdef np.ndarray[intp, ndim = 1] m_j = np.zeros(y_image_size, dtype=np.intp)

    cdef np.ndarray[intp, ndim = 1] i_min = np.full(x_sectors, x_image_size, dtype=np.intp)
    cdef np.ndarray[intp, ndim = 1] i_max = np.full(x_sectors, x_image_size, dtype=np.intp)
    cdef np.ndarray[intp, ndim = 1] j_min = np.full(y_sectors, y_image_size, dtype=np.intp)
    cdef np.ndarray[intp, ndim = 1] j_max = np.full(y_sectors, y_image_size, dtype=np.intp)

    ####################################
    # Compute interpolation coefficients
    for i in prange(x_image_size, schedule='static', nogil=True):

        # lower sector index bracketing pixel row i, clamped to valid range
        l0 = int_min((i - i_shift) // x_sector_size, x_sectors - 2)
        l0 = int_max(l0, 0)
        l1 = l0 + 1

        l_i[i] = l0

        for j in range(y_image_size):

            m0 = int_min((j - j_shift) // y_sector_size, y_sectors - 2)
            m0 = int_max(m0, 0)
            m1 = m0 + 1

            m_j[j] = m0

            sector_area = (x_guess[l1] - x_guess[l0]) * (y_guess[m1] - y_guess[m0])

            # bilinear weights of the four surrounding sector centers
            interp_coef[0, i, j] = (x_guess[l1] * y_guess[m1]
                                    - x[i] * y_guess[m1]
                                    - x_guess[l1] * y[j]
                                    + x[i] * y[j]) / sector_area

            interp_coef[1, i, j] = (-x_guess[l1] * y_guess[m0]
                                    + x[i] * y_guess[m0]
                                    + x_guess[l1] * y[j]
                                    - x[i] * y[j]) / sector_area

            interp_coef[2, i, j] = (-x_guess[l0] * y_guess[m1]
                                    + x[i] * y_guess[m1]
                                    + x_guess[l0] * y[j]
                                    - x[i] * y[j]) / sector_area

            interp_coef[3, i, j] = (x_guess[l0] * y_guess[m0]
                                    - x[i] * y_guess[m0]
                                    - x_guess[l0] * y[j]
                                    + x[i] * y[j]) / sector_area

            for xy in range(2):
                displacement[xy, i, j] = (
                    sector_displacement[xy, l0, m0] * interp_coef[0, i, j]
                    + sector_displacement[xy, l0, m1] * interp_coef[1, i, j]
                    + sector_displacement[xy, l1, m0] * interp_coef[2, i, j]
                    + sector_displacement[xy, l1, m1] * interp_coef[3, i, j]
                )

    ##############################################
    # Compute limits used in gradient computations
    for l, i, counts in zip(*np.unique(l_i, return_index=True, return_counts=True)):
        i_min[l] = i
        i_max[l] = i + counts - 1

    for m, j, counts in zip(*np.unique(m_j, return_index=True, return_counts=True)):
        j_min[m] = j
        j_max[m] = j + counts

    cdef np.ndarray[float64, ndim = 2] morphed_image
    cdef np.ndarray[int8, ndim = 2] morph_mask
    cdef np.ndarray[float64, ndim = 3] _gradient_data

    cdef np.ndarray[float64, ndim = 3] grad_residuals
    cdef np.ndarray[float64, ndim = 3] grad_smooth

    cdef np.ndarray[float64, ndim = 2] buffer = \
        np.zeros([x_image_size, y_image_size], dtype=np.float64)

    grad_smooth = np.zeros([2, x_sectors, y_sectors], dtype=np.float64)
    grad_residuals = np.zeros([2, x_sectors, y_sectors], dtype=np.float64)

    cdef float64 residuals = 0

    # Compute residual part of the cost function
    if gradient:
        morphed_image, morph_mask, _gradient_data = _warp(template_image,
                                                          mask,
                                                          displacement,
                                                          gradient=True)
        morph_mask[mask > 0] = 1

        # d(residual)/d(pixel value), zeroed on masked pixels
        buffer = (2 * (input_image - morphed_image))
        buffer[morph_mask == 1] = 0

        _gradient_data[0, :] *= buffer
        _gradient_data[1, :] *= buffer

        # accumulate per-pixel gradients onto the four surrounding sectors
        for l in range(x_sectors):  # schedule='dynamic', nogil=True):
            for m in range(y_sectors):
                for i in range(i_min[l], i_max[l]):
                    for j in range(j_min[m], j_max[m]):
                        grad_residuals[0, l, m] = grad_residuals[0, l, m] + \
                            (_gradient_data[0, i, j]
                             * interp_coef[0, i, j])
                        grad_residuals[1, l, m] = grad_residuals[1, l, m] + \
                            (_gradient_data[1, i, j]
                             * interp_coef[0, i, j])

            for m in range(1, y_sectors):
                for i in range(i_min[l], i_max[l]):
                    for j in range(j_min[m - 1], j_max[m - 1]):
                        grad_residuals[0, l, m] = grad_residuals[0, l, m] + \
                            (_gradient_data[0, i, j]
                             * interp_coef[1, i, j])
                        grad_residuals[1, l, m] = grad_residuals[1, l, m] + \
                            (_gradient_data[1, i, j]  # TODO: Check this line!
                             * interp_coef[1, i, j])

        for l in range(1, x_sectors):  # , schedule='dynamic', nogil=True):
            for m in range(y_sectors):
                for i in range(i_min[l - 1], i_max[l - 1]):
                    for j in range(j_min[m], j_max[m]):
                        grad_residuals[0, l, m] += (_gradient_data[0, i, j]
                                                    * interp_coef[2, i, j])
                        grad_residuals[1, l, m] += (_gradient_data[1, i, j]
                                                    * interp_coef[2, i, j])

            for m in range(1, y_sectors):
                for i in range(i_min[l - 1], i_max[l - 1]):
                    for j in range(j_min[m - 1], j_max[m - 1]):
                        grad_residuals[0, l, m] += (_gradient_data[0, i, j]
                                                    * interp_coef[3, i, j])
                        grad_residuals[1, l, m] += (_gradient_data[1, i, j]
                                                    * interp_coef[3, i, j])

    else:
        morphed_image, morph_mask = _warp(template_image,
                                          mask,
                                          displacement,
                                          gradient=False)
        morph_mask[mask > 0] = 1

        # sum of squared differences over valid (unmasked) pixels only
        residuals = np.sum((morphed_image - input_image)[morph_mask == 0] ** 2)

    # Compute smoothness constraint part of the cost function
    cdef float64 smoothness_penalty = 0

    cdef float64 df_dx2 = 0
    cdef float64 df_dxdy = 0
    cdef float64 df_dy2 = 0

    cdef float64 inloop_smoothness_penalty

    if smooth_gain > 0.:

        for axis in range(2):  # , schedule='dynamic', nogil=True):

            inloop_smoothness_penalty = 0

            for l in range(1, x_sectors - 1):
                for m in range(1, y_sectors - 1):

                    # second-order finite differences of the sector field
                    df_dx2 = (sector_displacement[axis, l + 1, m]
                              - 2 * sector_displacement[axis, l, m]
                              + sector_displacement[axis, l - 1, m])
                    df_dx2 = df_dx2 / (x_sector_size * x_sector_size)

                    df_dy2 = (sector_displacement[axis, l, m + 1]
                              - 2 * sector_displacement[axis, l, m]
                              + sector_displacement[axis, l, m - 1])
                    df_dy2 = df_dy2 / (y_sector_size * y_sector_size)

                    df_dxdy = (sector_displacement[axis, l + 1, m + 1]
                               - sector_displacement[axis, l + 1, m - 1]
                               - sector_displacement[axis, l - 1, m + 1]
                               + sector_displacement[axis, l - 1, m - 1])
                    df_dxdy = df_dxdy / (4 * x_sector_size * y_sector_size)

                    if gradient:
                        # distribute the derivative of each squared term back
                        # onto the stencil points that produced it
                        grad_smooth[axis, l, m] -= 2 * df_dx2
                        grad_smooth[axis, l + 1, m] += df_dx2
                        grad_smooth[axis, l - 1, m] += df_dx2

                        grad_smooth[axis, l, m] -= 2 * df_dy2
                        grad_smooth[axis, l, m - 1] += df_dy2
                        grad_smooth[axis, l, m + 1] += df_dy2

                        grad_smooth[axis, l - 1, m - 1] += df_dxdy
                        grad_smooth[axis, l - 1, m + 1] -= df_dxdy
                        grad_smooth[axis, l + 1, m - 1] -= df_dxdy
                        grad_smooth[axis, l + 1, m + 1] += df_dxdy

                    inloop_smoothness_penalty = (df_dx2 * df_dx2
                                                 + 2 * df_dxdy * df_dxdy
                                                 + df_dy2 * df_dy2)

            smoothness_penalty += inloop_smoothness_penalty

        smoothness_penalty *= smooth_gain  #* x_sector_size * y_sector_size

    if gradient:
        grad_smooth *= 2 * smooth_gain  #* x_sector_size * y_sector_size
        return grad_residuals + grad_smooth
    else:
        return residuals, smoothness_penalty



================================================
FILE: pysteps/motion/constant.py
================================================
# -*- coding: utf-8 -*-
"""
pysteps.motion.constant
=======================

Implementation of a constant advection field estimation by maximizing the
correlation between two images.

.. autosummary::
    :toctree: ../generated/

    constant
"""

import numpy as np
import scipy.optimize as op
from scipy.ndimage import map_coordinates


def constant(R, **kwargs):
    """
    Compute a constant advection field by finding a translation vector that
    maximizes the correlation between two successive images.

    Parameters
    ----------
    R: array_like
        Array of shape (T,m,n) containing a sequence of T two-dimensional
        input images of shape (m,n). If T > 2, two last elements along axis
        0 are used.

    Returns
    -------
    out: array_like
        The constant advection field having shape (2, m, n), where
        out[0, :, :] contains the x-components of the motion vectors and
        out[1, :, :] contains the y-components.
""" m, n = R.shape[1:] X, Y = np.meshgrid(np.arange(n), np.arange(m)) def f(v): XYW = [Y + v[1], X + v[0]] R_w = map_coordinates( R[-2, :, :], XYW, mode="constant", cval=np.nan, order=0, prefilter=False ) mask = np.logical_and(np.isfinite(R[-1, :, :]), np.isfinite(R_w)) return -np.corrcoef(R[-1, :, :][mask], R_w[mask])[0, 1] options = {"initial_simplex": (np.array([(0, 1), (1, 0), (1, 1)]))} result = op.minimize(f, (1, 1), method="Nelder-Mead", options=options) return np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))]) ================================================ FILE: pysteps/motion/darts.py ================================================ # -*- coding: utf-8 -*- """ pysteps.motion.darts ==================== Implementation of the DARTS algorithm. .. autosummary:: :toctree: ../generated/ DARTS """ import numpy as np import time from numpy.linalg import lstsq, svd from pysteps import utils from pysteps.decorators import check_input_frames @check_input_frames(just_ndim=True) def DARTS(input_images, **kwargs): """ Compute the advection field from a sequence of input images by using the DARTS method. :cite:`RCW2011` Parameters ---------- input_images: array-like Array of shape (T,m,n) containing a sequence of T two-dimensional input images of shape (m,n). Other Parameters ---------------- N_x: int Number of DFT coefficients to use for the input images, x-axis (default=50). N_y: int Number of DFT coefficients to use for the input images, y-axis (default=50). N_t: int Number of DFT coefficients to use for the input images, time axis (default=4). N_t must be strictly smaller than T. M_x: int Number of DFT coefficients to compute for the output advection field, x-axis (default=2). M_y: int Number of DFT coefficients to compute for the output advection field, y-axis (default=2). fft_method: str A string defining the FFT method to use, see utils.fft.get_method. Defaults to 'numpy'. 
output_type: {"spatial", "spectral"} The type of the output: "spatial"=apply the inverse FFT to obtain the spatial representation of the advection field, "spectral"=return the (truncated) DFT representation. n_threads: int Number of threads to use for the FFT computation. Applicable if fft_method is 'pyfftw'. verbose: bool If True, print information messages. lsq_method: {1, 2} The method to use for solving the linear equations in the least squares sense: 1=numpy.linalg.lstsq, 2=explicit computation of the Moore-Penrose pseudoinverse and SVD. verbose: bool if set to True, it prints information about the program Returns ------- out: ndarray Three-dimensional array (2,m,n) containing the dense x- and y-components of the motion field in units of pixels / timestep as given by the input array R. """ N_x = kwargs.get("N_x", 50) N_y = kwargs.get("N_y", 50) N_t = kwargs.get("N_t", 4) M_x = kwargs.get("M_x", 2) M_y = kwargs.get("M_y", 2) fft_method = kwargs.get("fft_method", "numpy") output_type = kwargs.get("output_type", "spatial") lsq_method = kwargs.get("lsq_method", 2) verbose = kwargs.get("verbose", True) if N_t >= input_images.shape[0] - 1: raise ValueError( "N_t = %d >= %d = T-1, but N_t < T-1 required" % (N_t, input_images.shape[0] - 1) ) if output_type not in ["spatial", "spectral"]: raise ValueError( "invalid output_type=%s, must be 'spatial' or 'spectral'" % output_type ) if np.any(~np.isfinite(input_images)): raise ValueError("the input images contain non-finite values") if verbose: print("Computing the motion field with the DARTS method.") t0 = time.time() input_images = np.moveaxis(input_images, (0, 1, 2), (2, 0, 1)) fft = utils.get_method( fft_method, shape=input_images.shape[:2], fftn_shape=input_images.shape, **kwargs, ) T_x = input_images.shape[1] T_y = input_images.shape[0] T_t = input_images.shape[2] if verbose: print("-----") print("DARTS") print("-----") print(" Computing the FFT of the reflectivity fields...", end="", flush=True) starttime = 
time.time() input_images = fft.fftn(input_images) if verbose: print("Done in %.2f seconds." % (time.time() - starttime)) print(" Constructing the y-vector...", end="", flush=True) starttime = time.time() m = (2 * N_x + 1) * (2 * N_y + 1) * (2 * N_t + 1) n = (2 * M_x + 1) * (2 * M_y + 1) y = np.zeros(m, dtype=complex) k_t, k_y, k_x = np.unravel_index( np.arange(m), (2 * N_t + 1, 2 * N_y + 1, 2 * N_x + 1) ) for i in range(m): k_x_ = k_x[i] - N_x k_y_ = k_y[i] - N_y k_t_ = k_t[i] - N_t y[i] = k_t_ * input_images[k_y_, k_x_, k_t_] if verbose: print("Done in %.2f seconds." % (time.time() - starttime)) A = np.zeros((m, n), dtype=complex) B = np.zeros((m, n), dtype=complex) if verbose: print(" Constructing the H-matrix...", end="", flush=True) starttime = time.time() c1 = -1.0 * T_t / (T_x * T_y) kp_y, kp_x = np.unravel_index(np.arange(n), (2 * M_y + 1, 2 * M_x + 1)) for i in range(m): k_x_ = k_x[i] - N_x k_y_ = k_y[i] - N_y k_t_ = k_t[i] - N_t kp_x_ = kp_x[:] - M_x kp_y_ = kp_y[:] - M_y i_ = k_y_ - kp_y_ j_ = k_x_ - kp_x_ R_ = input_images[i_, j_, k_t_] c2 = c1 / T_y * i_ A[i, :] = c2 * R_ c2 = c1 / T_x * j_ B[i, :] = c2 * R_ if verbose: print("Done in %.2f seconds." % (time.time() - starttime)) print(" Solving the linear systems...", end="", flush=True) starttime = time.time() if lsq_method == 1: x = lstsq(np.hstack([A, B]), y, rcond=0.01)[0] else: x = _leastsq(A, B, y) if verbose: print("Done in %.2f seconds." 
% (time.time() - starttime)) h, w = 2 * M_y + 1, 2 * M_x + 1 U = np.zeros((h, w), dtype=complex) V = np.zeros((h, w), dtype=complex) i, j = np.unravel_index(np.arange(h * w), (h, w)) V[i, j] = x[0 : h * w] U[i, j] = x[h * w : 2 * h * w] k_x, k_y = np.meshgrid(np.arange(-M_x, M_x + 1), np.arange(-M_y, M_y + 1)) if output_type == "spatial": U = np.real( fft.ifft2(_fill(U, input_images.shape[0], input_images.shape[1], k_x, k_y)) ) V = np.real( fft.ifft2(_fill(V, input_images.shape[0], input_images.shape[1], k_x, k_y)) ) if verbose: print("--- %s seconds ---" % (time.time() - t0)) return np.stack([U, V]) def _leastsq(A, B, y): M = np.hstack([A, B]) M_ct = M.conjugate().T MM = np.dot(M_ct, M) U, s, V = svd(MM, full_matrices=False) mask = s > 0.01 * s[0] s = 1.0 / s[mask] MM_inv = np.dot( np.dot(V[: len(s), :].conjugate().T, np.diag(s)), U[:, : len(s)].conjugate().T ) return np.dot(MM_inv, np.dot(M_ct, y)) def _fill(X, h, w, k_x, k_y): X_f = np.zeros((h, w), dtype=complex) X_f[k_y, k_x] = X return X_f ================================================ FILE: pysteps/motion/farneback.py ================================================ # -*- coding: utf-8 -*- """ pysteps.motion.farneback ======================== The Farneback dense optical flow module. This module implements the interface to the local `Farneback`_ routine available in OpenCV_. .. _OpenCV: https://opencv.org/ .. _`Farneback`:\ https://docs.opencv.org/3.4/dc/d6b/group__video__track.html#ga5d10ebbd59fe09c5f650289ec0ece5af .. 
autosummary::
    :toctree: ../generated/

    farneback
"""

import numpy as np
from numpy.ma.core import MaskedArray
import scipy.ndimage as sndi
import time

from pysteps.decorators import check_input_frames
from pysteps.exceptions import MissingOptionalDependency
from pysteps.utils.images import morph_opening

# OpenCV is an optional dependency; availability is checked at call time
try:
    import cv2

    CV2_IMPORTED = True
except ImportError:
    CV2_IMPORTED = False


@check_input_frames(2)
def farneback(
    input_images,
    pyr_scale=0.5,
    levels=3,
    winsize=15,
    iterations=3,
    poly_n=5,
    poly_sigma=1.1,
    flags=0,
    size_opening=3,
    sigma=60.0,
    verbose=False,
):
    """Estimate a dense motion field from a sequence of 2D images using the
    `Farneback`_ optical flow algorithm.

    This function computes dense optical flow between each pair of
    consecutive input frames using OpenCV's Farneback method. If more than
    two frames are provided, the motion fields estimated from all consecutive
    pairs are averaged to obtain a single representative advection field.

    After the pairwise motion fields are averaged, the resulting motion field
    can optionally be smoothed with a Gaussian filter. In that case, its
    amplitude is rescaled so that the mean motion magnitude is preserved.

    .. _OpenCV: https://opencv.org/

    .. _`Farneback`:\
    https://docs.opencv.org/3.4/dc/d6b/group__video__track.html#ga5d10ebbd59fe09c5f650289ec0ece5af

    .. _MaskedArray:\
    https://docs.scipy.org/doc/numpy/reference/maskedarray.baseclass.html#numpy.ma.MaskedArray

    .. _ndarray:\
    https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html

    Parameters
    ----------
    input_images: ndarray_ or MaskedArray_
        Array of shape (T, m, n) containing a sequence of *T* two-dimensional
        input images of shape (m, n). The indexing order in **input_images**
        is assumed to be (time, latitude, longitude).

        *T* = 2 is the minimum required number of images. With *T* > 2, all
        the resulting motion vectors are averaged together.

        In case of ndarray_, invalid values (Nans or infs) are masked,
        otherwise the mask of the MaskedArray_ is used. Such mask defines a
        region where features are not detected for the tracking algorithm.

    pyr_scale : float, optional
        Parameter specifying the image scale (<1) used to build pyramids for
        each image; pyr_scale=0.5 means a classical pyramid, where each next
        layer is twice smaller than the previous one. This and the following
        parameter descriptions are adapted from the original OpenCV
        documentation (see https://docs.opencv.org).

    levels : int, optional
        Number of pyramid layers including the initial image; levels=1 means
        that no extra layers are created and only the original images are
        used.

    winsize : int, optional
        Averaging window size; larger values increase the algorithm robustness
        to image noise and give more stable motion estimates. Small windows
        (e.g. 10) lead to unrealistic motion.

    iterations : int, optional
        Number of iterations the algorithm does at each pyramid level.

    poly_n : int
        Size of the pixel neighborhood used to find polynomial expansion in
        each pixel; larger values mean that the image will be approximated
        with smoother surfaces, yielding more robust algorithm and more
        blurred motion field, typically poly_n = 5 or 7.

    poly_sigma : float
        Standard deviation of the Gaussian that is used to smooth derivatives
        used as a basis for the polynomial expansion; for poly_n=5, you can
        set poly_sigma=1.1, for poly_n=7, a good value would be
        poly_sigma=1.5.

    flags : int, optional
        Operation flags that can be a combination of the following:
        OPTFLOW_USE_INITIAL_FLOW uses the input 'flow' as an initial flow
        approximation. OPTFLOW_FARNEBACK_GAUSSIAN uses the Gaussian winsize x
        winsize filter instead of a box filter of the same size for optical
        flow estimation; usually, this option gives a more accurate flow than
        with a box filter, at the cost of lower speed; normally, winsize for
        a Gaussian window should be set to a larger value to achieve the same
        level of robustness.

    size_opening : int, optional
        Non-OpenCV parameter: The structuring element size for the filtering
        of isolated pixels [px].

    sigma : float, optional
        Non-OpenCV parameter: The smoothing bandwidth of the motion field.
        The motion field amplitude is adjusted by multiplying by the ratio of
        average magnitude before and after smoothing to avoid damping of the
        motion field.

    verbose: bool, optional
        If set to True, print some information about the program.

    Returns
    -------
    out : ndarray_, shape (2,m,n)
        Return the advection field having shape (2, m, n), where out[0, :, :]
        contains the x-components of the motion vectors and out[1, :, :]
        contains the y-components. The velocities are in units of pixels /
        timestep, where timestep is the time difference between the two input
        images. Return a zero motion field of shape (2, m, n) when no motion
        is detected.

    References
    ----------
    Farnebäck, G.: Two-frame motion estimation based on polynomial expansion,
    In Image Analysis, pages 363–370. Springer, 2003.

    Driedger, N., Mahidjiba, A. and Hortal, A.P. (2022, June 1-8): Evaluation
    of optical flow methods for radar precipitation extrapolation. Canadian
    Meteorological and Oceanographic Society Congress, contributed abstract
    11801.
    """
    if len(input_images.shape) != 3:
        raise ValueError(
            "input_images has %i dimensions, but a "
            "three-dimensional array is expected" % len(input_images.shape)
        )

    input_images = input_images.copy()

    if verbose:
        print("Computing the motion field with the Farneback method.")
        t0 = time.time()

    if not CV2_IMPORTED:
        raise MissingOptionalDependency(
            "OpenCV (cv2) is required for the Farneback optical flow method, but it is not installed"
        )

    nr_pairs = input_images.shape[0] - 1
    domain_size = (input_images.shape[1], input_images.shape[2])
    u_sum = np.zeros(domain_size)
    v_sum = np.zeros(domain_size)

    for n in range(nr_pairs):
        # extract consecutive images
        prvs_img = input_images[n, :, :].copy()
        next_img = input_images[n + 1, :, :].copy()

        # Check if a MaskedArray is used. If not, mask the ndarray
        if not isinstance(prvs_img, MaskedArray):
            prvs_img = np.ma.masked_invalid(prvs_img)
        np.ma.set_fill_value(prvs_img, prvs_img.min())

        if not isinstance(next_img, MaskedArray):
            next_img = np.ma.masked_invalid(next_img)
        np.ma.set_fill_value(next_img, next_img.min())

        # scale between 0 and 255
        im_min = prvs_img.min()
        im_max = prvs_img.max()
        if (im_max - im_min) > 1e-8:
            prvs_img = (prvs_img.filled() - im_min) / (im_max - im_min) * 255
        else:
            # constant image: avoid division by (near-)zero range
            prvs_img = prvs_img.filled() - im_min

        im_min = next_img.min()
        im_max = next_img.max()
        if (im_max - im_min) > 1e-8:
            next_img = (next_img.filled() - im_min) / (im_max - im_min) * 255
        else:
            next_img = next_img.filled() - im_min

        # convert to 8-bit
        prvs_img = np.ndarray.astype(prvs_img, "uint8")
        next_img = np.ndarray.astype(next_img, "uint8")

        # remove small noise with a morphological operator (opening)
        if size_opening > 0:
            prvs_img = morph_opening(prvs_img, prvs_img.min(), size_opening)
            next_img = morph_opening(next_img, next_img.min(), size_opening)

        flow = cv2.calcOpticalFlowFarneback(
            prvs_img,
            next_img,
            None,
            pyr_scale,
            levels,
            winsize,
            iterations,
            poly_n,
            poly_sigma,
            flags,
        )

        # split the (m, n, 2) flow into its x and y components and accumulate
        fa, fb = np.dsplit(flow, 2)
        u_sum += fa.reshape(domain_size)
        v_sum += fb.reshape(domain_size)

    # Compute the average motion field
    u = u_sum / nr_pairs
    v = v_sum / nr_pairs

    # Smoothing
    if sigma > 0:
        uv2 = u * u + v * v  # squared magnitude of motion field

        us = sndi.gaussian_filter(u, sigma, mode="nearest")
        vs = sndi.gaussian_filter(v, sigma, mode="nearest")
        uvs2 = us * us + vs * vs  # squared magnitude of smoothed motion field

        # rescale so that smoothing preserves the mean motion magnitude
        mean_uv2 = np.nanmean(uv2)
        mean_uvs2 = np.nanmean(uvs2)
        if mean_uvs2 > 0:
            mult = np.sqrt(mean_uv2 / mean_uvs2)
        else:
            mult = 1.0
    else:
        mult = 1.0
        us = u
        vs = v

    if verbose:
        print("mult factor of smoothed motion field=", mult)

    UV = np.stack([us * mult, vs * mult])

    if verbose:
        print("--- %s seconds ---" % (time.time() - t0))

    return UV


================================================
FILE: pysteps/motion/interface.py
def get_method(name):
    """
    Return a callable optical flow routine for the given method name.

    Recognized names (case-insensitive):

    * None -- returns a zero motion field
    * "constant" -- constant advection field maximizing image correlation
    * "darts" -- DARTS method of Ruzanski et al. (2011)
    * "lk" / "lucaskanade" -- OpenCV Lucas-Kanade with interpolation of the
      sparse vectors over the full grid
    * "proesmans" -- anisotropic diffusion method of Proesmans et al. (1994)
    * "vet" -- VET method of Laroche and Zawadzki (1995) and
      Germann and Zawadzki (2002)
    * "farneback" -- OpenCV implementation of the Farneback (2003) method

    The C-based "brox" and "clg" methods are documented upstream but require
    separate compilation and are not available through this interface.

    Parameters
    ----------
    name: str or None
        Name of the optical flow method.

    Returns
    -------
    callable
        Function implementing ``motion_method(precip, **keywords)``.

    Raises
    ------
    NotImplementedError
        If a C-based method ("brox" or "clg") is requested.
    ValueError
        If the name does not correspond to any registered method.
    """
    key = name.lower() if isinstance(name, str) else name

    # The C implementations are not shipped with pysteps.
    if key in ["brox", "clg"]:
        raise NotImplementedError("Method %s not implemented" % key)

    try:
        return _methods[key]
    except KeyError:
        raise ValueError(
            "Unknown method {}\n".format(key)
            + "The available methods are:"
            + str(list(_methods.keys()))
        ) from None
@check_input_frames(2)
def dense_lucaskanade(
    input_images,
    lk_kwargs=None,
    fd_method="shitomasi",
    fd_kwargs=None,
    interp_method="idwinterp2d",
    interp_kwargs=None,
    dense=True,
    nr_std_outlier=3,
    k_outlier=30,
    size_opening=3,
    decl_scale=20,
    verbose=False,
):
    """
    Run the Lucas-Kanade optical flow routine and interpolate the motion
    vectors.

    Interface to the OpenCV implementation of the local Lucas-Kanade optical
    flow method applied in combination with a feature detection routine. The
    sparse motion vectors are finally interpolated to return the whole motion
    field.

    Parameters
    ----------
    input_images: ndarray or MaskedArray
        Array of shape (T, m, n) containing a sequence of *T* two-dimensional
        input images of shape (m, n), indexed as (time, latitude, longitude).
        *T* = 2 is the minimum. With *T* > 2, all resulting sparse vectors are
        pooled together for the final interpolation. For plain ndarrays,
        invalid values (NaNs or infs) are masked; otherwise the MaskedArray
        mask is used. The mask defines a region excluded from feature
        detection.
    lk_kwargs: dict, optional
        Keyword arguments for the Lucas-Kanade feature tracking
        (:py:func:`pysteps.tracking.lucaskanade.track_features`).
    fd_method: {"shitomasi", "blob", "tstorm"}, optional
        Name of the feature detection routine (:py:mod:`pysteps.feature`).
    fd_kwargs: dict, optional
        Keyword arguments for the feature detection algorithm. The dict is
        copied internally and never modified.
    interp_method: {"idwinterp2d", "rbfinterp2d"}, optional
        Name of the interpolation method (:py:mod:`pysteps.utils.interpolate`).
    interp_kwargs: dict, optional
        Keyword arguments for the interpolation algorithm.
    dense: bool, optional
        If True (default), return the dense (2, m, n) motion field.
        If False, return the sparse vectors as 2-D **xy** (positions) and
        **uv** (components) arrays.
    nr_std_outlier: int, optional
        Maximum acceptable deviation from the mean, in numbers of standard
        deviations, before a sparse vector is flagged as an outlier.
    k_outlier: int or None, optional
        Number of nearest neighbors used to localize the outlier detection;
        None uses all data points (global detection).
    size_opening: int, optional
        Size in pixels of the structuring element used for a binary
        morphological opening that removes isolated echoes (clutter).
        Zero disables the filtering.
    decl_scale: int, optional
        Declustering scale in pixels used to reduce the number of redundant
        sparse vectors before interpolation; values below 2 disable it.
    verbose: bool, optional
        If True, print progress information.

    Returns
    -------
    out: ndarray or tuple
        If **dense=True**, the advection field of shape (2, m, n), where
        out[0] holds the x-components and out[1] the y-components in units of
        pixels / timestep; a zero field is returned when no motion is
        detected. If **dense=False**, a tuple (xy, uv) of sparse vector
        positions and components; two empty arrays when no motion is detected.

    References
    ----------
    Bouguet, J.-Y.: Pyramidal implementation of the affine Lucas Kanade
    feature tracker, Intel Corp., 5, 4, 2001.

    Lucas, B. D. and Kanade, T.: An iterative image registration technique
    with an application to stereo vision, Proc. 1981 DARPA Imaging
    Understanding Workshop, pp. 121-130, 1981.
    """
    input_images = input_images.copy()

    if verbose:
        print("Computing the motion field with the Lucas-Kanade method.")
        t0 = time.time()

    nr_fields = input_images.shape[0]
    domain_size = (input_images.shape[1], input_images.shape[2])

    feature_detection_method = feature.get_method(fd_method)
    interpolation_method = utils.get_method(interp_method)

    # Copy fd_kwargs before any update so the caller's dict is never mutated.
    fd_kwargs = dict(fd_kwargs) if fd_kwargs is not None else {}
    if fd_method == "tstorm":
        fd_kwargs.update({"output_feat": True})

    if lk_kwargs is None:
        lk_kwargs = dict()

    if interp_kwargs is None:
        interp_kwargs = dict()

    # Accumulate per-pair sparse vectors in lists and concatenate once,
    # instead of repeatedly reallocating with np.append inside the loop.
    xy_list = []
    uv_list = []
    for n in range(nr_fields - 1):
        # extract consecutive images
        prvs_img = input_images[n, :, :].copy()
        next_img = input_images[n + 1, :, :].copy()

        # Check if a MaskedArray is used. If not, mask the ndarray
        if not isinstance(prvs_img, MaskedArray):
            prvs_img = np.ma.masked_invalid(prvs_img)
        np.ma.set_fill_value(prvs_img, prvs_img.min())

        if not isinstance(next_img, MaskedArray):
            next_img = np.ma.masked_invalid(next_img)
        np.ma.set_fill_value(next_img, next_img.min())

        # remove small noise with a morphological operator (opening)
        if size_opening > 0:
            prvs_img = morph_opening(prvs_img, prvs_img.min(), size_opening)
            next_img = morph_opening(next_img, next_img.min(), size_opening)

        # features detection
        points = feature_detection_method(prvs_img, **fd_kwargs).astype(np.float32)

        # skip loop if no features to track
        if points.shape[0] == 0:
            continue

        # get sparse u, v vectors with Lucas-Kanade tracking
        xy_, uv_ = track_features(prvs_img, next_img, points, **lk_kwargs)

        # skip loop if no vectors
        if xy_.shape[0] == 0:
            continue

        xy_list.append(xy_)
        uv_list.append(uv_)

    if xy_list:
        xy = np.concatenate(xy_list, axis=0)
        uv = np.concatenate(uv_list, axis=0)
    else:
        xy = np.empty(shape=(0, 2))
        uv = np.empty(shape=(0, 2))

    # return zero motion field if no sparse vectors are found
    if xy.shape[0] == 0:
        if dense:
            return np.zeros((2, domain_size[0], domain_size[1]))
        else:
            return xy, uv

    # detect and remove outliers
    outliers = detect_outliers(uv, nr_std_outlier, xy, k_outlier, verbose)
    xy = xy[~outliers, :]
    uv = uv[~outliers, :]

    if verbose:
        print("--- LK found %i sparse vectors ---" % xy.shape[0])

    # return sparse vectors if required
    if not dense:
        return xy, uv

    # decluster sparse motion vectors
    if decl_scale > 1:
        xy, uv = decluster(xy, uv, decl_scale, 1, verbose)

    # return zero motion field if no sparse vectors are left for interpolation
    if xy.shape[0] == 0:
        return np.zeros((2, domain_size[0], domain_size[1]))

    # interpolation
    xgrid = np.arange(domain_size[1])
    ygrid = np.arange(domain_size[0])
    uvgrid = interpolation_method(xy, uv, xgrid, ygrid, **interp_kwargs)

    if verbose:
        print("--- total time: %.2f seconds ---" % (time.time() - t0))

    return uvgrid
@check_input_frames(2, 2)
def proesmans(
    input_images,
    lam=50.0,
    num_iter=100,
    num_levels=6,
    filter_std=0.0,
    verbose=True,
    full_output=False,
):
    """
    Anisotropic diffusion optical flow method of Proesmans et al. (1994).

    Parameters
    ----------
    input_images: array_like
        Array of shape (2, m, n) containing the first and second input image.
    lam: float
        Multiplier of the smoothness term. Smaller values give a smoother
        motion field.
    num_iter: float
        The number of iterations to use.
    num_levels: int
        The number of image pyramid levels to use.
    filter_std: float
        Standard deviation of an optional Gaussian filter applied before
        computing the optical flow.
    verbose: bool, optional
        Accepted for interface compatibility; currently unused.
    full_output: bool, optional
        If True, return a two-element tuple: the forward-backward advection
        fields of shape (2, 2, m, n) and the forward-backward consistency
        fields of shape (2, m, n). Default: False.

    Returns
    -------
    out: ndarray
        If full_output=False, the advection field of shape (2, m, n), where
        out[0, :, :] holds the x-components and out[1, :, :] the y-components
        of the motion vectors, in units of pixels / timestep.

    References
    ----------
    :cite:`PGPO1994`
    """
    del verbose  # not used by this implementation

    # Stack the last two frames into a single (2, m, n) array.
    frames = np.stack(
        [input_images[-2, :, :].copy(), input_images[-1, :, :].copy()]
    )

    # Rescale jointly to [0, 255] unless the field is (near-)constant.
    lo = np.min(frames)
    hi = np.max(frames)
    if hi - lo > 1e-8:
        frames = (frames - lo) / (hi - lo) * 255.0

    # Optional pre-smoothing of each frame.
    if filter_std > 0.0:
        for k in (0, 1):
            frames[k, :, :] = gaussian_filter(frames[k, :, :], filter_std)

    advfield, quality = _compute_advection_field(frames, lam, num_iter, num_levels)

    if full_output:
        return advfield, quality
    return advfield[0]
def round_int(scalar):
    """Return *scalar* rounded to the nearest integer as a Python int."""
    return int(numpy.round(scalar))


def ceil_int(scalar):
    """Return the ceiling of *scalar* as a Python int."""
    return int(numpy.ceil(scalar))


def get_padding(dimension_size, sectors):
    """
    Compute the padding at each side of one image dimension so that the
    padded size divides evenly into the given number of sectors.

    Parameters
    ----------
    dimension_size: int
        Actual dimension size.
    sectors: int
        Number of sectors over which the image will be divided.

    Returns
    -------
    pad_before, pad_after: int, int
        Padding at each side of the image for the corresponding dimension;
        (0, 0) when the size already divides evenly.
    """
    remainder = dimension_size % sectors
    if remainder == 0:
        return 0, 0
    # Split the shortfall as evenly as possible; the extra pixel (when the
    # total padding is odd) goes after the image.
    total_pad = sectors - remainder
    before = total_pad // 2
    return before, total_pad - before


def morph(image, displacement, gradient=False):
    """
    Morph (warp) *image* by applying a displacement field.

    The routine works in a backward sense: displacement vectors refer to
    their destination. Displaced pixels falling outside the limits take the
    value of the nearest edge and are flagged in the returned mask (values
    greater than 1 indicate masked points). The actual warping is delegated
    to the Cython kernel ``_warp`` and is parallelized over the x axis.

    Parameters
    ----------
    image: ndarray (ndim = 2)
        Image to morph.
    displacement: ndarray (ndim = 3)
        Displacement field, indexed as
        displacement[i/x (0) or j/y (1), i, j].
    gradient: bool, optional
        If True, also return the gradient of the morphing function.

    Returns
    -------
    image: ndarray (float64, ndim = 2)
        Morphed image.
    mask: ndarray (int8, ndim = 2)
        Invalid-values mask; values greater than 1 indicate masked points.
    gradient_values: ndarray (float64, ndim = 3), optional
        Returned only when ``gradient`` is True.
    """
    if isinstance(image, MaskedArray):
        invalid = numpy.asarray(
            numpy.ma.getmaskarray(image), dtype="int8", order="C"
        )
    else:
        invalid = numpy.zeros_like(image, dtype="int8")

    data = numpy.asarray(image, dtype="float64", order="C")
    field = numpy.asarray(displacement, dtype="float64", order="C")

    return _warp(data, invalid, field, gradient=gradient)


def vet_cost_function_gradient(*args, **kwargs):
    """
    Compute the VET cost function gradient.

    See :py:func:`vet_cost_function` for more information.
    """
    kwargs["gradient"] = True
    return vet_cost_function(*args, **kwargs)


def vet_cost_function(
    sector_displacement_1d,
    input_images,
    blocks_shape,
    mask,
    smooth_gain,
    debug=False,
    gradient=False,
):
    """
    Variational Echo Tracking cost function, designed for use with the scipy
    minimization routines (the first argument is the optimization variable).

    Parameters
    ----------
    sector_displacement_1d: ndarray
        Flat array of per-sector displacements, reshaped internally to
        (2, mx, my) where mx, my are the sector counts in x and y.
    input_images: ndarray
        Sequence of 2D images stacked along the first axis; either two
        (template, target) or three consecutive frames.
    blocks_shape: ndarray (ndim=2)
        Number of sectors in each dimension (mx, my).
    mask: ndarray (ndim=2)
        Data mask; True entries are excluded from the computations.
    smooth_gain: float
        Smoothness constraint gain.
    debug: bool, optional
        If True, print the residual and smoothness terms.
    gradient: bool, optional
        If True, return the flattened gradient instead of the penalty.

    Returns
    -------
    penalty: float or gradient_values: ndarray
        Cost value (residuals + smoothness penalty), or its gradient when
        ``gradient`` is True.
    """
    displacement = sector_displacement_1d.reshape((2,) + tuple(blocks_shape))

    # With three frames the cost is accumulated over both consecutive pairs.
    three_frames = input_images.shape[0] == 3
    if three_frames:
        previous_image = input_images[0]
        center_image = input_images[1]
        next_image = input_images[2]
    else:
        previous_image = None
        center_image = input_images[0]
        next_image = input_images[1]

    if gradient:
        grad = _cost_function(
            displacement, center_image, next_image, mask, smooth_gain, gradient=True
        )
        if three_frames:
            grad += _cost_function(
                displacement,
                previous_image,
                center_image,
                mask,
                smooth_gain,
                gradient=True,
            )
        return grad.ravel()

    residuals, smoothness_penalty = _cost_function(
        displacement, center_image, next_image, mask, smooth_gain, gradient=False
    )
    if three_frames:
        extra_residuals, extra_smoothness = _cost_function(
            displacement,
            previous_image,
            center_image,
            mask,
            smooth_gain,
            gradient=False,
        )
        residuals += extra_residuals
        smoothness_penalty += extra_smoothness

    if debug:
        print("\nresiduals", residuals)
        print("smoothness_penalty", smoothness_penalty)

    return residuals + smoothness_penalty
@check_input_frames(2, 3)
def vet(
    input_images,
    sectors=((32, 16, 4, 2), (32, 16, 4, 2)),
    smooth_gain=1e6,
    first_guess=None,
    intermediate_steps=False,
    verbose=True,
    indexing="yx",
    padding=0,
    options=None,
):
    """
    Variational Echo Tracking Algorithm of Laroche and Zawadzki (1995), used
    in the McGill Algorithm for Prediction by Lagrangian Extrapolation
    (MAPLE) described in Germann and Zawadzki (2002).

    The displacement field between the template (first) and input (second)
    image is sought by minimizing the sum of squared pixel residuals plus a
    smoothness constraint. With a MaskedArray input, the residuals are
    computed only over non-masked areas. A scaling-guess procedure is applied
    from larger to smaller scales (the **sectors** keyword) to reduce the
    chances of converging to a local minimum. The cost function is minimized
    with scipy's ``minimize``, by default using the 'CG' method.

    Parameters
    ----------
    input_images: ndarray or MaskedArray
        Sequence of 2D images stacked along the first (time) axis; expected
        dimensions (2, ni, nj). The first element is the template image, the
        second the target.
    sectors: list or array, optional
        Number of sectors in each dimension used in the scaling procedure.
        A 1D array applies the same sector counts to both dimensions.
    smooth_gain: float, optional
        Smoothness constraint gain.
    first_guess: ndarray, optional
        Initial displacement of shape (2, ni, nj) matching the first sector
        shape of the scaling procedure; zeros are used when not given.
    intermediate_steps: bool, optional
        If True, also return the list of first guesses obtained during the
        scaling procedure.
    verbose: bool, optional
        Verbosity enabled if True (default).
    indexing: str, optional
        'ij'/'xy' for (time, longitude, latitude) inputs, 'yx' (default) for
        (time, latitude, longitude). UV displacements are always returned.
    padding: int
        Padding width in grid points added around the input array to reduce
        border effects in the minimization.
    options: dict, optional
        Solver options forwarded to scipy's ``minimize``; the special key
        "method" selects the optimization method (default "CG").

    Returns
    -------
    displacement_field: ndarray
        Displacement field of shape (2, ni, nj) warping the template image
        into the input image, in units of pixels / timestep; the first axis
        indexes displacement along x (0) or y (1).
    intermediate_steps: list of ndarray
        Only when **intermediate_steps** is True: the first guesses obtained
        during the scaling procedure.

    References
    ----------
    Laroche, S., and I. Zawadzki, 1995: J. Atmos. Oceanic Technol., 12,
    721-738. Germann, U. and I. Zawadzki, 2002: Mon. Wea. Rev., 130,
    2859-2873. Nocedal, J, and S J Wright. 2006. Numerical Optimization.
    Springer New York.
    """
    # debug_print is a no-op unless verbose: avoids "if verbose" at each call.
    if verbose:

        def debug_print(*args, **kwargs):
            print(*args, **kwargs)

    else:

        def debug_print(*args, **kwargs):
            del args
            del kwargs

    # Copy the options dict so the caller's dict is not modified by the
    # setdefault/pop calls below.
    if options is None:
        options = dict()
    else:
        options = dict(options)

    options.setdefault("eps", 0.1)
    options.setdefault("gtol", 0.1)
    options.setdefault("maxiter", 100)
    options.setdefault("disp", False)

    # "method" is consumed here; the remaining options go to minimize().
    optimization_method = options.pop("method", "CG")

    # Set to None to suppress pylint warning.
    pad_i = None
    pad_j = None
    sectors_in_i = None
    sectors_in_j = None

    debug_print("Running VET algorithm")

    valid_indexing = ["yx", "xy", "ij"]

    if indexing not in valid_indexing:
        raise ValueError(
            "Invalid indexing values: {0}\n".format(indexing)
            + "Supported values: {0}".format(str(valid_indexing))
        )

    # Convert input_images to a MaskedArray if it is a regular ndarray
    if not isinstance(input_images, MaskedArray):
        input_images = numpy.ma.masked_invalid(input_images)

    mask = numpy.ma.getmaskarray(input_images)

    if padding > 0:
        # Pad data with NaN (immediately masked below) and mask with True so
        # the border does not contribute to the residuals.
        padding_tuple = ((0, 0), (padding, padding), (padding, padding))

        input_images_data = numpy.pad(
            numpy.ma.getdata(input_images),
            padding_tuple,
            "constant",
            constant_values=numpy.nan,
        )

        mask = numpy.pad(mask, padding_tuple, "constant", constant_values=True)

        input_images = numpy.ma.MaskedArray(data=input_images_data, mask=mask)

    input_images.data[mask] = 0  # Remove any Nan from the raw data

    # Create a 2D mask with the right data type for _vet
    mask = numpy.asarray(numpy.any(mask, axis=0), dtype="int8", order="C")

    input_images = numpy.asarray(input_images.data, dtype="float64", order="C")

    # Check that the sectors divide the domain
    sectors = numpy.asarray(sectors, dtype="int", order="C")

    if sectors.ndim == 1:
        # Broadcast a 1D sector list to both image dimensions.
        new_sectors = numpy.zeros(
            (2,) + sectors.shape, dtype="int", order="C"
        ) + sectors.reshape((1, sectors.shape[0]))
        sectors = new_sectors
    elif sectors.ndim > 2 or sectors.ndim < 1:
        raise ValueError(
            "Incorrect sectors dimensions.\n"
            + "Only 1D or 2D arrays are supported to define"
            + "the number of sectors used in"
            + "the scaling procedure"
        )

    # Sort sectors in descending order
    # NOTE(review): ndarray.sort() sorts ascending in place, so the scaling
    # loop below iterates from the SMALLEST to the largest sector count --
    # confirm against the intended coarse-to-fine scaling described in the
    # docstring.
    sectors[0, :].sort()
    sectors[1, :].sort()

    # Prepare first guest
    first_guess_shape = (2, int(sectors[0, 0]), int(sectors[1, 0]))

    if first_guess is None:
        first_guess = numpy.zeros(first_guess_shape, order="C")
    else:
        if first_guess.shape != first_guess_shape:
            raise ValueError(
                "The shape of the initial guess do not match the number of "
                + "sectors of the first scaling guess\n"
                + "first_guess.shape={}\n".format(str(first_guess.shape))
                + "Expected shape={}".format(str(first_guess_shape))
            )
        else:
            first_guess = numpy.asarray(first_guess, order="C", dtype="float64")

    scaling_guesses = list()

    previous_sectors_in_i = sectors[0, 0]
    previous_sectors_in_j = sectors[1, 0]

    for n, (sectors_in_i, sectors_in_j) in enumerate(zip(sectors[0, :], sectors[1, :])):
        # Minimize for each sector size
        pad_i = get_padding(input_images.shape[1], sectors_in_i)
        pad_j = get_padding(input_images.shape[2], sectors_in_j)

        if (pad_i != (0, 0)) or (pad_j != (0, 0)):
            _input_images = numpy.pad(input_images, ((0, 0), pad_i, pad_j), "edge")

            _mask = numpy.pad(mask, (pad_i, pad_j), "constant", constant_values=1)
            _mask = numpy.ascontiguousarray(_mask)

            # NOTE(review): first_guess can never be None at this point (it
            # is always assigned before the loop), so this branch is dead and
            # first_guess is NOT padded alongside the padded images -- verify
            # whether the condition was meant to be "is not None".
            if first_guess is None:
                first_guess = numpy.pad(first_guess, ((0, 0), pad_i, pad_j), "edge")
                first_guess = numpy.ascontiguousarray(first_guess)
        else:
            _input_images = input_images
            _mask = mask

        sector_shape = (
            _input_images.shape[1] // sectors_in_i,
            _input_images.shape[2] // sectors_in_j,
        )

        debug_print("original image shape: " + str(input_images.shape))
        debug_print("padded image shape: " + str(_input_images.shape))
        debug_print("padded template_image image shape: " + str(_input_images.shape))

        debug_print(
            "\nNumber of sectors: {0:d},{1:d}".format(sectors_in_i, sectors_in_j)
        )

        debug_print("Sector Shape:", sector_shape)

        if n > 0:
            # Resample the previous scale's solution onto the current sector
            # grid to use it as the new initial guess.
            first_guess = zoom(
                first_guess,
                (
                    1,
                    sectors_in_i / previous_sectors_in_i,
                    sectors_in_j / previous_sectors_in_j,
                ),
                order=1,
                mode="nearest",
            )

        debug_print("Minimizing")

        result = minimize(
            vet_cost_function,
            first_guess.flatten(),
            jac=vet_cost_function_gradient,
            args=(_input_images, (sectors_in_i, sectors_in_j), _mask, smooth_gain),
            method=optimization_method,
            options=options,
        )

        first_guess = result.x.reshape(*first_guess.shape)

        if verbose:
            # Re-evaluate the cost function in debug mode to print the final
            # residual and smoothness terms for this scale.
            vet_cost_function(
                result.x,
                _input_images,
                (sectors_in_i, sectors_in_j),
                _mask,
                smooth_gain,
                debug=True,
            )

        if indexing == "yx":
            # Flip the displacement component axis to return UV ordering.
            scaling_guesses.append(first_guess[::-1, ...])
        else:
            scaling_guesses.append(first_guess)

        previous_sectors_in_i = sectors_in_i
        previous_sectors_in_j = sectors_in_j

    # Interpolate the per-sector displacements to the full (padded) grid.
    first_guess = zoom(
        first_guess,
        (
            1,
            _input_images.shape[1] / sectors_in_i,
            _input_images.shape[2] / sectors_in_j,
        ),
        order=1,
        mode="nearest",
    )

    first_guess = numpy.ascontiguousarray(first_guess)

    # Remove the extra padding if any
    ni = _input_images.shape[1]
    nj = _input_images.shape[2]

    first_guess = first_guess[:, pad_i[0] : ni - pad_i[1], pad_j[0] : nj - pad_j[1]]

    if indexing == "yx":
        first_guess = first_guess[::-1, ...]

    if padding > 0:
        first_guess = first_guess[:, padding:-padding, padding:-padding]

    if intermediate_steps:
        return first_guess, scaling_guesses

    return first_guess
def initialize_param_2d_fft_filter(field, **kwargs):
    """
    Takes one or more 2d input fields, fits two spectral slopes, beta1 and beta2,
    to produce one parametric, global and isotropic fourier filter.

    Parameters
    ----------
    field: array-like
        Two- or three-dimensional array containing one or more input fields.
        All values are required to be finite. If more than one field are passed,
        the average fourier filter is returned. It assumes that fields are stacked
        by the first axis: [nr_fields, y, x].

    Other Parameters
    ----------------
    win_fun: {'hann', 'tukey' or None}
        Optional tapering function to be applied to the input field, generated with
        :py:func:`pysteps.utils.tapering.compute_window_function`.
        (default None).
    model: {'power-law'}
        The name of the parametric model to be used to fit the power spectrum of
        the input field (default 'power-law').
    weighted: bool
        Whether or not to apply 1/sqrt(power) as weight in the numpy.polyfit()
        function (default False).
    rm_rdisc: bool
        Whether or not to remove the rain/no-rain discontinuity (default False).
        It assumes no-rain pixels are assigned with lowest value.
    fft_method: str or tuple
        A string or a (function,kwargs) tuple defining the FFT method to use
        (see "FFT methods" in :py:func:`pysteps.utils.interface.get_method`).
        Defaults to "numpy".

    Returns
    -------
    out: dict
        A dictionary containing the keys field, input_shape, model and pars.
        The first is a two-dimensional array of shape (m, int(n/2)+1) that
        defines the filter. The second one is the shape of the input field for
        the filter. The last two are the model and fitted parameters,
        respectively.

        This dictionary can be passed to
        :py:func:`pysteps.noise.fftgenerators.generate_noise_2d_fft_filter` to
        generate noise fields.
    """
    if len(field.shape) < 2 or len(field.shape) > 3:
        raise ValueError("the input is not two- or three-dimensional array")
    if np.any(~np.isfinite(field)):
        raise ValueError(
            "field contains non-finite values, this typically happens when the input\n"
            + "precipitation field provided to pysteps contains (mostly)zero values.\n"
            + "To prevent this error please call pysteps.utils.check_norain first,\n"
            + "using the same win_fun as used in this method (tukey by default)\n"
            + "and then only call this method if that check fails."
        )

    # defaults
    win_fun = kwargs.get("win_fun", None)
    model = kwargs.get("model", "power-law")
    weighted = kwargs.get("weighted", False)
    rm_rdisc = kwargs.get("rm_rdisc", False)
    fft = kwargs.get("fft_method", "numpy")
    if isinstance(fft, str):
        fft_shape = field.shape if len(field.shape) == 2 else field.shape[1:]
        fft = utils.get_method(fft, shape=fft_shape)

    field = field.copy()

    # remove rain/no-rain discontinuity
    if rm_rdisc:
        field[field > field.min()] -= field[field > field.min()].min() - field.min()

    # dims
    if len(field.shape) == 2:
        field = field[None, :, :]
    nr_fields = field.shape[0]
    M, N = field.shape[1:]

    if win_fun is not None:
        tapering = utils.tapering.compute_window_function(M, N, win_fun)

        # make sure non-rainy pixels are set to zero
        field -= field.min(axis=(1, 2))[:, None, None]
    else:
        tapering = np.ones((M, N))

    if model.lower() == "power-law":
        # compute average 2D PSD
        F = np.zeros((M, N), dtype=complex)
        for i in range(nr_fields):
            F += fft.fftshift(fft.fft2(field[i, :, :] * tapering))
        F /= nr_fields
        F = abs(F) ** 2 / F.size

        # compute radially averaged 1D PSD
        psd = utils.spectral.rapsd(F)
        L = max(M, N)

        # wavenumbers
        if L % 2 == 1:
            wn = np.arange(0, int(L / 2) + 1)
        else:
            wn = np.arange(0, int(L / 2))

        # compute single spectral slope beta as first guess
        if weighted:
            p0 = np.polyfit(np.log(wn[1:]), np.log(psd[1:]), 1, w=np.sqrt(psd[1:]))
        else:
            p0 = np.polyfit(np.log(wn[1:]), np.log(psd[1:]), 1)
        beta = p0[0]

        # create the piecewise function with two spectral slopes beta1 and beta2
        # and scaling break x0
        def piecewise_linear(x, x0, y0, beta1, beta2):
            return np.piecewise(
                x,
                [x < x0, x >= x0],
                [
                    lambda x: beta1 * x + y0 - beta1 * x0,
                    lambda x: beta2 * x + y0 - beta2 * x0,
                ],
            )

        # fit the two betas and the scaling break
        p0 = [2.0, 0, beta, beta]  # first guess
        bounds = (
            [2.0, 0, -4, -4],
            [5.0, 20, -1.0, -1.0],
        )  # TODO: provide better bounds
        if weighted:
            p, e = optimize.curve_fit(
                piecewise_linear,
                np.log(wn[1:]),
                np.log(psd[1:]),
                p0=p0,
                bounds=bounds,
                sigma=1 / np.sqrt(psd[1:]),
            )
        else:
            p, e = optimize.curve_fit(
                piecewise_linear, np.log(wn[1:]), np.log(psd[1:]), p0=p0, bounds=bounds
            )

        # compute 2d filter
        YC, XC = utils.arrays.compute_centred_coord_array(M, N)
        R = np.sqrt(XC * XC + YC * YC)
        R = fft.fftshift(R)
        pf = p.copy()
        # halve the fitted slopes: the filter acts on amplitudes, the fit was
        # done on power (amplitude squared)
        pf[2:] = pf[2:] / 2
        F = np.exp(piecewise_linear(np.log(R), *pf))
        F[~np.isfinite(F)] = 1

        f = piecewise_linear

    else:
        raise ValueError("unknown parametric model %s" % model)

    return {
        "field": F,
        "input_shape": field.shape[1:],
        "use_full_fft": True,
        "model": f,
        "pars": p,
    }
def initialize_nonparam_2d_fft_filter(field, **kwargs):
    """
    Takes one or more 2d input fields and produces one non-parametric, global
    and anisotropic fourier filter.

    Parameters
    ----------
    field: array-like
        Two- or three-dimensional array containing one or more input fields.
        All values are required to be finite. If more than one field are passed,
        the average fourier filter is returned. It assumes that fields are stacked
        by the first axis: [nr_fields, y, x].

    Other Parameters
    ----------------
    win_fun: {'hann', 'tukey', None}
        Optional tapering function to be applied to the input field, generated with
        :py:func:`pysteps.utils.tapering.compute_window_function`
        (default 'tukey').
    donorm: bool
        Option to normalize the real and imaginary parts.
        Default: False
    rm_rdisc: bool
        Whether or not to remove the rain/no-rain discontinuity (default True).
        It assumes no-rain pixels are assigned with lowest value.
    fft_method: str or tuple
        A string or a (function,kwargs) tuple defining the FFT method to use
        (see "FFT methods" in :py:func:`pysteps.utils.interface.get_method`).
        Defaults to "numpy".

    Returns
    -------
    out: dict
        A dictionary containing the keys field and input_shape. The first is a
        two-dimensional array of shape (m, int(n/2)+1) that defines the filter.
        The second one is the shape of the input field for the filter.
        It can be passed to
        :py:func:`pysteps.noise.fftgenerators.generate_noise_2d_fft_filter`.
    """
    if len(field.shape) < 2 or len(field.shape) > 3:
        raise ValueError("the input is not two- or three-dimensional array")
    if np.any(~np.isfinite(field)):
        raise ValueError(
            "field contains non-finite values, this typically happens when the input\n"
            + "precipitation field provided to pysteps contains (mostly)zero values.\n"
            + "To prevent this error please call pysteps.utils.check_norain first,\n"
            + "using the same win_fun as used in this method (tukey by default)\n"
            + "and then only call this method if that check fails."
        )

    # defaults
    win_fun = kwargs.get("win_fun", "tukey")
    donorm = kwargs.get("donorm", False)
    rm_rdisc = kwargs.get("rm_rdisc", True)
    use_full_fft = kwargs.get("use_full_fft", False)
    fft = kwargs.get("fft_method", "numpy")
    if isinstance(fft, str):
        fft_shape = field.shape if len(field.shape) == 2 else field.shape[1:]
        fft = utils.get_method(fft, shape=fft_shape)

    field = field.copy()

    # remove rain/no-rain discontinuity
    if rm_rdisc:
        field[field > field.min()] -= field[field > field.min()].min() - field.min()

    # dims
    if len(field.shape) == 2:
        field = field[None, :, :]
    nr_fields = field.shape[0]
    field_shape = field.shape[1:]
    # the half-spectrum (rfft) suffices for real input unless the full FFT is
    # explicitly requested
    if use_full_fft:
        fft_shape = (field.shape[1], field.shape[2])
    else:
        fft_shape = (field.shape[1], int(field.shape[2] / 2) + 1)

    # make sure non-rainy pixels are set to zero
    field -= field.min(axis=(1, 2))[:, None, None]

    if win_fun is not None:
        tapering = utils.tapering.compute_window_function(
            field_shape[0], field_shape[1], win_fun
        )
    else:
        tapering = np.ones(field_shape)

    # average the spectra of all input fields
    F = np.zeros(fft_shape, dtype=complex)
    for i in range(nr_fields):
        if use_full_fft:
            F += fft.fft2(field[i, :, :] * tapering)
        else:
            F += fft.rfft2(field[i, :, :] * tapering)
    F /= nr_fields

    # normalize the real and imaginary parts
    if donorm:
        if np.std(F.imag) > 0:
            F.imag = (F.imag - np.mean(F.imag)) / np.std(F.imag)
        if np.std(F.real) > 0:
            F.real = (F.real - np.mean(F.real)) / np.std(F.real)

    return {
        "field": np.abs(F),
        "input_shape": field.shape[1:],
        "use_full_fft": use_full_fft,
    }
def generate_noise_2d_fft_filter(
    F, randstate=None, seed=None, fft_method=None, domain="spatial"
):
    """
    Produces a field of correlated noise using global Fourier filtering.

    Parameters
    ----------
    F: dict
        A filter object returned by
        :py:func:`pysteps.noise.fftgenerators.initialize_param_2d_fft_filter` or
        :py:func:`pysteps.noise.fftgenerators.initialize_nonparam_2d_fft_filter`.
        All values in the filter array are required to be finite.
    randstate: mtrand.RandomState
        Optional random generator to use. If set to None, use numpy.random.
    seed: int
        Value to set a seed for the generator. None will not set the seed.
    fft_method: str or tuple
        A string or a (function,kwargs) tuple defining the FFT method to use
        (see "FFT methods" in :py:func:`pysteps.utils.interface.get_method`).
        Defaults to "numpy".
    domain: {"spatial", "spectral"}
        The domain for the computations: If "spatial", the noise is generated
        in the spatial domain and transformed back to spatial domain after the
        Fourier filtering. If "spectral", the noise field is generated and kept
        in the spectral domain.

    Returns
    -------
    N: array-like
        A two-dimensional field of stationary correlated noise. The noise field
        is normalized to zero mean and unit variance.
    """
    if domain not in ["spatial", "spectral"]:
        raise ValueError(
            "invalid value %s for the 'domain' argument: must be 'spatial' or 'spectral'"
            % str(domain)
        )

    input_shape = F["input_shape"]
    use_full_fft = F["use_full_fft"]
    F = F["field"]

    if len(F.shape) != 2:
        raise ValueError("field is not two-dimensional array")
    if np.any(~np.isfinite(F)):
        raise ValueError(
            "field contains non-finite values, this typically happens when the input\n"
            + "precipitation field provided to pysteps contains (mostly)zero values.\n"
            + "To prevent this error please call pysteps.utils.check_norain first,\n"
            + "using the same win_fun as used in this method (tukey by default)\n"
            + "and then only call this method if that check fails."
        )

    if randstate is None:
        randstate = np.random

    # set the seed
    if seed is not None:
        randstate.seed(seed)

    if fft_method is None:
        fft = utils.get_method("numpy", shape=input_shape)
    else:
        if isinstance(fft_method, str):
            fft = utils.get_method(fft_method, shape=input_shape)
        else:
            fft = fft_method

    # produce fields of white noise
    if domain == "spatial":
        N = randstate.randn(input_shape[0], input_shape[1])
    else:
        if use_full_fft:
            size = (input_shape[0], input_shape[1])
        else:
            size = (input_shape[0], int(input_shape[1] / 2) + 1)
        theta = randstate.uniform(low=0.0, high=2.0 * np.pi, size=size)
        # enforce the Hermitian symmetry of the first column so that the
        # inverse transform of the spectral noise is real-valued
        if input_shape[0] % 2 == 0:
            theta[int(input_shape[0] / 2) + 1 :, 0] = -theta[
                1 : int(input_shape[0] / 2), 0
            ][::-1]
        else:
            theta[int(input_shape[0] / 2) + 1 :, 0] = -theta[
                1 : int(input_shape[0] / 2) + 1, 0
            ][::-1]
        N = np.cos(theta) + 1.0j * np.sin(theta)

    # apply the global Fourier filter to impose a correlation structure
    if domain == "spatial":
        if use_full_fft:
            fN = fft.fft2(N)
        else:
            fN = fft.rfft2(N)
    else:
        fN = N
    fN *= F
    if domain == "spatial":
        if use_full_fft:
            N = np.array(fft.ifft2(fN).real)
        else:
            N = np.array(fft.irfft2(fN))
        N = (N - N.mean()) / N.std()
    else:
        N = fN
        N[0, 0] = 0.0
        N /= utils.spectral.std(N, input_shape, use_full_fft=use_full_fft)

    return N
def initialize_nonparam_2d_ssft_filter(field, **kwargs):
    """
    Function to compute the local Fourier filters using the Short-Space Fourier
    filtering approach.

    Parameters
    ----------
    field: array-like
        Two- or three-dimensional array containing one or more input fields.
        All values are required to be finite. If more than one field are passed,
        the average fourier filter is returned. It assumes that fields are stacked
        by the first axis: [nr_fields, y, x].

    Other Parameters
    ----------------
    win_size: int or two-element tuple of ints
        Size-length of the window to compute the SSFT (default (128, 128)).
    win_fun: {'hann', 'tukey', None}
        Optional tapering function to be applied to the input field, generated with
        :py:func:`pysteps.utils.tapering.compute_window_function`
        (default 'tukey').
    overlap: float [0,1[
        The proportion of overlap to be applied between successive windows
        (default 0.3).
    war_thr: float [0,1]
        Threshold for the minimum fraction of rain needed for computing the FFT
        (default 0.1).
    rm_rdisc: bool
        Whether or not to remove the rain/no-rain discontinuity. It assumes
        no-rain pixels are assigned with lowest value.
    fft_method: str or tuple
        A string or a (function,kwargs) tuple defining the FFT method to use
        (see "FFT methods" in :py:func:`pysteps.utils.interface.get_method`).
        Defaults to "numpy".

    Returns
    -------
    field: array-like
        Four-dimensional array containing the 2d fourier filters distributed over
        a 2d spatial grid.
        It can be passed to
        :py:func:`pysteps.noise.fftgenerators.generate_noise_2d_ssft_filter`.

    References
    ----------
    :cite:`NBSG2017`
    """
    if len(field.shape) < 2 or len(field.shape) > 3:
        raise ValueError("the input is not two- or three-dimensional array")
    if np.any(np.isnan(field)):
        raise ValueError("field must not contain NaNs")

    # defaults
    win_size = kwargs.get("win_size", (128, 128))
    if isinstance(win_size, int):
        win_size = (win_size, win_size)
    win_fun = kwargs.get("win_fun", "tukey")
    overlap = kwargs.get("overlap", 0.3)
    war_thr = kwargs.get("war_thr", 0.1)
    rm_rdisc = kwargs.get("rm_rdisc", True)
    fft = kwargs.get("fft_method", "numpy")
    if isinstance(fft, str):
        fft_shape = field.shape if len(field.shape) == 2 else field.shape[1:]
        fft = utils.get_method(fft, shape=fft_shape)

    field = field.copy()

    # remove rain/no-rain discontinuity
    if rm_rdisc:
        field[field > field.min()] -= field[field > field.min()].min() - field.min()

    # dims
    if len(field.shape) == 2:
        field = field[None, :, :]
    nr_fields = field.shape[0]
    dim = field.shape[1:]
    dim_x = dim[1]
    dim_y = dim[0]

    # make sure non-rainy pixels are set to zero
    field -= field.min(axis=(1, 2))[:, None, None]

    # SSFT algorithm

    # prepare indices
    idxi = np.zeros(2, dtype=int)
    idxj = np.zeros(2, dtype=int)

    # number of windows
    num_windows_y = np.ceil(float(dim_y) / win_size[0]).astype(int)
    num_windows_x = np.ceil(float(dim_x) / win_size[1]).astype(int)

    # domain fourier filter
    F0 = initialize_nonparam_2d_fft_filter(
        field, win_fun=win_fun, donorm=True, use_full_fft=True, fft_method=fft
    )["field"]
    # and allocate it to the final grid
    F = np.zeros((num_windows_y, num_windows_x, F0.shape[0], F0.shape[1]))
    F += F0[np.newaxis, np.newaxis, :, :]

    # loop rows
    for i in range(F.shape[0]):
        # loop columns
        for j in range(F.shape[1]):
            # compute indices of local window
            idxi[0] = int(np.max((i * win_size[0] - overlap * win_size[0], 0)))
            idxi[1] = int(
                np.min((idxi[0] + win_size[0] + overlap * win_size[0], dim_y))
            )
            idxj[0] = int(np.max((j * win_size[1] - overlap * win_size[1], 0)))
            idxj[1] = int(
                np.min((idxj[0] + win_size[1] + overlap * win_size[1], dim_x))
            )

            # build localization mask
            # TODO: the 0.01 rain threshold must be improved
            mask = _get_mask(dim, idxi, idxj, win_fun)
            # wet-area ratio inside the local window
            war = float(np.sum((field * mask[None, :, :]) > 0.01)) / (
                (idxi[1] - idxi[0]) * (idxj[1] - idxj[0]) * nr_fields
            )

            if war > war_thr:
                # the new filter; windows with too little rain keep the
                # global filter F0
                F[i, j, :, :] = initialize_nonparam_2d_fft_filter(
                    field * mask[None, :, :],
                    win_fun=None,
                    donorm=True,
                    use_full_fft=True,
                    fft_method=fft,
                )["field"]

    return {"field": F, "input_shape": field.shape[1:], "use_full_fft": True}
def initialize_nonparam_2d_nested_filter(field, gridres=1.0, **kwargs):
    """
    Function to compute the local Fourier filters using a nested approach.

    Parameters
    ----------
    field: array-like
        Two- or three-dimensional array containing one or more input fields.
        All values are required to be finite. If more than one field are passed,
        the average fourier filter is returned. It assumes that fields are stacked
        by the first axis: [nr_fields, y, x].
    gridres: float
        Grid resolution in km.

    Other Parameters
    ----------------
    max_level: int
        Localization parameter. 0: global noise, >0: increasing degree of
        localization (default 3).
    win_fun: {'hann', 'tukey', None}
        Optional tapering function to be applied to the input field, generated with
        :py:func:`pysteps.utils.tapering.compute_window_function`
        (default 'tukey').
    war_thr: float [0;1]
        Threshold for the minimum fraction of rain needed for computing the FFT
        (default 0.1).
    rm_rdisc: bool
        Whether or not to remove the rain/no-rain discontinuity. It assumes
        no-rain pixels are assigned with lowest value.
    fft_method: str or tuple
        A string or a (function,kwargs) tuple defining the FFT method to use
        (see "FFT methods" in :py:func:`pysteps.utils.interface.get_method`).
        Defaults to "numpy".

    Returns
    -------
    field: array-like
        Four-dimensional array containing the 2d fourier filters distributed over
        a 2d spatial grid.
        It can be passed to
        :py:func:`pysteps.noise.fftgenerators.generate_noise_2d_ssft_filter`.
    """
    if len(field.shape) < 2 or len(field.shape) > 3:
        raise ValueError("the input is not two- or three-dimensional array")
    if np.any(np.isnan(field)):
        raise ValueError("field must not contain NaNs")

    # defaults
    max_level = kwargs.get("max_level", 3)
    win_fun = kwargs.get("win_fun", "tukey")
    war_thr = kwargs.get("war_thr", 0.1)
    rm_rdisc = kwargs.get("rm_rdisc", True)
    fft = kwargs.get("fft_method", "numpy")
    if isinstance(fft, str):
        fft_shape = field.shape if len(field.shape) == 2 else field.shape[1:]
        fft = utils.get_method(fft, shape=fft_shape)

    field = field.copy()

    # remove rain/no-rain discontinuity
    if rm_rdisc:
        field[field > field.min()] -= field[field > field.min()].min() - field.min()

    # dims
    if len(field.shape) == 2:
        field = field[None, :, :]
    nr_fields = field.shape[0]
    dim = field.shape[1:]
    dim_x = dim[1]
    dim_y = dim[0]

    # make sure non-rainy pixels are set to zero
    field -= field.min(axis=(1, 2))[:, None, None]

    # Nested algorithm

    # prepare indices
    Idxi = np.array([[0, dim_y]])
    Idxj = np.array([[0, dim_x]])
    Idxipsd = np.array([[0, 2**max_level]])
    Idxjpsd = np.array([[0, 2**max_level]])

    # generate the FFT sample frequencies
    freqx = fft.fftfreq(dim_x, gridres)
    freqy = fft.fftfreq(dim_y, gridres)
    fx, fy = np.meshgrid(freqx, freqy)
    freq_grid = np.sqrt(fx**2 + fy**2)

    # domain fourier filter
    F0 = initialize_nonparam_2d_fft_filter(
        field, win_fun=win_fun, donorm=True, use_full_fft=True, fft_method=fft
    )["field"]
    # and allocate it to the final grid
    F = np.zeros((2**max_level, 2**max_level, F0.shape[0], F0.shape[1]))
    F += F0[np.newaxis, np.newaxis, :, :]

    # now loop levels and build composite spectra
    level = 0
    while level < max_level:
        for m in range(len(Idxi)):
            # the indices of rainfall field
            Idxinext, Idxjnext = _split_field(Idxi[m, :], Idxj[m, :], 2)
            # the indices of the field of fourier filters
            Idxipsdnext, Idxjpsdnext = _split_field(Idxipsd[m, :], Idxjpsd[m, :], 2)

            for n in range(len(Idxinext)):
                mask = _get_mask(dim, Idxinext[n, :], Idxjnext[n, :], win_fun)
                # wet-area ratio inside the local window
                war = np.sum((field * mask[None, :, :]) > 0.01) / float(
                    (Idxinext[n, 1] - Idxinext[n, 0])
                    * (Idxjnext[n, 1] - Idxjnext[n, 0])
                    * nr_fields
                )

                if war > war_thr:
                    # the new filter
                    newfilter = initialize_nonparam_2d_fft_filter(
                        field * mask[None, :, :],
                        win_fun=None,
                        donorm=True,
                        use_full_fft=True,
                        fft_method=fft,
                    )["field"]

                    # compute logistic function to define weights as function of frequency
                    # k controls the shape of the weighting function
                    # TODO: optimize parameters
                    k = 0.05
                    x0 = (
                        Idxinext[n, 1] - Idxinext[n, 0]
                    ) / 2.0  # TODO: consider y dimension, too
                    merge_weights = 1 / (
                        1 + np.exp(-k * (1 / freq_grid - x0 * gridres))
                    )
                    newfilter *= 1 - merge_weights

                    # perform the weighted average of previous and new fourier filters
                    F[
                        Idxipsdnext[n, 0] : Idxipsdnext[n, 1],
                        Idxjpsdnext[n, 0] : Idxjpsdnext[n, 1],
                        :,
                        :,
                    ] *= merge_weights[np.newaxis, np.newaxis, :, :]
                    F[
                        Idxipsdnext[n, 0] : Idxipsdnext[n, 1],
                        Idxjpsdnext[n, 0] : Idxjpsdnext[n, 1],
                        :,
                        :,
                    ] += newfilter[np.newaxis, np.newaxis, :, :]

        # update indices
        level += 1
        Idxi, Idxj = _split_field((0, dim[0]), (0, dim[1]), 2**level)
        Idxipsd, Idxjpsd = _split_field(
            (0, 2**max_level), (0, 2**max_level), 2**level
        )

    return {"field": F, "input_shape": field.shape[1:], "use_full_fft": True}
def generate_noise_2d_ssft_filter(F, randstate=None, seed=None, **kwargs):
    """
    Function to compute the locally correlated noise using a nested approach.

    Parameters
    ----------
    F: array-like
        A filter object returned by
        :py:func:`pysteps.noise.fftgenerators.initialize_nonparam_2d_ssft_filter` or
        :py:func:`pysteps.noise.fftgenerators.initialize_nonparam_2d_nested_filter`.
        The filter is a four-dimensional array containing the 2d fourier filters
        distributed over a 2d spatial grid.
    randstate: mtrand.RandomState
        Optional random generator to use. If set to None, use numpy.random.
    seed: int
        Value to set a seed for the generator. None will not set the seed.

    Other Parameters
    ----------------
    overlap: float
        Percentage overlap [0-1] between successive windows (default 0.2).
    win_fun: {'hann', 'tukey', None}
        Optional tapering function to be applied to the input field, generated with
        :py:func:`pysteps.utils.tapering.compute_window_function`
        (default 'tukey').
    fft_method: str or tuple
        A string or a (function,kwargs) tuple defining the FFT method to use
        (see "FFT methods" in :py:func:`pysteps.utils.interface.get_method`).
        Defaults to "numpy".

    Returns
    -------
    N: array-like
        A two-dimensional numpy array of non-stationary correlated noise.
    """
    input_shape = F["input_shape"]
    use_full_fft = F["use_full_fft"]
    F = F["field"]

    if len(F.shape) != 4:
        raise ValueError("the input is not four-dimensional array")
    if np.any(~np.isfinite(F)):
        raise ValueError(
            "field contains non-finite values, this typically happens when the input\n"
            + "precipitation field provided to pysteps contains (mostly) zero values.\n"
            + "To prevent this error please call pysteps.utils.check_norain first,\n"
            + "using the same win_fun as used in this method (tukey by default)\n"
            + "and then only call this method if that check fails."
        )

    if kwargs.get("domain") == "spectral":
        raise NotImplementedError(
            "SSFT-based noise generator is not implemented in the spectral domain"
        )

    # defaults
    overlap = kwargs.get("overlap", 0.2)
    win_fun = kwargs.get("win_fun", "tukey")
    fft = kwargs.get("fft_method", "numpy")
    if isinstance(fft, str):
        fft = utils.get_method(fft, shape=input_shape)

    if randstate is None:
        randstate = np.random

    # set the seed
    if seed is not None:
        randstate.seed(seed)

    dim_y = F.shape[2]
    dim_x = F.shape[3]
    dim = (dim_y, dim_x)

    # produce fields of white noise
    N = randstate.randn(dim_y, dim_x)
    fN = fft.fft2(N)

    # initialize variables
    cN = np.zeros(dim)
    sM = np.zeros(dim)

    idxi = np.zeros(2, dtype=int)
    idxj = np.zeros(2, dtype=int)

    # get the window size
    win_size = (float(dim_y) / F.shape[0], float(dim_x) / F.shape[1])

    # loop the windows and build composite image of correlated noise

    # loop rows
    for i in range(F.shape[0]):
        # loop columns
        for j in range(F.shape[1]):
            # apply fourier filtering with local filter
            lF = F[i, j, :, :]
            flN = fN * lF
            flN = np.array(fft.ifft2(flN).real)

            # compute indices of local window
            idxi[0] = int(np.max((i * win_size[0] - overlap * win_size[0], 0)))
            idxi[1] = int(
                np.min((idxi[0] + win_size[0] + overlap * win_size[0], dim_y))
            )
            idxj[0] = int(np.max((j * win_size[1] - overlap * win_size[1], 0)))
            idxj[1] = int(
                np.min((idxj[0] + win_size[1] + overlap * win_size[1], dim_x))
            )

            # build mask and add local noise field to the composite image
            M = _get_mask(dim, idxi, idxj, win_fun)
            cN += flN * M
            sM += M

    # normalize the field: divide by the sum of the window weights, then
    # standardize to zero mean and unit variance
    cN[sM > 0] /= sM[sM > 0]
    cN = (cN - cN.mean()) / cN.std()

    return cN
def _split_field(idxi, idxj, Segments):
    """
    Split a domain, given by its row and column index ranges, into
    Segments x Segments equally spaced sub-windows.

    Parameters
    ----------
    idxi, idxj: array_like
        Two-element (start, stop) index ranges along rows and columns.
    Segments: int
        Number of segments along each dimension.

    Returns
    -------
    Idxi, Idxj: ndarray
        Integer arrays of shape (Segments**2, 2) with the (start, stop)
        indices of each sub-window, in row-major order.
    """
    sizei = idxi[1] - idxi[0]
    sizej = idxj[1] - idxj[0]

    # size of each sub-window (integer division truncates)
    winsizei = int(sizei / Segments)
    winsizej = int(sizej / Segments)

    Idxi = np.zeros((Segments**2, 2))
    Idxj = np.zeros((Segments**2, 2))

    count = -1
    for i in range(Segments):
        for j in range(Segments):
            count += 1
            Idxi[count, 0] = idxi[0] + i * winsizei
            # clip the window end so it never exceeds the parent range
            Idxi[count, 1] = np.min((Idxi[count, 0] + winsizei, idxi[1]))
            Idxj[count, 0] = idxj[0] + j * winsizej
            Idxj[count, 1] = np.min((Idxj[count, 0] + winsizej, idxj[1]))

    # the indices are used for slicing, hence cast to int
    Idxi = np.array(Idxi).astype(int)
    Idxj = np.array(Idxj).astype(int)

    return Idxi, Idxj


def _get_mask(Size, idxi, idxj, win_fun):
    """
    Compute a mask of zeros with a window at a given position.

    Parameters
    ----------
    Size: tuple
        Shape (rows, cols) of the output mask.
    idxi, idxj: array_like
        Two-element (start, stop) index ranges of the window along rows
        and columns.
    win_fun: {'hann', 'tukey', None}
        Name of the tapering window placed inside the index range; None
        yields a flat (all-ones) window.

    Returns
    -------
    mask: ndarray
        Array of shape Size that is zero outside the window and contains
        the (strictly positive) window weights inside it.
    """
    idxi = np.array(idxi).astype(int)
    idxj = np.array(idxj).astype(int)

    win_size = (idxi[1] - idxi[0], idxj[1] - idxj[0])
    if win_fun is not None:
        wind = utils.tapering.compute_window_function(win_size[0], win_size[1], win_fun)
        wind += 1e-6  # avoid zero values
    else:
        wind = np.ones(win_size)

    mask = np.zeros(Size)
    mask[idxi.item(0) : idxi.item(1), idxj.item(0) : idxj.item(1)] = wind

    return mask
autosummary:: :toctree: ../generated/ get_method """ from pysteps.noise.fftgenerators import ( initialize_param_2d_fft_filter, generate_noise_2d_fft_filter, initialize_nonparam_2d_fft_filter, initialize_nonparam_2d_ssft_filter, generate_noise_2d_ssft_filter, initialize_nonparam_2d_nested_filter, ) from pysteps.noise.motion import initialize_bps, generate_bps _noise_methods = dict() _noise_methods["parametric"] = ( initialize_param_2d_fft_filter, generate_noise_2d_fft_filter, ) _noise_methods["nonparametric"] = ( initialize_nonparam_2d_fft_filter, generate_noise_2d_fft_filter, ) _noise_methods["ssft"] = ( initialize_nonparam_2d_ssft_filter, generate_noise_2d_ssft_filter, ) _noise_methods["nested"] = ( initialize_nonparam_2d_nested_filter, generate_noise_2d_ssft_filter, ) _noise_methods["bps"] = (initialize_bps, generate_bps) def get_method(name): """ Return two callable functions to initialize and generate 2d perturbations of precipitation or velocity fields.\n Methods for precipitation fields: +-------------------+------------------------------------------------------+ | Name | Description | +===================+======================================================+ | parametric | this global generator uses parametric Fourier | | | filtering (power-law model) | +-------------------+------------------------------------------------------+ | nonparametric | this global generator uses nonparametric Fourier | | | filtering | +-------------------+------------------------------------------------------+ | ssft | this local generator uses the short-space Fourier | | | filtering | +-------------------+------------------------------------------------------+ | nested | this local generator uses a nested Fourier filtering | +-------------------+------------------------------------------------------+ Methods for velocity fields: +-------------------+------------------------------------------------------+ | Name | Description | 
def get_method(name):
    """
    Return two callable functions to initialize and generate 2d perturbations
    of precipitation or velocity fields.\n

    Methods for precipitation fields:

    +-------------------+------------------------------------------------------+
    |     Name          |              Description                             |
    +===================+======================================================+
    |  parametric       | this global generator uses parametric Fourier        |
    |                   | filtering (power-law model)                          |
    +-------------------+------------------------------------------------------+
    |  nonparametric    | this global generator uses nonparametric Fourier     |
    |                   | filtering                                            |
    +-------------------+------------------------------------------------------+
    |  ssft             | this local generator uses the short-space Fourier    |
    |                   | filtering                                            |
    +-------------------+------------------------------------------------------+
    |  nested           | this local generator uses a nested Fourier filtering |
    +-------------------+------------------------------------------------------+

    Methods for velocity fields:

    +-------------------+------------------------------------------------------+
    |     Name          |              Description                             |
    +===================+======================================================+
    |  bps              | The method described in :cite:`BPS2006`, where       |
    |                   | time-dependent velocity perturbations are sampled    |
    |                   | from the exponential distribution                    |
    +-------------------+------------------------------------------------------+
    """
    # lookup is case-insensitive; non-string input is a programming error
    if isinstance(name, str):
        name = name.lower()
    else:
        raise TypeError(
            "Only strings supported for the method's names.\n"
            + "Available names:"
            + str(list(_noise_methods.keys()))
        ) from None

    try:
        # each entry is an (initializer, generator) pair of callables
        return _noise_methods[name]
    except KeyError:
        raise ValueError(
            "Unknown method {}\n".format(name)
            + "The available methods are:"
            + str(list(_noise_methods.keys()))
        ) from None
def get_default_params_bps_par():
    """Return a tuple containing the default velocity perturbation parameters
    given in :cite:`BPS2006` for the parallel component."""
    return (10.88, 0.23, -7.68)


def get_default_params_bps_perp():
    """Return a tuple containing the default velocity perturbation parameters
    given in :cite:`BPS2006` for the perpendicular component."""
    return (5.76, 0.31, -2.72)


def initialize_bps(
    V, pixelsperkm, timestep, p_par=None, p_perp=None, randstate=None, seed=None
):
    """
    Initialize the motion field perturbator described in :cite:`BPS2006`.
    For simplicity, the bias adjustment procedure described there has not
    been implemented. The perturbator generates a field whose magnitude
    increases with respect to lead time.

    Parameters
    ----------
    V: array_like
        Array of shape (2,m,n) containing the x- and y-components of the m*n
        motion field to perturb.
    pixelsperkm: float
        Spatial resolution of the motion field (pixels/kilometer).
    timestep: float
        Time step for the motion vectors (minutes).
    p_par: tuple
        Tuple containing the parameters a,b and c for the standard deviation of
        the perturbations in the direction parallel to the motion vectors. The
        standard deviations are modeled by the function f_par(t) = a*t**b+c,
        where t is lead time. The default values are taken from :cite:`BPS2006`.
    p_perp: tuple
        Tuple containing the parameters a,b and c for the standard deviation of
        the perturbations in the direction perpendicular to the motion vectors.
        The standard deviations are modeled by the function f_perp(t) = a*t**b+c,
        where t is lead time. The default values are taken from :cite:`BPS2006`.
    randstate: mtrand.RandomState
        Optional random generator to use. If set to None, use numpy.random.
    seed: int
        Optional seed number for the random generator.

    Returns
    -------
    out: dict
        A dictionary containing the perturbator that can be supplied to
        generate_motion_perturbations_bps.

    See also
    --------
    pysteps.noise.motion.generate_bps
    """
    if len(V.shape) != 3:
        raise ValueError("V is not a three-dimensional array")
    if V.shape[0] != 2:
        raise ValueError("the first dimension of V is not 2")
    if p_par is None:
        p_par = get_default_params_bps_par()
    if p_perp is None:
        p_perp = get_default_params_bps_perp()
    if len(p_par) != 3:
        raise ValueError("the length of p_par is not 3")
    if len(p_perp) != 3:
        raise ValueError("the length of p_perp is not 3")

    perturbator = {}

    if randstate is None:
        randstate = np.random

    if seed is not None:
        randstate.seed(seed)

    # one Laplace-distributed amplitude per component; the scale 1/sqrt(2)
    # gives unit variance
    eps_par = randstate.laplace(scale=1.0 / np.sqrt(2))
    eps_perp = randstate.laplace(scale=1.0 / np.sqrt(2))

    # scale factor for converting the unit of the advection velocities
    # into km/h
    vsf = 60.0 / (timestep * pixelsperkm)

    # unit vectors parallel to the motion field; zero where the velocity
    # magnitude is negligible
    N = linalg.norm(V, axis=0)
    mask = N > 1e-12
    V_n = np.empty(V.shape)
    V_n[:, mask] = V[:, mask] / np.stack([N[mask], N[mask]])
    V_n[:, ~mask] = 0.0

    perturbator["randstate"] = randstate
    perturbator["vsf"] = vsf
    perturbator["p_par"] = p_par
    perturbator["p_perp"] = p_perp
    perturbator["eps_par"] = eps_par
    perturbator["eps_perp"] = eps_perp
    perturbator["V_par"] = V_n
    perturbator["V_perp"] = np.stack([-V_n[1, :, :], V_n[0, :, :]])

    return perturbator
See also -------- pysteps.noise.motion.initialize_bps """ vsf = perturbator["vsf"] p_par = perturbator["p_par"] p_perp = perturbator["p_perp"] eps_par = perturbator["eps_par"] eps_perp = perturbator["eps_perp"] V_par = perturbator["V_par"] V_perp = perturbator["V_perp"] g_par = p_par[0] * pow(t, p_par[1]) + p_par[2] g_perp = p_perp[0] * pow(t, p_perp[1]) + p_perp[2] return (g_par * eps_par * V_par + g_perp * eps_perp * V_perp) / vsf ================================================ FILE: pysteps/noise/utils.py ================================================ # -*- coding: utf-8 -*- """ pysteps.noise.utils =================== Miscellaneous utility functions related to generation of stochastic perturbations. .. autosummary:: :toctree: ../generated/ compute_noise_stddev_adjs """ import numpy as np try: import dask dask_imported = True except ImportError: dask_imported = False def compute_noise_stddev_adjs( R, R_thr_1, R_thr_2, F, decomp_method, noise_filter, noise_generator, num_iter, conditional=True, num_workers=1, seed=None, ): """Apply a scale-dependent adjustment factor to the noise fields used in STEPS. Simulates the effect of applying a precipitation mask to a Gaussian noise field obtained by the nonparametric filter method. The idea is to decompose the masked noise field into a cascade and compare the standard deviations of each level into those of the observed precipitation intensity field. This gives correction factors for the standard deviations :cite:`BPS2006`. The calculations are done for n realizations of the noise field, and the correction factors are calculated from the average values of the standard deviations. Parameters ---------- R: array_like The input precipitation field, assumed to be in logarithmic units (dBR or reflectivity). R_thr_1: float Intensity threshold for precipitation/no precipitation. R_thr_2: float Intensity values below R_thr_1 are set to this value. 
def compute_noise_stddev_adjs(
    R,
    R_thr_1,
    R_thr_2,
    F,
    decomp_method,
    noise_filter,
    noise_generator,
    num_iter,
    conditional=True,
    num_workers=1,
    seed=None,
):
    """Apply a scale-dependent adjustment factor to the noise fields used in
    STEPS.

    Simulates the effect of applying a precipitation mask to a Gaussian
    noise field obtained by the nonparametric filter method. The idea is to
    decompose the masked noise field into a cascade and compare the standard
    deviations of each level into those of the observed precipitation
    intensity field. This gives correction factors for the standard
    deviations :cite:`BPS2006`. The calculations are done for n realizations
    of the noise field, and the correction factors are calculated from the
    average values of the standard deviations.

    Parameters
    ----------
    R: array_like
        The input precipitation field, assumed to be in logarithmic units
        (dBR or reflectivity).
    R_thr_1: float
        Intensity threshold for precipitation/no precipitation.
    R_thr_2: float
        Intensity values below R_thr_1 are set to this value.
    F: dict
        A bandpass filter dictionary returned by a method defined in
        pysteps.cascade.bandpass_filters. This defines the filter to use and
        the number of cascade levels.
    decomp_method: function
        A function defined in pysteps.cascade.decomposition. Specifies the
        method to use for decomposing the observed precipitation field and
        noise field into different spatial scales.
    noise_filter: dict
        Noise filter object passed as the first argument to noise_generator.
    noise_generator: function
        Function generating a correlated noise field; called as
        noise_generator(noise_filter, randstate=...).
    num_iter: int
        The number of noise fields to generate.
    conditional: bool
        If set to True, compute the statistics conditionally by excluding
        areas of no precipitation.
    num_workers: int
        The number of workers to use for parallel computation. Applicable
        if dask is installed.
    seed: int
        Optional seed number for the random generators.

    Returns
    -------
    out: list
        A list containing the standard deviation adjustment factor for each
        cascade level.
    """
    # precipitation / no-precipitation mask from the first threshold
    MASK = R >= R_thr_1

    # work on a copy so the caller's array is left untouched; fill
    # non-finite and below-threshold pixels with R_thr_2
    R = R.copy()
    R[~np.isfinite(R)] = R_thr_2
    R[~MASK] = R_thr_2

    # mean/std of the reference field, optionally restricted to the
    # precipitation area
    if not conditional:
        mu, sigma = np.mean(R), np.std(R)
    else:
        mu, sigma = np.mean(R[MASK]), np.std(R[MASK])
    R -= mu

    MASK_ = MASK if conditional else None
    # cascade decomposition of the zero-mean observed field
    decomp_R = decomp_method(R, F, mask=MASK_)

    if dask_imported and num_workers > 1:
        res = []

    N_stds = [None] * num_iter

    # one RandomState per realization; each state produces the seed for the
    # next one so all realizations are reproducible from the initial seed
    randstates = []

    for k in range(num_iter):
        rs = np.random.RandomState(seed=seed)
        randstates.append(rs)
        seed = rs.randint(0, high=1e9)

    def worker(k):
        # generate Gaussian white noise field, filter it using the chosen
        # method, multiply it with the standard deviation of the observed
        # field and apply the precipitation mask
        N = noise_generator(noise_filter, randstate=randstates[k])
        N = N / np.std(N) * sigma + mu
        N[~MASK] = R_thr_2

        # subtract the mean and decompose the masked noise field into a
        # cascade
        N -= mu
        decomp_N = decomp_method(N, F, mask=MASK_)

        # store per-level standard deviations of this realization
        N_stds[k] = decomp_N["stds"]

    if dask_imported and num_workers > 1:
        for k in range(num_iter):
            res.append(dask.delayed(worker)(k))
        dask.compute(*res, num_workers=num_workers)
    else:
        for k in range(num_iter):
            worker(k)

    # for each cascade level, compare the standard deviations between the
    # observed field and the masked noise field, which gives the correction
    # factors
    return decomp_R["stds"] / np.mean(np.vstack(N_stds), axis=0)
def forecast(
    vil,
    velocity,
    timesteps,
    rainrate=None,
    n_cascade_levels=6,
    extrap_method="semilagrangian",
    ar_order=2,
    ar_window_radius=50,
    r_vil_window_radius=3,
    fft_method="numpy",
    apply_rainrate_mask=True,
    num_workers=1,
    extrap_kwargs=None,
    filter_kwargs=None,
    measure_time=False,
):
    """
    Generate a nowcast by using the autoregressive nowcasting using VIL
    (ANVIL) method.

    ANVIL is built on top of an extrapolation-based nowcast. The key
    features are:

    1) Growth and decay: implemented by using a cascade decomposition and
       a multiscale autoregressive integrated ARI(p,1) model. Instead of
       the original time series, the ARI model is applied to the differenced
       one corresponding to time derivatives.
    2) Originally designed for using vertically integrated liquid (VIL) as
       the input data. In this case, the rain rate (R) is obtained from VIL
       via an empirical relation. This implementation is more general so
       that the input can be any two-dimensional precipitation field.
    3) The parameters of the ARI model and the R(VIL) relation are allowed
       to be spatially variable. The estimation is done using a moving
       window.

    Parameters
    ----------
    vil: array_like
        Array of shape (ar_order+2,m,n) containing the input fields ordered
        by timestamp from oldest to newest. The inputs are expected to
        contain VIL or rain rate. The time steps between the inputs are
        assumed to be regular.
    velocity: array_like
        Array of shape (2,m,n) containing the x- and y-components of the
        advection field. The velocities are assumed to represent one time
        step between the inputs. All values are required to be finite.
    timesteps: int or list of floats
        Number of time steps to forecast or a list of time steps for which
        the forecasts are computed (relative to the input time step). The
        elements of the list are required to be in ascending order.
    rainrate: array_like
        Array of shape (m,n) containing the most recently observed rain
        rate field. If set to None, no R(VIL) conversion is done and the
        outputs are in the same units as the inputs.
    n_cascade_levels: int, optional
        The number of cascade levels to use. Defaults to 6, see issue #385
        on GitHub.
    extrap_method: str, optional
        Name of the extrapolation method to use. See the documentation of
        pysteps.extrapolation.interface.
    ar_order: int, optional
        The order of the autoregressive model to use. The recommended
        values are 1 or 2. Using a higher-order model is strongly
        discouraged because the stationarity of the AR process cannot be
        guaranteed.
    ar_window_radius: int, optional
        The radius of the window to use for determining the parameters of
        the autoregressive model. Set to None to disable localization.
    r_vil_window_radius: int, optional
        The radius of the window to use for determining the R(VIL)
        relation. Applicable if rainrate is not None.
    fft_method: str, optional
        A string defining the FFT method to use (see utils.fft.get_method).
        Defaults to 'numpy' for compatibility reasons. If pyFFTW is
        installed, the recommended method is 'pyfftw'.
    apply_rainrate_mask: bool
        Apply mask to prevent producing precipitation to areas where it was
        not originally observed. Defaults to True. Disabling this may
        improve some verification metrics but increases the number of false
        alarms. Applicable if rainrate is None.
    num_workers: int, optional
        The number of workers to use for parallel computation. Applicable
        if dask is installed or pyFFTW is used for computing the FFT. When
        num_workers>1, it is advisable to disable OpenMP by setting the
        environment variable OMP_NUM_THREADS to 1. This avoids slowdown
        caused by too many simultaneous threads.
    extrap_kwargs: dict, optional
        Optional dictionary containing keyword arguments for the
        extrapolation method. See the documentation of
        pysteps.extrapolation.
    filter_kwargs: dict, optional
        Optional dictionary containing keyword arguments for the filter
        method. See the documentation of
        pysteps.cascade.bandpass_filters.py.
    measure_time: bool, optional
        If True, measure, print and return the computation time.

    Returns
    -------
    out: ndarray
        A three-dimensional array of shape (num_timesteps,m,n) containing a
        time series of forecast precipitation fields. The time series
        starts from t0+timestep, where timestep is taken from the input
        VIL/rain rate fields. If measure_time is True, the return value is
        a three-element tuple containing the nowcast array, the
        initialization time of the nowcast generator and the time used in
        the main loop (seconds).

    References
    ----------
    :cite:`PCLH2020`
    """
    _check_inputs(vil, rainrate, velocity, timesteps, ar_order)

    # copy keyword-argument dicts so the caller's dicts are not mutated
    if extrap_kwargs is None:
        extrap_kwargs = dict()
    else:
        extrap_kwargs = extrap_kwargs.copy()

    if filter_kwargs is None:
        filter_kwargs = dict()

    print("Computing ANVIL nowcast")
    print("-----------------------")
    print("")

    print("Inputs")
    print("------")
    print(f"input dimensions: {vil.shape[1]}x{vil.shape[2]}")
    print("")

    print("Methods")
    print("-------")
    print(f"extrapolation: {extrap_method}")
    print(f"FFT: {fft_method}")
    print("")

    print("Parameters")
    print("----------")
    if isinstance(timesteps, int):
        print(f"number of time steps: {timesteps}")
    else:
        print(f"time steps: {timesteps}")
    print(f"parallel threads: {num_workers}")
    print(f"number of cascade levels: {n_cascade_levels}")
    print(f"order of the ARI(p,1) model: {ar_order}")
    if type(ar_window_radius) == int:
        print(f"ARI(p,1) window radius: {ar_window_radius}")
    else:
        print("ARI(p,1) window radius: none")

    print(f"R(VIL) window radius: {r_vil_window_radius}")

    if measure_time:
        starttime_init = time.time()

    m, n = vil.shape[1:]
    vil = vil.copy()

    # low-VIL mask used to suppress spurious precipitation in the output
    # (only when no rain rate field is supplied)
    if rainrate is None and apply_rainrate_mask:
        rainrate_mask = vil[-1, :] < 0.1
    else:
        rainrate_mask = None

    if rainrate is not None:
        # determine the coefficients fields of the relation R=a*VIL+b by
        # localized linear regression
        r_vil_a, r_vil_b = _r_vil_regression(vil[-1, :], rainrate, r_vil_window_radius)
    else:
        r_vil_a, r_vil_b = None, None

    # transform the input fields to Lagrangian coordinates by extrapolation
    extrapolator = extrapolation.get_method(extrap_method)

    extrap_kwargs["allow_nonfinite_values"] = (
        True if np.any(~np.isfinite(vil)) else False
    )

    res = list()

    def worker(vil, i):
        # advect input field i to the time of the most recent input
        return (
            i,
            extrapolator(
                vil[i, :],
                velocity,
                vil.shape[0] - 1 - i,
                **extrap_kwargs,
            )[-1],
        )

    for i in range(vil.shape[0] - 1):
        if not DASK_IMPORTED or num_workers == 1:
            vil[i, :, :] = worker(vil, i)[1]
        else:
            res.append(dask.delayed(worker)(vil, i))

    if DASK_IMPORTED and num_workers > 1:
        num_workers_ = len(res) if num_workers > len(res) else num_workers
        vil_e = dask.compute(*res, num_workers=num_workers_)
        for i in range(len(vil_e)):
            vil[vil_e[i][0], :] = vil_e[i][1]

    # compute the final mask as the intersection of the masks of the advected
    # fields
    mask = np.isfinite(vil[0, :])
    for i in range(1, vil.shape[0]):
        mask = np.logical_and(mask, np.isfinite(vil[i, :]))

    if rainrate is None and apply_rainrate_mask:
        rainrate_mask = np.logical_and(rainrate_mask, mask)

    # apply cascade decomposition to the advected input fields
    bp_filter_method = cascade.get_method("gaussian")
    bp_filter = bp_filter_method((m, n), n_cascade_levels, **filter_kwargs)

    fft = utils.get_method(fft_method, shape=vil.shape[1:], n_threads=num_workers)

    decomp_method, recomp_method = cascade.get_method("fft")

    vil_dec = np.empty((n_cascade_levels, vil.shape[0], m, n))
    for i in range(vil.shape[0]):
        vil_ = vil[i, :].copy()
        # non-finite values must be zeroed before the FFT-based decomposition
        vil_[~np.isfinite(vil_)] = 0.0
        vil_dec_i = decomp_method(vil_, bp_filter, fft_method=fft)
        for j in range(n_cascade_levels):
            vil_dec[j, i, :] = vil_dec_i["cascade_levels"][j, :]

    # compute time-lagged correlation coefficients for the cascade levels of
    # the advected and differenced input fields
    gamma = np.empty((n_cascade_levels, ar_order, m, n))
    for i in range(n_cascade_levels):
        vil_diff = np.diff(vil_dec[i, :], axis=0)
        vil_diff[~np.isfinite(vil_diff)] = 0.0
        for j in range(ar_order):
            gamma[i, j, :] = _moving_window_corrcoef(
                vil_diff[-1, :], vil_diff[-(j + 2), :], ar_window_radius
            )

    if ar_order == 2:
        # if the order of the ARI model is 2, adjust the correlation coefficients
        # so that the resulting process is stationary
        for i in range(n_cascade_levels):
            gamma[i, 1, :] = autoregression.adjust_lag2_corrcoef2(
                gamma[i, 0, :], gamma[i, 1, :]
            )

    # estimate the parameters of the ARI models
    phi = []
    for i in range(n_cascade_levels):
        if ar_order > 2:
            phi_ = autoregression.estimate_ar_params_yw_localized(gamma[i, :], d=1)
        elif ar_order == 2:
            phi_ = _estimate_ar2_params(gamma[i, :])
        else:
            phi_ = _estimate_ar1_params(gamma[i, :])
        phi.append(phi_)

    # only the ar_order+1 most recent cascades are needed to iterate the model
    vil_dec = vil_dec[:, -(ar_order + 1) :, :]

    if measure_time:
        init_time = time.time() - starttime_init

    print("Starting nowcast computation.")

    rainrate_f = []

    extrap_kwargs["return_displacement"] = True

    state = {"vil_dec": vil_dec}
    params = {
        "apply_rainrate_mask": apply_rainrate_mask,
        "mask": mask,
        "n_cascade_levels": n_cascade_levels,
        "phi": phi,
        "rainrate": rainrate,
        "rainrate_mask": rainrate_mask,
        "recomp_method": recomp_method,
        "r_vil_a": r_vil_a,
        "r_vil_b": r_vil_b,
    }

    rainrate_f = nowcast_main_loop(
        vil[-1, :],
        velocity,
        state,
        timesteps,
        extrap_method,
        _update,
        extrap_kwargs=extrap_kwargs,
        params=params,
        measure_time=measure_time,
    )
    if measure_time:
        rainrate_f, mainloop_time = rainrate_f

    if measure_time:
        return np.stack(rainrate_f), init_time, mainloop_time
    else:
        return np.stack(rainrate_f)
in range(n_cascade_levels): if ar_order > 2: phi_ = autoregression.estimate_ar_params_yw_localized(gamma[i, :], d=1) elif ar_order == 2: phi_ = _estimate_ar2_params(gamma[i, :]) else: phi_ = _estimate_ar1_params(gamma[i, :]) phi.append(phi_) vil_dec = vil_dec[:, -(ar_order + 1) :, :] if measure_time: init_time = time.time() - starttime_init print("Starting nowcast computation.") rainrate_f = [] extrap_kwargs["return_displacement"] = True state = {"vil_dec": vil_dec} params = { "apply_rainrate_mask": apply_rainrate_mask, "mask": mask, "n_cascade_levels": n_cascade_levels, "phi": phi, "rainrate": rainrate, "rainrate_mask": rainrate_mask, "recomp_method": recomp_method, "r_vil_a": r_vil_a, "r_vil_b": r_vil_b, } rainrate_f = nowcast_main_loop( vil[-1, :], velocity, state, timesteps, extrap_method, _update, extrap_kwargs=extrap_kwargs, params=params, measure_time=measure_time, ) if measure_time: rainrate_f, mainloop_time = rainrate_f if measure_time: return np.stack(rainrate_f), init_time, mainloop_time else: return np.stack(rainrate_f) def _check_inputs(vil, rainrate, velocity, timesteps, ar_order): if vil.ndim != 3: raise ValueError( "vil.shape = %s, but a three-dimensional array expected" % str(vil.shape) ) if rainrate is not None: if rainrate.ndim != 2: raise ValueError( "rainrate.shape = %s, but a two-dimensional array expected" % str(rainrate.shape) ) if vil.shape[0] != ar_order + 2: raise ValueError( "vil.shape[0] = %d, but vil.shape[0] = ar_order + 2 = %d required" % (vil.shape[0], ar_order + 2) ) if velocity.ndim != 3: raise ValueError( "velocity.shape = %s, but a three-dimensional array expected" % str(velocity.shape) ) if isinstance(timesteps, list) and not sorted(timesteps) == timesteps: raise ValueError("timesteps is not in ascending order") # optimized version of timeseries.autoregression.estimate_ar_params_yw_localized # for an ARI(1,1) model def _estimate_ar1_params(gamma): phi = [] phi.append(1 + gamma[0, :]) phi.append(-gamma[0, :]) 
phi.append(np.zeros(gamma[0, :].shape)) return phi # optimized version of timeseries.autoregression.estimate_ar_params_yw_localized # for an ARI(2,1) model def _estimate_ar2_params(gamma): phi_diff = [] phi_diff.append(gamma[0, :] * (1 - gamma[1, :]) / (1 - gamma[0, :] * gamma[0, :])) phi_diff.append( (gamma[1, :] - gamma[0, :] * gamma[0, :]) / (1 - gamma[0, :] * gamma[0, :]) ) phi = [] phi.append(1 + phi_diff[0]) phi.append(-phi_diff[0] + phi_diff[1]) phi.append(-phi_diff[1]) phi.append(np.zeros(phi_diff[0].shape)) return phi # Compute correlation coefficients of two 2d fields in a moving window with # a Gaussian weight function. See Section II.G of PCLH2020. Differently to the # standard formula for the Pearson correlation coefficient, the mean value of # the inputs is assumed to be zero. def _moving_window_corrcoef(x, y, window_radius): mask = np.logical_and(np.isfinite(x), np.isfinite(y)) x = x.copy() x[~mask] = 0.0 y = y.copy() y[~mask] = 0.0 mask = mask.astype(float) if window_radius is not None: n = gaussian_filter(mask, window_radius, mode="constant") ssx = gaussian_filter(x**2, window_radius, mode="constant") ssy = gaussian_filter(y**2, window_radius, mode="constant") sxy = gaussian_filter(x * y, window_radius, mode="constant") else: n = np.mean(mask) ssx = np.mean(x**2) ssy = np.mean(y**2) sxy = np.mean(x * y) stdx = np.sqrt(ssx / n) stdy = np.sqrt(ssy / n) cov = sxy / n mask = np.logical_and(stdx > 1e-8, stdy > 1e-8) mask = np.logical_and(mask, stdx * stdy > 1e-8) mask = np.logical_and(mask, n > 1e-3) corr = np.empty(x.shape) corr[mask] = cov[mask] / (stdx[mask] * stdy[mask]) corr[~mask] = 0.0 return corr # Determine the coefficients of the regression R=a*VIL+b. # See Section II.G of PCLH2020. # The parameters a and b are estimated in a localized fashion for each pixel # in the input grid. This is done using a window specified by window_radius. # Zero and non-finite values are not included. 
In addition, the regression is # done by using a Gaussian weight function depending on the distance to the # current grid point. def _r_vil_regression(vil, r, window_radius): vil = vil.copy() vil[~np.isfinite(vil)] = 0.0 r = r.copy() r[~np.isfinite(r)] = 0.0 mask_vil = vil > 10.0 mask_r = r > 0.1 mask_obs = np.logical_and(mask_vil, mask_r) vil[~mask_obs] = 0.0 r[~mask_obs] = 0.0 n = gaussian_filter(mask_obs.astype(float), window_radius, mode="constant") sx = gaussian_filter(vil, window_radius, mode="constant") sx2 = gaussian_filter(vil * vil, window_radius, mode="constant") sxy = gaussian_filter(vil * r, window_radius, mode="constant") sy = gaussian_filter(r, window_radius, mode="constant") rhs1 = sxy rhs2 = sy m1 = sx2 m2 = sx m3 = sx m4 = n c = 1.0 / (m1 * m4 - m2 * m3) m_inv_11 = c * m4 m_inv_12 = -c * m2 m_inv_21 = -c * m3 m_inv_22 = c * m1 mask = np.abs(m1 * m4 - m2 * m3) > 1e-8 mask = np.logical_and(mask, n > 0.01) a = np.empty(vil.shape) a[mask] = m_inv_11[mask] * rhs1[mask] + m_inv_12[mask] * rhs2[mask] a[~mask] = 0.0 a[~mask_vil] = 0.0 b = np.empty(vil.shape) b[mask] = m_inv_21[mask] * rhs1[mask] + m_inv_22[mask] * rhs2[mask] b[~mask] = 0.0 b[~mask_vil] = 0.0 return a, b def _update(state, params): # iterate the ARI models for each cascade level for i in range(params["n_cascade_levels"]): state["vil_dec"][i, :] = autoregression.iterate_ar_model( state["vil_dec"][i, :], params["phi"][i] ) # recompose the cascade to obtain the forecast field vil_dec_dict = {} vil_dec_dict["cascade_levels"] = state["vil_dec"][:, -1, :] vil_dec_dict["domain"] = "spatial" vil_dec_dict["normalized"] = False vil_f = params["recomp_method"](vil_dec_dict) vil_f[~params["mask"]] = np.nan if params["rainrate"] is not None: # convert VIL to rain rate rainrate_f_new = params["r_vil_a"] * vil_f + params["r_vil_b"] else: rainrate_f_new = vil_f if params["apply_rainrate_mask"]: rainrate_f_new[params["rainrate_mask"]] = 0.0 rainrate_f_new[rainrate_f_new < 0.0] = 0.0 return rainrate_f_new, 
state ================================================ FILE: pysteps/nowcasts/extrapolation.py ================================================ """ pysteps.nowcasts.extrapolation ============================== Implementation of extrapolation-based nowcasting methods. .. autosummary:: :toctree: ../generated/ forecast """ import time import numpy as np from pysteps import extrapolation def forecast( precip, velocity, timesteps, extrap_method="semilagrangian", extrap_kwargs=None, measure_time=False, ): """ Generate a nowcast by applying a simple advection-based extrapolation to the given precipitation field. .. _ndarray: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html Parameters ---------- precip: array-like Two-dimensional array of shape (m,n) containing the input precipitation field. velocity: array-like Array of shape (2,m,n) containing the x- and y-components of the advection field. The velocities are assumed to represent one time step between the inputs. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements of the list are required to be in ascending order. extrap_method: str, optional Name of the extrapolation method to use. See the documentation of pysteps.extrapolation.interface. extrap_kwargs: dict, optional Optional dictionary that is expanded into keyword arguments for the extrapolation method. measure_time: bool, optional If True, measure, print, and return the computation time. Returns ------- out: ndarray_ Three-dimensional array of shape (num_timesteps, m, n) containing a time series of nowcast precipitation fields. The time series starts from t0 + timestep, where timestep is taken from the advection field velocity. If *measure_time* is True, the return value is a two-element tuple containing this array and the computation time (seconds). 
See also -------- pysteps.extrapolation.interface """ _check_inputs(precip, velocity, timesteps) if extrap_kwargs is None: extrap_kwargs = dict() else: extrap_kwargs = extrap_kwargs.copy() extrap_kwargs["allow_nonfinite_values"] = ( True if np.any(~np.isfinite(precip)) else False ) if measure_time: print( "Computing extrapolation nowcast from a " f"{precip.shape[0]:d}x{precip.shape[1]:d} input grid... ", end="", ) if measure_time: start_time = time.time() extrapolation_method = extrapolation.get_method(extrap_method) precip_forecast = extrapolation_method(precip, velocity, timesteps, **extrap_kwargs) if measure_time: computation_time = time.time() - start_time print(f"{computation_time:.2f} seconds.") if measure_time: return precip_forecast, computation_time else: return precip_forecast def _check_inputs(precip, velocity, timesteps): if precip.ndim != 2: raise ValueError("The input precipitation must be a " "two-dimensional array") if velocity.ndim != 3: raise ValueError("Input velocity must be a three-dimensional array") if precip.shape != velocity.shape[1:3]: raise ValueError( "Dimension mismatch between " "input precipitation and velocity: " + "shape(precip)=%s, shape(velocity)=%s" % (str(precip.shape), str(velocity.shape)) ) if isinstance(timesteps, list) and not sorted(timesteps) == timesteps: raise ValueError("timesteps is not in ascending order") ================================================ FILE: pysteps/nowcasts/interface.py ================================================ r""" pysteps.nowcasts.interface ========================== Interface for the nowcasts module. It returns a callable function for computing nowcasts. The methods in the nowcasts module implement the following interface: ``forecast(precip, velocity, timesteps, **keywords)`` where precip is a (m,n) array with input precipitation field to be advected and velocity is a (2,m,n) array containing the x- and y-components of the m x n advection field. timesteps can be an integer or a list. 
def get_method(name):
    r"""
    Return a callable function for computing nowcasts.

    Description:
    Return a callable function for computing deterministic or ensemble
    precipitation nowcasts.

    Implemented methods:

    +-----------------+-------------------------------------------------------+
    |     Name        |              Description                              |
    +=================+=======================================================+
    |  anvil          | the autoregressive nowcasting using VIL (ANVIL)       |
    |                 | nowcasting method developed in :cite:`PCLH2020`       |
    +-----------------+-------------------------------------------------------+
    |  eulerian       | this approach keeps the last observation frozen       |
    |                 | (Eulerian persistence)                                |
    +-----------------+-------------------------------------------------------+
    |  lagrangian or  | this approach extrapolates the last observation       |
    |  extrapolation  | using the motion field (Lagrangian persistence)       |
    +-----------------+-------------------------------------------------------+
    |  linda          | the LINDA method developed in :cite:`PCN2021`         |
    +-----------------+-------------------------------------------------------+
    |  lagrangian\_   | this approach computes local Lagrangian probability   |
    |  probability    | forecasts of threshold exceedances                    |
    +-----------------+-------------------------------------------------------+
    |  sprog          | the S-PROG method described in :cite:`Seed2003`       |
    +-----------------+-------------------------------------------------------+
    |  steps          | the STEPS stochastic nowcasting method described in   |
    |                 | :cite:`Seed2003`, :cite:`BPS2006` and :cite:`SPN2013` |
    +-----------------+-------------------------------------------------------+
    |  sseps          | short-space ensemble prediction system (SSEPS).       |
    |                 | Essentially, this is a localization of STEPS          |
    +-----------------+-------------------------------------------------------+
    """
    if not isinstance(name, str):
        raise TypeError(
            "Only strings supported for the method's names.\n"
            + "Available names:"
            + str(list(_nowcast_methods.keys()))
        ) from None

    method_name = name.lower()
    try:
        return _nowcast_methods[method_name]
    except KeyError:
        raise ValueError(
            "Unknown nowcasting method {}\n".format(method_name)
            + "The available methods are:"
            + str(list(_nowcast_methods.keys()))
        ) from None
slope: float, optional The slope of the relationship between optimum scale and lead time in pixels / timestep. Germann and Zawadzki (2004) found the optimal slope to be equal to 1 km / minute. Returns ------- out: ndarray Three-dimensional array of shape (num_timesteps, m, n) containing a time series of nowcast exceedence probabilities. The time series starts from t0 + timestep, where timestep is taken from the advection field velocity. References ---------- Germann, U. and I. Zawadzki, 2004: Scale Dependence of the Predictability of Precipitation from Continental Radar Images. Part II: Probability Forecasts. Journal of Applied Meteorology, 43(1), 74-89. """ # Compute deterministic extrapolation forecast if isinstance(timesteps, int) and timesteps > 0: timesteps = np.arange(1, timesteps + 1) elif not isinstance(timesteps, list): raise ValueError(f"invalid value for argument 'timesteps': {timesteps}") precip_forecast = extrapolation.forecast( precip, velocity, timesteps, extrap_method, extrap_kwargs, ) # Ignore missing values nanmask = np.isnan(precip_forecast) precip_forecast[nanmask] = threshold - 1 valid_pixels = (~nanmask).astype(float) # Compute exceedance probabilities using a neighborhood approach precip_forecast = (precip_forecast >= threshold).astype(float) for i, timestep in enumerate(timesteps): scale = int(timestep * slope) if scale == 0: continue kernel = _get_kernel(scale) kernel_sum = convolve( valid_pixels[i, ...], kernel, mode="same", ) precip_forecast[i, ...] = convolve( precip_forecast[i, ...], kernel, mode="same", ) precip_forecast[i, ...] /= kernel_sum precip_forecast = np.clip(precip_forecast, 0, 1) precip_forecast[nanmask] = np.nan return precip_forecast def _get_kernel(size): """ Generate a circular kernel. Parameters ---------- size : int Size of the circular kernel (its diameter). For size < 5, the kernel is a square instead of a circle. 
Returns ------- 2-D array with kernel values """ middle = max((int(size / 2), 1)) if size < 5: return np.ones((size, size), dtype=np.float32) else: xx, yy = np.mgrid[:size, :size] circle = (xx - middle) ** 2 + (yy - middle) ** 2 return np.asarray(circle <= (middle**2), dtype=np.float32) ================================================ FILE: pysteps/nowcasts/linda.py ================================================ """ pysteps.nowcasts.linda ====================== This module implements the Lagrangian INtegro-Difference equation model with Autoregression (LINDA). The model combines extrapolation, S-PROG, STEPS, ANVIL, integro-difference equation (IDE) and cell tracking methods. It can produce both deterministic and probabilistic nowcasts. LINDA is specifically designed for nowcasting intense localized rainfall. For this purpose, it is expected to give better forecast skill than S-PROG or STEPS. The model consists of the following components: 1. feature detection to identify rain cells 2. advection-based extrapolation 3. autoregressive integrated ARI(p,1) process for growth and decay of rainfall 4. convolution to account for loss of predictability 5. stochastic perturbations to simulate forecast errors LINDA utilizes a sparse feature-based representation of the input rain rate fields. This allows localization to cells containing intense rainfall. Building on extrapolation nowcast, the temporal evolution of rainfall is modeled in the Lagrangian coordinates. Using the ARI process is adapted from ANVIL :cite:`PCLH2020`, and the convolution is adapted from the integro-difference equation (IDE) models proposed in :cite:`FW2005` and :cite:`XWF2005`. The combination of these two approaches essentially replaces the cascade-based autoregressive process used in S-PROG and STEPS. Using the convolution gives several advantages such as the ability to handle anisotropic structure, domain boundaries and missing data. 
Based on the marginal distribution and covariance structure of forecast errors, localized perturbations are generated by adapting the short-space Fourier transform (SSFT) methodology developed in :cite:`NBSG2017`. .. autosummary:: :toctree: ../generated/ forecast """ import time import warnings from pysteps.utils.check_norain import check_norain try: import dask DASK_IMPORTED = True except ImportError: DASK_IMPORTED = False import numpy as np from scipy import optimize as opt from scipy import stats from scipy.integrate import nquad from scipy.interpolate import interp1d from scipy.signal import convolve from pysteps import extrapolation, feature, noise from pysteps.nowcasts.utils import nowcast_main_loop, zero_precipitation_forecast def forecast( precip, velocity, timesteps, feature_method="blob", max_num_features=25, feature_kwargs=None, ari_order=1, kernel_type="anisotropic", localization_window_radius=None, errdist_window_radius=None, acf_window_radius=None, extrap_method="semilagrangian", extrap_kwargs=None, add_perturbations=True, pert_thrs=(0.5, 1.0), n_ens_members=10, vel_pert_method="bps", vel_pert_kwargs=None, kmperpixel=None, timestep=None, seed=None, num_workers=1, use_multiprocessing=False, measure_time=False, callback=None, return_output=True, ): """ Generate a deterministic or ensemble nowcast by using the Lagrangian INtegro-Difference equation model with Autoregression (LINDA) model. Parameters ---------- precip: array_like Array of shape (ari_order + 2, m, n) containing the input rain rate or reflectivity fields (in linear scale) ordered by timestamp from oldest to newest. The time steps between the inputs are assumed to be regular. velocity: array_like Array of shape (2, m, n) containing the x- and y-components of the advection field. The velocities are assumed to represent one time step between the inputs. timesteps: int or list of floats Number of time steps to forecast or a list of time steps. 
If a list is given, the values are assumed to be relative to the input time step and in ascending order. feature_method: {'blob', 'domain' 'shitomasi'} Feature detection method: +-------------------+-----------------------------------------------------+ | Method name | Description | +===================+=====================================================+ | blob | Laplacian of Gaussian (LoG) blob detector | | | implemented in scikit-image | +-------------------+-----------------------------------------------------+ | domain | no feature detection, the model is applied over the | | | whole domain without localization | +-------------------+-----------------------------------------------------+ | shitomasi | Shi-Tomasi corner detector implemented in OpenCV | +-------------------+-----------------------------------------------------+ Default: 'blob' max_num_features: int, optional Maximum number of features to use. It is recommended to set this between 20 and 50, which gives a good tradeoff between localization and computation time. Default: 25 feature_kwargs: dict, optional Keyword arguments that are passed as ``**kwargs`` for the feature detector. See :py:mod:`pysteps.feature.blob` and :py:mod:`pysteps.feature.shitomasi`. ari_order: {1, 2}, optional The order of the ARI(p, 1) model. Default: 1 kernel_type: {"anisotropic", "isotropic"}, optional The type of the kernel. Default: 'anisotropic' localization_window_radius: float, optional The standard deviation of the Gaussian localization window. Default: 0.2 * min(m, n) errdist_window_radius: float, optional The standard deviation of the Gaussian window for estimating the forecast error distribution. Default: 0.15 * min(m, n) acf_window_radius: float, optional The standard deviation of the Gaussian window for estimating the forecast error ACF. Default: 0.25 * min(m, n) extrap_method: str, optional The extrapolation method to use. See the documentation of :py:mod:`pysteps.extrapolation.interface`. 
Default: 'semilagrangian' extrap_kwargs: dict, optional Optional dictionary containing keyword arguments for the extrapolation method. See :py:mod:`pysteps.extrapolation.interface`. add_perturbations: bool Set to False to disable perturbations and generate a single deterministic nowcast. Default: True pert_thrs: float Two-element tuple containing the threshold values for estimating the perturbation parameters (mm/h). Default: (0.5, 1.0) n_ens_members: int, optional The number of ensemble members to generate. Default: 10 vel_pert_method: {'bps', None}, optional Name of the generator to use for perturbing the advection field. See :py:mod:`pysteps.noise.interface`. Default: 'bps' vel_pert_kwargs: dict, optional Optional dictionary containing keyword arguments 'p_par' and 'p_perp' for the initializer of the velocity perturbator. The choice of the optimal parameters depends on the domain and the used optical flow method. For the default values and parameters optimized for different domains, see :py:func:`pysteps.nowcasts.steps.forecast`. kmperpixel: float, optional Spatial resolution of the input data (kilometers/pixel). Required if vel_pert_method is not None. timestep: float, optional Time step of the motion vectors (minutes). Required if vel_pert_method is not None. seed: int, optional Optional seed for the random generators. num_workers: int, optional The number of workers to use for parallel computations. Applicable if dask is installed. Default: 1 use_multiprocessing: bool, optional Set to True to improve the performance of certain parallelized parts of the code. If set to True, the main script calling linda.forecast must be enclosed within the 'if __name__ == "__main__":' block. Default: False measure_time: bool, optional If set to True, measure, print and return the computation time. Default: False callback: function, optional Optional function that is called after computation of each time step of the nowcast. 
        The function takes one argument: a three-dimensional array of shape
        (n_ens_members,h,w), where h and w are the height and width of the
        input precipitation fields, respectively. This can be used, for
        instance, for writing the outputs into files.
        Default: None
    return_output: bool, optional
        Set to False to disable returning the outputs as numpy arrays. This
        can save memory if the intermediate results are written to output
        files using the callback function.
        Default: True

    Returns
    -------
    out: numpy.ndarray
        A four-dimensional array of shape (n_ens_members, len(timesteps), m, n)
        containing a time series of forecast precipitation fields for each
        ensemble member. If add_perturbations is False, the first dimension is
        dropped. The time series starts from t0 + timestep, where timestep is
        taken from the input fields. If measure_time is True, the return value
        is a three-element tuple containing the nowcast array, the
        initialization time of the nowcast generator and the time used in the
        main loop (seconds). If return_output is set to False, a single None
        value is returned instead.

    Notes
    -----
    It is recommended to choose the feature detector parameters so that the
    number of features is around 20-40. This gives a good tradeoff between
    localization and computation time.

    It is highly recommended to set num_workers>1 to reduce computation time.
    In this case, it is advisable to disable OpenMP by setting the environment
    variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many
    simultaneous threads.
""" _check_inputs(precip, velocity, timesteps, ari_order) if feature_kwargs is None: feature_kwargs = dict() if extrap_kwargs is None: extrap_kwargs = dict() else: extrap_kwargs = extrap_kwargs.copy() if localization_window_radius is None: localization_window_radius = 0.2 * np.min(precip.shape[1:]) if add_perturbations: if errdist_window_radius is None: errdist_window_radius = 0.15 * min(precip.shape[1], precip.shape[2]) if acf_window_radius is None: acf_window_radius = 0.25 * min(precip.shape[1], precip.shape[2]) if vel_pert_method is not None: if kmperpixel is None: raise ValueError("vel_pert_method is set but kmperpixel is None") if timestep is None: raise ValueError("vel_pert_method is set but timestep is None") if vel_pert_kwargs is None: vel_pert_kwargs = dict() print("Computing LINDA nowcast") print("-----------------------") print("") print("Inputs") print("------") print(f"dimensions: {precip.shape[1]}x{precip.shape[2]}") print(f"number of time steps: {precip.shape[0]}") print("") print("Methods") print("-------") nowcast_type = "ensemble" if add_perturbations else "deterministic" print(f"nowcast type: {nowcast_type}") print(f"feature detector: {feature_method}") print(f"extrapolator: {extrap_method}") print(f"kernel type: {kernel_type}") if add_perturbations and vel_pert_method is not None: print(f"velocity perturbator: {vel_pert_method}") print("") print("Parameters") print("----------") if isinstance(timesteps, int): print(f"number of time steps: {timesteps}") else: print(f"time steps: {timesteps}") print(f"ARI model order: {ari_order}") print(f"localization window radius: {localization_window_radius}") if add_perturbations: print(f"error dist. 
window radius: {errdist_window_radius}") print(f"error ACF window radius: {acf_window_radius}") print(f"ensemble size: {n_ens_members}") print(f"parallel workers: {num_workers}") print(f"seed: {seed}") if vel_pert_method == "bps": vp_par = vel_pert_kwargs.get( "p_par", noise.motion.get_default_params_bps_par() ) vp_perp = vel_pert_kwargs.get( "p_perp", noise.motion.get_default_params_bps_perp() ) print( f"velocity perturbations, parallel: {vp_par[0]:.2f}, {vp_par[1]:.2f}, {vp_par[2]:.2f}" ) print( f"velocity perturbations, perpendicular: {vp_perp[0]:.2f}, {vp_perp[1]:.2f}, {vp_perp[2]:.2f}" ) vel_pert_kwargs = vel_pert_kwargs.copy() vel_pert_kwargs["vp_par"] = vp_par vel_pert_kwargs["vp_perp"] = vp_perp extrap_kwargs["allow_nonfinite_values"] = ( True if np.any(~np.isfinite(precip)) else False ) starttime_init = time.time() if check_norain(precip, 0.0, 0.0, None): return zero_precipitation_forecast( n_ens_members if nowcast_type == "ensemble" else None, timesteps, precip, callback, return_output, measure_time, starttime_init, ) forecast_gen = _linda_deterministic_init( precip, velocity, feature_method, max_num_features, feature_kwargs, ari_order, kernel_type, localization_window_radius, extrap_method, extrap_kwargs, add_perturbations, num_workers, measure_time, ) if measure_time: forecast_gen, precip_lagr_diff, init_time = forecast_gen else: forecast_gen, precip_lagr_diff = forecast_gen if add_perturbations: pert_gen = _linda_perturbation_init( precip, precip_lagr_diff, velocity, forecast_gen, pert_thrs, localization_window_radius, errdist_window_radius, acf_window_radius, vel_pert_method, vel_pert_kwargs, kmperpixel, timestep, num_workers, use_multiprocessing, measure_time, ) if measure_time: precip_pert_gen, velocity_pert_gen, pert_init_time = pert_gen init_time += pert_init_time else: precip_pert_gen, velocity_pert_gen = pert_gen else: precip_pert_gen = None velocity_pert_gen = None precip_forecast = _linda_forecast( precip, precip_lagr_diff[1:], timesteps, 
forecast_gen, precip_pert_gen, velocity_pert_gen, n_ens_members, seed, measure_time, True, return_output, callback, ) if return_output: if measure_time: return precip_forecast[0], init_time, precip_forecast[1] else: return precip_forecast else: return None def _check_inputs(precip, velocity, timesteps, ari_order): if ari_order not in [1, 2]: raise ValueError(f"ari_order {ari_order} given, 1 or 2 required") if len(precip.shape) != 3: raise ValueError("precip must be a three-dimensional array") if precip.shape[0] < ari_order + 2: raise ValueError("precip.shape[0] < ari_order+2") if len(velocity.shape) != 3: raise ValueError("velocity must be a three-dimensional array") if precip.shape[1:3] != velocity.shape[1:3]: raise ValueError( f"dimension mismatch between precip and velocity: precip.shape={precip.shape}, velocity.shape={velocity.shape}" ) if isinstance(timesteps, list) and not sorted(timesteps) == timesteps: raise ValueError("timesteps must be in ascending order") def _composite_convolution(field, kernels, weights): """ Compute a localized convolution by applying a set of kernels with the given spatial weights. The weights are assumed to be normalized. 
""" n = len(kernels) field_c = 0.0 for i in range(n): field_c += weights[i] * _masked_convolution(field, kernels[i]) return field_c def _compute_ellipse_bbox(phi, sigma1, sigma2, cutoff): """Compute the bounding box of an ellipse.""" r1 = cutoff * sigma1 r2 = cutoff * sigma2 phi_r = phi / 180.0 * np.pi if np.abs(phi_r - np.pi / 2) > 1e-6 and np.abs(phi_r - 3 * np.pi / 2) > 1e-6: alpha = np.arctan(-r2 * np.sin(phi_r) / (r1 * np.cos(phi_r))) w = r1 * np.cos(alpha) * np.cos(phi_r) - r2 * np.sin(alpha) * np.sin(phi_r) alpha = np.arctan(r2 * np.cos(phi_r) / (r1 * np.sin(phi_r))) h = r1 * np.cos(alpha) * np.sin(phi_r) + r2 * np.sin(alpha) * np.cos(phi_r) else: w = sigma2 * cutoff h = sigma1 * cutoff return -abs(h), -abs(w), abs(h), abs(w) def _compute_inverse_acf_mapping(target_dist, target_dist_params, n_intervals=10): """Compute the inverse ACF mapping between two distributions.""" phi = ( lambda x1, x2, rho: 1.0 / (2 * np.pi * np.sqrt(1 - rho**2)) * np.exp(-(x1**2 + x2**2 - 2 * rho * x1 * x2) / (2 * (1 - rho**2))) ) rho_1 = np.linspace(-0.9, 0.9, n_intervals) rho_2 = np.empty(len(rho_1)) mu = target_dist.mean(*target_dist_params) sigma = target_dist.std(*target_dist_params) cdf_trans = lambda x: target_dist.ppf(stats.norm.cdf(x), *target_dist_params) int_range = (-6, 6) for i, rho_1_ in enumerate(rho_1): f = ( lambda x1, x2: (cdf_trans(x1) - mu) * (cdf_trans(x2) - mu) * phi(x1, x2, rho_1_) ) opts = {"epsabs": 1e-8, "epsrel": 1e-8, "limit": 1} rho_2[i] = nquad(f, (int_range, int_range), opts=opts)[0] / (sigma * sigma) return interp1d(rho_2, rho_1, fill_value="extrapolate") def _compute_kernel_anisotropic(params, cutoff=6.0): """Compute anisotropic Gaussian convolution kernel.""" phi, sigma1, sigma2 = params phi_r = phi / 180.0 * np.pi rot_inv = np.array( [[np.cos(phi_r), np.sin(phi_r)], [-np.sin(phi_r), np.cos(phi_r)]] ) bb_y1, bb_x1, bb_y2, bb_x2 = _compute_ellipse_bbox(phi, sigma1, sigma2, cutoff) x = np.arange(int(bb_x1), int(bb_x2) + 1).astype(float) if len(x) % 2 
== 0: x = np.arange(int(bb_x1) - 1, int(bb_x2) + 1).astype(float) y = np.arange(int(bb_y1), int(bb_y2) + 1).astype(float) if len(y) % 2 == 0: y = np.arange(int(bb_y1) - 1, int(bb_y2) + 1).astype(float) x_grid, y_grid = np.meshgrid(x, y) xy_grid = np.vstack([x_grid.flatten(), y_grid.flatten()]) xy_grid = np.dot(rot_inv, xy_grid) x2 = xy_grid[0, :] * xy_grid[0, :] y2 = xy_grid[1, :] * xy_grid[1, :] result = np.exp(-(x2 / sigma1**2 + y2 / sigma2**2)) return np.reshape(result / np.sum(result), x_grid.shape) def _compute_kernel_isotropic(sigma, cutoff=6.0): """Compute isotropic Gaussian convolution kernel.""" bb_y1, bb_x1, bb_y2, bb_x2 = ( -sigma * cutoff, -sigma * cutoff, sigma * cutoff, sigma * cutoff, ) x = np.arange(int(bb_x1), int(bb_x2) + 1).astype(float) if len(x) % 2 == 0: x = np.arange(int(bb_x1) - 1, int(bb_x2) + 1).astype(float) y = np.arange(int(bb_y1), int(bb_y2) + 1).astype(float) if len(y) % 2 == 0: y = np.arange(int(bb_y1) - 1, int(bb_y2) + 1).astype(float) x_grid, y_grid = np.meshgrid(x / sigma, y / sigma) r2 = x_grid * x_grid + y_grid * y_grid result = np.exp(-0.5 * r2) return result / np.sum(result) def _compute_parametric_acf(params, m, n): """Compute parametric ACF.""" c, phi, sigma1, sigma2 = params phi_r = phi / 180.0 * np.pi rot_inv = np.array( [[np.cos(phi_r), np.sin(phi_r)], [-np.sin(phi_r), np.cos(phi_r)]] ) if n % 2 == 0: n_max = int(n / 2) else: n_max = int(n / 2) + 1 x = np.fft.ifftshift(np.arange(-int(n / 2), n_max)) if m % 2 == 0: m_max = int(m / 2) else: m_max = int(m / 2) + 1 y = np.fft.ifftshift(np.arange(-int(m / 2), m_max)) grid_x, grid_y = np.meshgrid(x, y) grid_xy = np.vstack([grid_x.flatten(), grid_y.flatten()]) grid_xy = np.dot(rot_inv, grid_xy) grid_xy[0, :] = grid_xy[0, :] / sigma1 grid_xy[1, :] = grid_xy[1, :] / sigma2 r2 = np.reshape( grid_xy[0, :] * grid_xy[0, :] + grid_xy[1, :] * grid_xy[1, :], grid_x.shape ) result = np.exp(-np.sqrt(r2)) return c * result def _compute_sample_acf(field): """Compute sample ACF from FFT.""" # 
TODO: let user choose the FFT method field_fft = np.fft.rfft2((field - np.mean(field)) / np.std(field)) fft_abs = np.abs(field_fft * np.conj(field_fft)) return np.fft.irfft2(fft_abs, s=field.shape) / (field.shape[0] * field.shape[1]) def _compute_window_weights(coords, grid_height, grid_width, window_radius): """Compute interpolation weights.""" coords = coords.astype(float).copy() num_features = coords.shape[0] coords[:, 0] /= grid_height coords[:, 1] /= grid_width window_radius_1 = window_radius / grid_height window_radius_2 = window_radius / grid_width grid_x = (np.arange(grid_width) + 0.5) / grid_width grid_y = (np.arange(grid_height) + 0.5) / grid_height grid_x, grid_y = np.meshgrid(grid_x, grid_y) w = np.empty((num_features, grid_x.shape[0], grid_x.shape[1])) if coords.shape[0] > 1: for i, c in enumerate(coords): dy = c[0] - grid_y dx = c[1] - grid_x w[i, :] = np.exp( -dy * dy / (2 * window_radius_1**2) - dx * dx / (2 * window_radius_2**2) ) else: w[0, :] = np.ones((grid_height, grid_width)) return w def _estimate_ar1_params( field_src, field_dst, estim_weights, interp_weights, num_workers=1 ): """Constrained optimization of AR(1) parameters.""" def objf(p, *args): i = args[0] field_ar = p * field_src return np.nansum(estim_weights[i] * (field_dst - field_ar) ** 2.0) bounds = (-0.98, 0.98) def worker(i): return opt.minimize_scalar(objf, method="bounded", bounds=bounds, args=(i,)).x if DASK_IMPORTED and num_workers > 1: res = [] for i in range(len(estim_weights)): res.append(dask.delayed(worker)(i)) psi = dask.compute(*res, num_workers=num_workers, scheduler="threads") else: psi = [] for i in range(len(estim_weights)): psi.append(worker(i)) return [np.sum([psi_ * interp_weights[i] for i, psi_ in enumerate(psi)], axis=0)] def _estimate_ar2_params( field_src, field_dst, estim_weights, interp_weights, num_workers=1 ): """Constrained optimization of AR(2) parameters.""" def objf(p, *args): i = args[0] field_ar = p[0] * field_src[1] + p[1] * field_src[0] return 
np.nansum(estim_weights[i] * (field_dst - field_ar) ** 2.0) bounds = [(-1.98, 1.98), (-0.98, 0.98)] constraints = [ opt.LinearConstraint( np.array([(1, 1), (-1, 1)]), (-np.inf, -np.inf), (0.98, 0.98), keep_feasible=True, ) ] def worker(i): return opt.minimize( objf, (0.8, 0.0), method="trust-constr", bounds=bounds, constraints=constraints, args=(i,), ).x if DASK_IMPORTED and num_workers > 1: res = [] for i in range(len(estim_weights)): res.append(dask.delayed(worker)(i)) psi = dask.compute(*res, num_workers=num_workers, scheduler="threads") else: psi = [] for i in range(len(estim_weights)): psi.append(worker(i)) psi_out = [] for i in range(2): psi_out.append( np.sum([psi[j][i] * interp_weights[j] for j in range(len(psi))], axis=0) ) return psi_out def _estimate_convol_params( field_src, field_dst, weights, mask, kernel_type="anisotropic", kernel_params=None, num_workers=1, ): """Estimation of convolution kernel.""" if kernel_params is None: kernel_params = {} masks = [] for weight in weights: masks.append(np.logical_and(mask, weight > 1e-3)) def objf_aniso(p, *args): i = args[0] p = _get_anisotropic_kernel_params(p) kernel = _compute_kernel_anisotropic(p, **kernel_params) field_src_c = _masked_convolution(field_src, kernel) fval = np.sqrt(weights[i][masks[i]]) * ( field_dst[masks[i]] - field_src_c[masks[i]] ) return fval def objf_iso(p, *args): i = args[0] kernel = _compute_kernel_isotropic(p, **kernel_params) field_src_c = _masked_convolution(field_src, kernel) fval = np.sum( weights[i][masks[i]] * (field_dst[masks[i]] - field_src_c[masks[i]]) ** 2 ) return fval def worker(i): if kernel_type == "anisotropic": bounds = ((-np.inf, 0.1, 0.2), (np.inf, 10.0, 5.0)) p_opt = opt.least_squares( objf_aniso, np.array((0.0, 1.0, 1.0)), bounds=bounds, method="trf", ftol=1e-6, xtol=1e-4, gtol=1e-6, args=(i,), ) p_opt = _get_anisotropic_kernel_params(p_opt.x) return _compute_kernel_anisotropic(p_opt, **kernel_params) else: p_opt = opt.minimize_scalar( objf_iso, bounds=[0.01, 
10.0], method="bounded", args=(i,) ) p_opt = p_opt.x return _compute_kernel_isotropic(p_opt, **kernel_params) if DASK_IMPORTED and num_workers > 1: res = [] for i in range(len(weights)): res.append(dask.delayed(worker)(i)) kernels = dask.compute(*res, num_workers=num_workers, scheduler="threads") else: kernels = [] for i in range(len(weights)): kernels.append(worker(i)) return kernels def _estimate_perturbation_params( forecast_err, forecast_gen, errdist_window_radius, acf_window_radius, interp_window_radius, measure_time, num_workers, use_multiprocessing, ): """ Estimate perturbation generator parameters from forecast errors.""" pert_gen = {} pert_gen["m"] = forecast_err.shape[0] pert_gen["n"] = forecast_err.shape[1] feature_coords = forecast_gen["feature_coords"] print("Estimating perturbation parameters... ", end="", flush=True) if measure_time: starttime = time.time() mask_finite = np.isfinite(forecast_err) forecast_err = forecast_err.copy() forecast_err[~mask_finite] = 1.0 weights_dist = _compute_window_weights( feature_coords, forecast_err.shape[0], forecast_err.shape[1], errdist_window_radius, ) acf_winfunc = _window_tukey if feature_coords.shape[0] > 1 else _window_uniform def worker(i): weights_acf = acf_winfunc( forecast_err.shape[0], forecast_err.shape[1], feature_coords[i, 0], feature_coords[i, 1], acf_window_radius, acf_window_radius, ) mask = np.logical_and(mask_finite, weights_dist[i] > 0.1) if np.sum(mask) > 10 and np.sum(np.abs(forecast_err[mask] - 1.0) >= 1e-3) > 10: distpar = _fit_dist(forecast_err, stats.lognorm, weights_dist[i], mask) inv_acf_mapping = _compute_inverse_acf_mapping(stats.lognorm, distpar) mask_acf = weights_acf > 1e-4 std = _weighted_std(forecast_err[mask_acf], weights_dist[i][mask_acf]) if np.isfinite(std): acf = inv_acf_mapping( _compute_sample_acf(weights_acf * (forecast_err - 1.0) / std) ) acf = _fit_acf(acf) valid_data = True else: valid_data = False else: valid_data = False if valid_data: return distpar, std, 
np.sqrt(np.abs(np.fft.rfft2(acf))) else: return ( (1e-10, 1e-10), 1e-10, np.ones((weights_acf.shape[0], int(weights_acf.shape[1] / 2) + 1)) * 1e-10, ) dist_params = [] stds = [] acf_fft_ampl = [] if DASK_IMPORTED and num_workers > 1: res = [] for i in range(feature_coords.shape[0]): res.append(dask.delayed(worker)(i)) scheduler = "threads" if not use_multiprocessing else "multiprocessing" res = dask.compute(*res, num_workers=num_workers, scheduler=scheduler) for r in res: dist_params.append(r[0]) stds.append(r[1]) acf_fft_ampl.append(r[2]) else: for i in range(feature_coords.shape[0]): r = worker(i) dist_params.append(r[0]) stds.append(r[1]) acf_fft_ampl.append(r[2]) pert_gen["dist_param"] = dist_params pert_gen["std"] = stds pert_gen["acf_fft_ampl"] = acf_fft_ampl weights = _compute_window_weights( feature_coords, forecast_err.shape[0], forecast_err.shape[1], interp_window_radius, ) pert_gen["weights"] = weights / np.sum(weights, axis=0) if measure_time: print(f"{time.time() - starttime:.2f} seconds.") else: print("done.") return pert_gen def _fit_acf(acf): """ Fit a parametric ACF to the given sample estimate.""" def objf(p, *args): p = _get_acf_params(p) fitted_acf = _compute_parametric_acf(p, acf.shape[0], acf.shape[1]) return (acf - fitted_acf).flatten() bounds = ((0.01, -np.inf, 0.1, 0.2), (10.0, np.inf, 10.0, 5.0)) p_opt = opt.least_squares( objf, np.array((1.0, 0.0, 1.0, 1.0)), bounds=bounds, method="trf", ftol=1e-6, xtol=1e-4, gtol=1e-6, ) return _compute_parametric_acf(_get_acf_params(p_opt.x), acf.shape[0], acf.shape[1]) def _fit_dist(err, dist, wf, mask): """ Fit a lognormal distribution by maximizing the log-likelihood function with the constraint that the mean value is one.""" func = lambda p: -np.sum(np.log(stats.lognorm.pdf(err[mask], p, -0.5 * p**2))) p_opt = opt.minimize_scalar(func, bounds=(1e-3, 20.0), method="Bounded") return (p_opt.x, -0.5 * p_opt.x**2) # TODO: restrict the perturbation generation inside the radar mask def 
_generate_perturbations(pert_gen, num_workers, seed): """Generate perturbations based on the estimated forecast error statistics.""" m, n = pert_gen["m"], pert_gen["n"] dist_param = pert_gen["dist_param"] std = pert_gen["std"] acf_fft_ampl = pert_gen["acf_fft_ampl"] weights = pert_gen["weights"] perturb = stats.norm.rvs(size=(m, n), random_state=seed) perturb_fft = np.fft.rfft2(perturb) out = np.zeros((m, n)) def worker(i): if std[i] > 0.0: filtered_noise = np.fft.irfft2(acf_fft_ampl[i] * perturb_fft, s=(m, n)) filtered_noise /= np.std(filtered_noise) filtered_noise = stats.lognorm.ppf( stats.norm.cdf(filtered_noise), *dist_param[i] ) else: filtered_noise = np.ones(weights[i].shape) return weights[i] * filtered_noise if DASK_IMPORTED and num_workers > 1: res = [] for i in range(weights.shape[0]): res.append(dask.delayed(worker)(i)) res = dask.compute(*res, num_workers=num_workers, scheduler="threads") for r in res: out += r else: for i in range(weights.shape[0]): out += worker(i) return out def _get_acf_params(p): """Get ACF parameters from the given parameter vector.""" return p[0], p[1], p[2], p[3] * p[2] def _get_anisotropic_kernel_params(p): """Get anisotropic convolution kernel parameters from the given parameter vector.""" return p[0], p[1], p[2] * p[1] # TODO: use the method implemented in pysteps.timeseries.autoregression def _iterate_ar_model(input_fields, psi): """Iterate autoregressive process.""" input_field_new = 0.0 for i, psi_ in enumerate(psi): input_field_new += psi_ * input_fields[-(i + 1), :] return np.concatenate([input_fields[1:, :], input_field_new[np.newaxis, :]]) def _linda_forecast( precip, precip_lagr_diff, timesteps, forecast_gen, precip_pert_gen, velocity_pert_gen, n_ensemble_members, seed, measure_time, print_info, return_output, callback, ): """Compute LINDA nowcast.""" # compute convolved difference fields precip_lagr_diff = precip_lagr_diff.copy() for i in range(precip_lagr_diff.shape[0]): for _ in range(forecast_gen["ari_order"] - 
i): precip_lagr_diff[i] = _composite_convolution( precip_lagr_diff[i], forecast_gen["kernels_1"], forecast_gen["interp_weights"], ) # initialize the random generators if precip_pert_gen is not None: rs_precip_pert = [] np.random.seed(seed) for _ in range(n_ensemble_members): rs = np.random.RandomState(seed) rs_precip_pert.append(rs) seed = rs.randint(0, high=1e9) else: rs_precip_pert = None if velocity_pert_gen is not None: velocity_perturbators = [] np.random.seed(seed) for _ in range(n_ensemble_members): vp = velocity_pert_gen["init_func"](seed) velocity_perturbators.append( lambda t, vp=vp: velocity_pert_gen["gen_func"]( vp, t * velocity_pert_gen["timestep"] ) ) seed = np.random.RandomState(seed).randint(0, high=1e9) else: velocity_perturbators = None state = { "precip_forecast": [precip[-1].copy() for _ in range(n_ensemble_members)], "precip_lagr_diff": [ precip_lagr_diff.copy() for _ in range(n_ensemble_members) ], "rs_precip_pert": rs_precip_pert, } params = { "interp_weights": forecast_gen["interp_weights"], "kernels_1": forecast_gen["kernels_1"], "kernels_2": forecast_gen["kernels_2"], "mask_adv": forecast_gen["mask_adv"], "num_ens_members": n_ensemble_members, "num_workers": forecast_gen["num_workers"], "num_ensemble_workers": min(n_ensemble_members, forecast_gen["num_workers"]), "precip_pert_gen": precip_pert_gen, "psi": forecast_gen["psi"], } precip_forecast = nowcast_main_loop( precip[-1], forecast_gen["velocity"], state, timesteps, forecast_gen["extrap_method"], _update, extrap_kwargs=forecast_gen["extrap_kwargs"], velocity_pert_gen=velocity_perturbators, params=params, ensemble=True, num_ensemble_members=n_ensemble_members, callback=callback, return_output=return_output, num_workers=forecast_gen["num_workers"], measure_time=measure_time, ) if measure_time: precip_forecast, mainloop_time = precip_forecast if return_output: if not forecast_gen["add_perturbations"]: precip_forecast = precip_forecast[0] if measure_time: return precip_forecast, 
# NOTE(review): tail of a function whose definition begins outside this chunk;
# indentation reconstructed from the visible tokens -- confirm against the
# full file before relying on it.
mainloop_time
        else:
            return precip_forecast
    else:
        return None


def _linda_deterministic_init(
    precip,
    velocity,
    feature_method,
    max_num_features,
    feature_kwargs,
    ari_order,
    kernel_type,
    localization_window_radius,
    extrap_method,
    extrap_kwargs,
    add_perturbations,
    num_workers,
    measure_time,
):
    """
    Initialize the deterministic LINDA nowcast model.

    Detects features from the most recent input field, transforms the inputs
    to Lagrangian coordinates, and estimates the convolution kernels and the
    ARI(p,1) parameters of the deterministic model.

    Returns
    -------
    forecast_gen: dict
        Dictionary of fitted model components (kernels, ARI parameters,
        interpolation weights, advection mask, extrapolator, ...).
    precip_lagr_diff: numpy.ndarray
        Time-differenced input fields in Lagrangian coordinates.
    init_time: float
        Initialization time in seconds. Only returned when measure_time
        is True.
    """
    forecast_gen = {}
    forecast_gen["velocity"] = velocity
    forecast_gen["extrap_method"] = extrap_method
    forecast_gen["ari_order"] = ari_order
    forecast_gen["add_perturbations"] = add_perturbations
    forecast_gen["num_workers"] = num_workers
    forecast_gen["measure_time"] = measure_time

    # keep only the ari_order + 2 most recent fields: that is all the
    # differencing and AR estimation below needs
    precip = precip[-(ari_order + 2) :]
    input_length = precip.shape[0]

    starttime_init = time.time()

    extrapolator = extrapolation.get_method(extrap_method)

    # copy before mutating so the caller's kwargs dict is not modified
    extrap_kwargs = extrap_kwargs.copy()
    extrap_kwargs["allow_nonfinite_values"] = True

    forecast_gen["extrapolator"] = extrapolator
    forecast_gen["extrap_kwargs"] = extrap_kwargs

    # detect features from the most recent input field
    if feature_method in {"blob", "shitomasi"}:
        # the detectors cannot handle non-finite values; zero them out in a copy
        precip_ = precip[-1].copy()
        precip_[~np.isfinite(precip_)] = 0.0
        feature_detector = feature.get_method(feature_method)

        if measure_time:
            starttime = time.time()

        feature_kwargs = feature_kwargs.copy()
        feature_kwargs["max_num_features"] = max_num_features

        # fliplr: the detector returns (x, y); the rest of the code uses (row, col)
        feature_coords = np.fliplr(feature_detector(precip_, **feature_kwargs)[:, :2])

        feature_type = "blobs" if feature_method == "blob" else "corners"

        print("")
        print("Detecting features... ", end="", flush=True)

        if measure_time:
            print(
                f"found {feature_coords.shape[0]} {feature_type} in {time.time() - starttime:.2f} seconds."
            )
        else:
            print(f"found {feature_coords.shape[0]} {feature_type}.")

        if len(feature_coords) == 0:
            raise ValueError(
                "no features found, check input data and feature detector configuration"
            )
    elif feature_method == "domain":
        # single pseudo-feature at the origin: the whole domain is one window
        feature_coords = np.zeros((1, 2), dtype=int)
    else:
        raise NotImplementedError(
            "feature detector '%s' not implemented" % feature_method
        )

    forecast_gen["feature_coords"] = feature_coords

    # compute interpolation weights, normalized so the per-pixel sum over
    # features is one
    interp_weights = _compute_window_weights(
        feature_coords,
        precip.shape[1],
        precip.shape[2],
        localization_window_radius,
    )
    interp_weights /= np.sum(interp_weights, axis=0)
    forecast_gen["interp_weights"] = interp_weights

    # transform the input fields to the Lagrangian coordinates
    precip_lagr = np.empty(precip.shape)

    def worker(i):
        # advect field i forward so it is co-located with the most recent one
        precip_lagr[i, :] = extrapolator(
            precip[i, :],
            velocity,
            input_length - 1 - i,
            "min",
            **extrap_kwargs,
        )[-1]

    if DASK_IMPORTED and num_workers > 1:
        res = []

    print("Transforming to Lagrangian coordinates... ", end="", flush=True)
    if measure_time:
        starttime = time.time()

    for i in range(precip.shape[0] - 1):
        if DASK_IMPORTED and num_workers > 1:
            res.append(dask.delayed(worker)(i))
        else:
            worker(i)

    if DASK_IMPORTED and num_workers > 1:
        dask.compute(*res, num_workers=min(num_workers, len(res)), scheduler="threads")

    # the most recent field needs no advection
    precip_lagr[-1] = precip[-1]

    if measure_time:
        print(f"{time.time() - starttime:.2f} seconds.")
    else:
        print("done.")

    # compute advection mask and set nan to pixels, where one or more of the
    # advected input fields has a nan value
    mask_adv = np.all(np.isfinite(precip_lagr), axis=0)
    forecast_gen["mask_adv"] = mask_adv
    for i in range(precip_lagr.shape[0]):
        precip_lagr[i, ~mask_adv] = np.nan

    # compute differenced input fields in the Lagrangian coordinates
    precip_lagr_diff = np.diff(precip_lagr, axis=0)

    # estimate parameters of the deterministic model (i.e. the convolution and
    # the ARI process)
    print("Estimating the first convolution kernel... ", end="", flush=True)
    if measure_time:
        starttime = time.time()

    # estimate convolution kernel for the differenced component
    convol_weights = _compute_window_weights(
        feature_coords,
        precip.shape[1],
        precip.shape[2],
        localization_window_radius,
    )

    kernels_1 = _estimate_convol_params(
        precip_lagr_diff[-2],
        precip_lagr_diff[-1],
        convol_weights,
        mask_adv,
        kernel_type=kernel_type,
        num_workers=num_workers,
    )
    forecast_gen["kernels_1"] = kernels_1

    if measure_time:
        print(f"{time.time() - starttime:.2f} seconds.")
    else:
        print("done.")

    # compute convolved difference fields; older differences are convolved
    # more times (ari_order - i applications for field i)
    precip_lagr_diff_c = precip_lagr_diff[:-1].copy()
    for i in range(precip_lagr_diff_c.shape[0]):
        for _ in range(ari_order - i):
            precip_lagr_diff_c[i] = _composite_convolution(
                precip_lagr_diff_c[i],
                kernels_1,
                interp_weights,
            )

    print("Estimating the ARI(p,1) parameters... ", end="", flush=True)
    if measure_time:
        starttime = time.time()

    # estimate ARI(p,1) parameters
    weights = _compute_window_weights(
        feature_coords,
        precip.shape[1],
        precip.shape[2],
        localization_window_radius,
    )

    if ari_order == 1:
        psi = _estimate_ar1_params(
            precip_lagr_diff_c[-1],
            precip_lagr_diff[-1],
            weights,
            interp_weights,
            num_workers=num_workers,
        )
    else:
        psi = _estimate_ar2_params(
            precip_lagr_diff_c[-2:],
            precip_lagr_diff[-1],
            weights,
            interp_weights,
            num_workers=num_workers,
        )
    forecast_gen["psi"] = psi

    if measure_time:
        print(f"{time.time() - starttime:.2f} seconds.")
    else:
        print("done.")

    # apply the ARI(p,1) model and integrate the differences
    precip_lagr_diff_c = _iterate_ar_model(precip_lagr_diff_c, psi)
    precip_forecast = precip_lagr[-2] + precip_lagr_diff_c[-1]
    # clip negative intensities produced by the AR step
    precip_forecast[precip_forecast < 0.0] = 0.0

    print("Estimating the second convolution kernel... ", end="", flush=True)
    if measure_time:
        starttime = time.time()

    # estimate the second convolution kernels based on the forecast field
    # computed above
    kernels_2 = _estimate_convol_params(
        precip_forecast,
        precip[-1],
        convol_weights,
        mask_adv,
        kernel_type=kernel_type,
        num_workers=num_workers,
    )
    forecast_gen["kernels_2"] = kernels_2

    if measure_time:
        print(f"{time.time() - starttime:.2f} seconds.")
    else:
        print("done.")

    if measure_time:
        return forecast_gen, precip_lagr_diff, time.time() - starttime_init
    else:
        return forecast_gen, precip_lagr_diff


def _linda_perturbation_init(
    precip,
    precip_lagr_diff,
    velocity,
    forecast_gen,
    pert_thrs,
    localization_window_radius,
    errdist_window_radius,
    acf_window_radius,
    vel_pert_method,
    vel_pert_kwargs,
    kmperpixel,
    timestep,
    num_workers,
    use_multiprocessing,
    measure_time,
):
    """
    Initialize the LINDA perturbation generator.

    Runs a one-step deterministic hindcast, derives multiplicative forecast
    errors against the latest observation, and fits the perturbation
    parameters from those errors. Optionally initializes a BPS velocity
    perturbation generator.
    """
    if measure_time:
        starttime = time.time()

    print("Estimating forecast errors... ", end="", flush=True)

    # run the deterministic model in single-member mode without perturbations;
    # copy so the caller's forecast_gen is not modified
    forecast_gen = forecast_gen.copy()
    forecast_gen["add_perturbations"] = False
    forecast_gen["num_ens_members"] = 1

    precip_forecast_det = _linda_forecast(
        precip[:-1],
        precip_lagr_diff[:-1],
        1,
        forecast_gen,
        None,
        None,
        1,
        None,
        False,
        False,
        True,
        None,
    )

    # compute multiplicative forecast errors
    err = precip_forecast_det[-1] / precip[-1]

    # mask small precipitation intensities: keep a pixel only when forecast
    # and observation both exceed the (asymmetric) thresholds
    mask = np.logical_or(
        np.logical_and(
            precip_forecast_det[-1] >= pert_thrs[1], precip[-1] >= pert_thrs[0]
        ),
        np.logical_and(
            precip_forecast_det[-1] >= pert_thrs[0], precip[-1] >= pert_thrs[1]
        ),
    )
    err[~mask] = np.nan

    if measure_time:
        print(f"{time.time() - starttime:.2f} seconds.")
    else:
        print("done.")

    pert_gen = _estimate_perturbation_params(
        err,
        forecast_gen,
        errdist_window_radius,
        acf_window_radius,
        localization_window_radius,
        measure_time,
        num_workers,
        use_multiprocessing,
    )

    if vel_pert_method == "bps":
        init_vel_noise, generate_vel_noise = noise.get_method("bps")

        vp_par = vel_pert_kwargs["vp_par"]
        vp_perp = vel_pert_kwargs["vp_perp"]
        kwargs = {
            "p_par": vp_par,
            "p_perp": vp_perp,
        }
        velocity_pert_gen = {
            "gen_func": generate_vel_noise,
            "init_func": lambda seed: init_vel_noise(
                velocity, 1.0 / kmperpixel, timestep, seed=seed, **kwargs
            ),
            "timestep": timestep,
        }
    else:
        velocity_pert_gen = None

    if measure_time:
        return pert_gen, velocity_pert_gen, time.time() - starttime
    else:
        return pert_gen, velocity_pert_gen


def _masked_convolution(field, kernel):
    """
    Compute a masked convolution where non-finite values are ignored.

    The field is convolved with zeros substituted for the non-finite pixels,
    and the result is renormalized by the convolved mask so that missing
    values do not bias the output. Non-finite pixels stay NaN in the output.
    """
    mask = np.isfinite(field)

    field = field.copy()
    field[~mask] = 0.0

    field_c = np.ones(field.shape) * np.nan
    field_c[mask] = convolve(field, kernel, mode="same")[mask]
    # renormalize by the local fraction of valid pixels under the kernel
    field_c[mask] /= convolve(mask.astype(float), kernel, mode="same")[mask]

    return field_c


def _update(state, params):
    # Advance the LINDA state by one time step for each ensemble member:
    # iterate the ARI model, accumulate the difference into the forecast,
    # apply the composite convolutions and (optionally) perturbations.
    def worker(j):
        state["precip_lagr_diff"][j] = _iterate_ar_model(
            state["precip_lagr_diff"][j], params["psi"]
        )

        state["precip_forecast"][j] += state["precip_lagr_diff"][j][-1]

        for i in range(state["precip_lagr_diff"][j].shape[0]):
            state["precip_lagr_diff"][j][i] = _composite_convolution(
                state["precip_lagr_diff"][j][i],
                params["kernels_1"],
                params["interp_weights"],
            )

        state["precip_forecast"][j] = _composite_convolution(
            state["precip_forecast"][j], params["kernels_2"], params["interp_weights"]
        )

        out = state["precip_forecast"][j].copy()
        out[out < 0.0] = 0.0
        out[~params["mask_adv"]] = np.nan

        # apply perturbations
        if params["precip_pert_gen"] is not None:
            # per-member random stream keeps members reproducible and independent
            seed = state["rs_precip_pert"][j].randint(0, high=1e9)
            perturb = _generate_perturbations(
                params["precip_pert_gen"], params["num_workers"], seed
            )
            out *= perturb

        return out

    out = []
    if DASK_IMPORTED and params["num_workers"] > 1 and params["num_ens_members"] > 1:
        # parallelize over ensemble members with dask threads
        res = []
        for j in range(params["num_ens_members"]):
            res.append(dask.delayed(worker)(j))
        out = dask.compute(
            *res, num_workers=params["num_ensemble_workers"], scheduler="threads"
        )
    else:
        for j in range(params["num_ens_members"]):
            out.append(worker(j))

    return np.stack(out), state


def _weighted_std(f,
w): """ Compute standard deviation of forecast errors with spatially varying weights. Values close to zero are omitted. """ mask = np.abs(f - 1.0) > 1e-4 n = np.count_nonzero(mask) if n > 0: c = (w[mask].size - 1.0) / n return np.sqrt(np.sum(w[mask] * (f[mask] - 1.0) ** 2.0) / (c * np.sum(w[mask]))) else: return np.nan def _window_tukey(m, n, ci, cj, ri, rj, alpha=0.5): """Tukey window function centered at the given coordinates.""" j, i = np.meshgrid(np.arange(n), np.arange(m)) di = np.abs(i - ci) dj = np.abs(j - cj) mask1 = np.logical_and(di <= ri, dj <= rj) w1 = np.zeros(di.shape) mask2 = di <= alpha * ri mask12 = np.logical_and(mask1, ~mask2) w1[mask12] = 0.5 * ( 1.0 + np.cos(np.pi * (di[mask12] - alpha * ri) / ((1.0 - alpha) * ri)) ) w1[np.logical_and(mask1, mask2)] = 1.0 w2 = np.zeros(dj.shape) mask2 = dj <= alpha * rj mask12 = np.logical_and(mask1, ~mask2) w2[mask12] = 0.5 * ( 1.0 + np.cos(np.pi * (dj[mask12] - alpha * rj) / ((1.0 - alpha) * rj)) ) w2[np.logical_and(mask1, mask2)] = 1.0 weights = np.zeros((m, n)) weights[mask1] = w1[mask1] * w2[mask1] return weights def _window_uniform(m, n, ci, cj, ri, rj): """Uniform window function with all values set to one.""" return np.ones((m, n)) ================================================ FILE: pysteps/nowcasts/sprog.py ================================================ """ pysteps.nowcasts.sprog ====================== Implementation of the S-PROG method described in :cite:`Seed2003` .. 
autosummary::
    :toctree: ../generated/

    forecast
"""

import time

import numpy as np

from pysteps import cascade, extrapolation, utils
from pysteps.nowcasts import utils as nowcast_utils
from pysteps.nowcasts.utils import compute_percentile_mask, nowcast_main_loop
from pysteps.postprocessing import probmatching
from pysteps.timeseries import autoregression, correlation
from pysteps.utils.check_norain import check_norain

try:
    import dask

    DASK_IMPORTED = True
except ImportError:
    DASK_IMPORTED = False


def forecast(
    precip,
    velocity,
    timesteps,
    precip_thr=None,
    norain_thr=0.0,
    n_cascade_levels=6,
    extrap_method="semilagrangian",
    decomp_method="fft",
    bandpass_filter_method="gaussian",
    ar_order=2,
    conditional=False,
    probmatching_method="cdf",
    num_workers=1,
    fft_method="numpy",
    domain="spatial",
    extrap_kwargs=None,
    filter_kwargs=None,
    measure_time=False,
):
    """
    Generate a nowcast by using the Spectral Prognosis (S-PROG) method.

    Parameters
    ----------
    precip: array-like
        Array of shape (ar_order+1,m,n) containing the input precipitation
        fields ordered by timestamp from oldest to newest. The time steps
        between the inputs are assumed to be regular.
    velocity: array-like
        Array of shape (2,m,n) containing the x- and y-components of the
        advection field. The velocities are assumed to represent one time step
        between the inputs. All values are required to be finite.
    timesteps: int or list of floats
        Number of time steps to forecast or a list of time steps for which the
        forecasts are computed (relative to the input time step). The elements
        of the list are required to be in ascending order.
    precip_thr: float, required
        The threshold value for minimum observable precipitation intensity.
    norain_thr: float
        Specifies the threshold value for the fraction of rainy (see above)
        pixels in the radar rainfall field below which we consider there to be
        no rain. Depends on the amount of clutter typically present.
        Standard set to 0.0
    n_cascade_levels: int, optional
        The number of cascade levels to use. Defaults to 6, see issue #385
        on GitHub.
    extrap_method: str, optional
        Name of the extrapolation method to use. See the documentation of
        pysteps.extrapolation.interface.
    decomp_method: {'fft'}, optional
        Name of the cascade decomposition method to use. See the documentation
        of pysteps.cascade.interface.
    bandpass_filter_method: {'gaussian', 'uniform'}, optional
        Name of the bandpass filter method to use with the cascade
        decomposition. See the documentation of pysteps.cascade.interface.
    ar_order: int, optional
        The order of the autoregressive model to use. Must be >= 1.
    conditional: bool, optional
        If set to True, compute the statistics of the precipitation field
        conditionally by excluding pixels where the values are below the
        threshold precip_thr.
    probmatching_method: {'cdf','mean',None}, optional
        Method for matching the conditional statistics of the forecast field
        (areas with precipitation intensity above the threshold precip_thr)
        with those of the most recently observed one. 'cdf'=map the forecast
        CDF to the observed one, 'mean'=adjust only the mean value,
        None=no matching applied.
    num_workers: int, optional
        The number of workers to use for parallel computation. Applicable if
        dask is enabled or pyFFTW is used for computing the FFT.
        When num_workers>1, it is advisable to disable OpenMP by setting
        the environment variable OMP_NUM_THREADS to 1.
        This avoids slowdown caused by too many simultaneous threads.
    fft_method: str, optional
        A string defining the FFT method to use (see utils.fft.get_method).
        Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed,
        the recommended method is 'pyfftw'.
    domain: {"spatial", "spectral"}
        If "spatial", all computations are done in the spatial domain (the
        classical S-PROG model). If "spectral", the AR(2) models are applied
        directly in the spectral domain to reduce memory footprint and improve
        performance :cite:`PCH2019a`.
    extrap_kwargs: dict, optional
        Optional dictionary containing keyword arguments for the extrapolation
        method. See the documentation of pysteps.extrapolation.
    filter_kwargs: dict, optional
        Optional dictionary containing keyword arguments for the filter
        method. See the documentation of pysteps.cascade.bandpass_filters.py.
    measure_time: bool
        If set to True, measure, print and return the computation time.

    Returns
    -------
    out: ndarray
        A three-dimensional array of shape (num_timesteps,m,n) containing a
        time series of forecast precipitation fields. The time series starts
        from t0+timestep, where timestep is taken from the input precipitation
        fields precip. If measure_time is True, the return value is a
        three-element tuple containing the nowcast array, the initialization
        time of the nowcast generator and the time used in the main loop
        (seconds).

    See also
    --------
    pysteps.extrapolation.interface, pysteps.cascade.interface

    References
    ----------
    :cite:`Seed2003`, :cite:`PCH2019a`
    """
    _check_inputs(precip, velocity, timesteps, ar_order)

    if extrap_kwargs is None:
        extrap_kwargs = dict()

    if filter_kwargs is None:
        filter_kwargs = dict()

    if np.any(~np.isfinite(velocity)):
        raise ValueError("velocity contains non-finite values")

    if precip_thr is None:
        raise ValueError("precip_thr required but not specified")

    print("Computing S-PROG nowcast")
    print("------------------------")
    print("")

    print("Inputs")
    print("------")
    print(f"input dimensions: {precip.shape[1]}x{precip.shape[2]}")
    print("")

    print("Methods")
    print("-------")
    print(f"extrapolation: {extrap_method}")
    print(f"bandpass filter: {bandpass_filter_method}")
    print(f"decomposition: {decomp_method}")
    print("conditional statistics: {}".format("yes" if conditional else "no"))
    print(f"probability matching: {probmatching_method}")
    print(f"FFT method: {fft_method}")
    print(f"domain: {domain}")
    print("")

    print("Parameters")
    print("----------")
    if isinstance(timesteps, int):
        print(f"number of time steps: {timesteps}")
    else:
        print(f"time steps: {timesteps}")
    print(f"parallel threads: {num_workers}")
    print(f"number of cascade levels: {n_cascade_levels}")
    print(f"order of the AR(p) model: {ar_order}")
    print(f"precip. intensity threshold: {precip_thr}")

    if measure_time:
        starttime_init = time.time()
    else:
        starttime_init = None

    fft = utils.get_method(fft_method, shape=precip.shape[1:], n_threads=num_workers)

    m, n = precip.shape[1:]

    # initialize the band-pass filter
    filter_method = cascade.get_method(bandpass_filter_method)
    bp_filter = filter_method((m, n), n_cascade_levels, **filter_kwargs)

    decomp_method, recomp_method = cascade.get_method(decomp_method)

    extrapolator_method = extrapolation.get_method(extrap_method)

    # keep only the ar_order + 1 most recent input fields
    precip = precip[-(ar_order + 1) :, :, :].copy()
    precip_min = np.nanmin(precip)

    # determine the domain mask from non-finite values
    domain_mask = np.logical_or.reduce(
        [~np.isfinite(precip[i, :]) for i in range(precip.shape[0])]
    )

    # short-circuit: return a zero-precipitation forecast when the inputs
    # contain (almost) no rain
    if check_norain(precip, precip_thr, norain_thr, None):
        return nowcast_utils.zero_precipitation_forecast(
            None, timesteps, precip, None, True, measure_time, starttime_init
        )

    # determine the precipitation threshold mask
    if conditional:
        mask_thr = np.logical_and.reduce(
            [precip[i, :, :] >= precip_thr for i in range(precip.shape[0])]
        )
    else:
        mask_thr = None

    # initialize the extrapolator
    x_values, y_values = np.meshgrid(
        np.arange(precip.shape[2]), np.arange(precip.shape[1])
    )

    xy_coords = np.stack([x_values, y_values])

    extrap_kwargs = extrap_kwargs.copy()
    extrap_kwargs["xy_coords"] = xy_coords
    extrap_kwargs["allow_nonfinite_values"] = (
        True if np.any(~np.isfinite(precip)) else False
    )

    # advect the previous precipitation fields to the same position with the
    # most recent one (i.e. transform them into the Lagrangian coordinates)
    res = list()

    def f(precip, i):
        return extrapolator_method(
            precip[i, :], velocity, ar_order - i, "min", **extrap_kwargs
        )[-1]

    for i in range(ar_order):
        if not DASK_IMPORTED:
            precip[i, :, :] = f(precip, i)
        else:
            res.append(dask.delayed(f)(precip, i))

    if DASK_IMPORTED:
        num_workers_ = len(res) if num_workers > len(res) else num_workers
        precip = np.stack(
            list(dask.compute(*res, num_workers=num_workers_)) + [precip[-1, :, :]]
        )

    # replace non-finite values with the minimum value
    precip = precip.copy()
    for i in range(precip.shape[0]):
        precip[i, ~np.isfinite(precip[i, :])] = np.nanmin(precip[i, :])

    # compute the cascade decompositions of the input precipitation fields
    precip_decomp = []
    for i in range(ar_order + 1):
        precip_ = decomp_method(
            precip[i, :, :],
            bp_filter,
            mask=mask_thr,
            fft_method=fft,
            output_domain=domain,
            normalize=True,
            compute_stats=True,
            compact_output=True,
        )
        precip_decomp.append(precip_)

    # rearrange the cascade levels into a four-dimensional array of shape
    # (n_cascade_levels,ar_order+1,m,n) for the autoregressive model
    precip_cascades = nowcast_utils.stack_cascades(
        precip_decomp, n_cascade_levels, convert_to_full_arrays=True
    )

    # compute lag-l temporal autocorrelation coefficients for each cascade level
    gamma = np.empty((n_cascade_levels, ar_order))
    for i in range(n_cascade_levels):
        if domain == "spatial":
            gamma[i, :] = correlation.temporal_autocorrelation(
                precip_cascades[i], mask=mask_thr
            )
        else:
            gamma[i, :] = correlation.temporal_autocorrelation(
                precip_cascades[i], domain="spectral", x_shape=precip.shape[1:]
            )

    # re-stack without converting to full arrays; only the compact form is
    # needed from here on
    precip_cascades = nowcast_utils.stack_cascades(
        precip_decomp, n_cascade_levels, convert_to_full_arrays=False
    )

    precip_decomp = precip_decomp[-1]

    nowcast_utils.print_corrcoefs(gamma)

    if ar_order == 2:
        # adjust the lag-2 correlation coefficient to ensure that the AR(p)
        # process is stationary
        for i in range(n_cascade_levels):
            gamma[i, 1] = autoregression.adjust_lag2_corrcoef2(gamma[i, 0], gamma[i, 1])

    # estimate the parameters of the AR(p) model from the autocorrelation
    # coefficients
    phi = np.empty((n_cascade_levels, ar_order + 1))
    for i in range(n_cascade_levels):
        phi[i, :] = autoregression.estimate_ar_params_yw(gamma[i, :])

    nowcast_utils.print_ar_params(phi)

    # discard all except the p-1 last cascades because they are not needed for
    # the AR(p) model
    precip_cascades = [precip_cascades[i][-ar_order:] for i in range(n_cascade_levels)]

    if probmatching_method == "mean":
        mu_0 = np.mean(precip[-1, :, :][precip[-1, :, :] >= precip_thr])
    else:
        mu_0 = None

    # compute precipitation mask and wet area ratio
    mask_p = precip[-1, :, :] >= precip_thr
    war = 1.0 * np.sum(mask_p) / (precip.shape[1] * precip.shape[2])

    if measure_time:
        init_time = time.time() - starttime_init

    precip = precip[-1, :, :]

    print("Starting nowcast computation.")

    precip_forecast = []

    state = {"precip_cascades": precip_cascades, "precip_decomp": precip_decomp}
    params = {
        "domain": domain,
        "domain_mask": domain_mask,
        "fft": fft,
        "mu_0": mu_0,
        "n_cascade_levels": n_cascade_levels,
        "phi": phi,
        "precip_0": precip,
        "precip_min": precip_min,
        "probmatching_method": probmatching_method,
        "recomp_method": recomp_method,
        "war": war,
    }

    # delegate time stepping to the shared nowcast main loop, which calls
    # _update once per output time step
    precip_forecast = nowcast_main_loop(
        precip,
        velocity,
        state,
        timesteps,
        extrap_method,
        _update,
        extrap_kwargs=extrap_kwargs,
        params=params,
        measure_time=measure_time,
    )
    if measure_time:
        precip_forecast, mainloop_time = precip_forecast

    precip_forecast = np.stack(precip_forecast)

    if measure_time:
        return precip_forecast, init_time, mainloop_time
    else:
        return precip_forecast


def _check_inputs(precip, velocity, timesteps, ar_order):
    """Validate the shapes and ordering of the forecast inputs; raise
    ValueError on any violation."""
    if precip.ndim != 3:
        raise ValueError("precip must be a three-dimensional array")
    if precip.shape[0] < ar_order + 1:
        raise ValueError("precip.shape[0] < ar_order+1")
    if velocity.ndim != 3:
        raise ValueError("velocity must be a three-dimensional array")
    if precip.shape[1:3] != velocity.shape[1:3]:
        raise ValueError(
            "dimension mismatch between precip and velocity: shape(precip)=%s, shape(velocity)=%s"
            % (str(precip.shape), str(velocity.shape))
        )
    if isinstance(timesteps, list) and not sorted(timesteps) == timesteps:
        raise ValueError("timesteps is not in ascending order")


def _update(state, params):
    """Advance the S-PROG state one time step: iterate the AR(p) models per
    cascade level, recompose the forecast field, apply the wet-area mask and
    probability matching, and NaN out the domain mask."""
    for i in range(params["n_cascade_levels"]):
        state["precip_cascades"][i] = autoregression.iterate_ar_model(
            state["precip_cascades"][i], params["phi"][i, :]
        )

    # take the most recent iteration of each cascade level for recomposition
    state["precip_decomp"]["cascade_levels"] = [
        state["precip_cascades"][i][-1, :] for i in range(params["n_cascade_levels"])
    ]
    if params["domain"] == "spatial":
        state["precip_decomp"]["cascade_levels"] = np.stack(
            state["precip_decomp"]["cascade_levels"]
        )

    precip_forecast_recomp = params["recomp_method"](state["precip_decomp"])

    if params["domain"] == "spectral":
        # bring the spectral-domain forecast back to the spatial domain
        precip_forecast_recomp = params["fft"].irfft2(precip_forecast_recomp)

    # keep the wet-area ratio of the forecast equal to that of the inputs
    mask = compute_percentile_mask(precip_forecast_recomp, params["war"])
    precip_forecast_recomp[~mask] = params["precip_min"]

    if params["probmatching_method"] == "cdf":
        # adjust the CDF of the forecast to match the most recently
        # observed precipitation field
        precip_forecast_recomp = probmatching.nonparam_match_empirical_cdf(
            precip_forecast_recomp, params["precip_0"]
        )
    elif params["probmatching_method"] == "mean":
        mu_fct = np.mean(precip_forecast_recomp[mask])
        precip_forecast_recomp[mask] = (
            precip_forecast_recomp[mask] - mu_fct + params["mu_0"]
        )

    precip_forecast_recomp[params["domain_mask"]] = np.nan

    return precip_forecast_recomp, state


================================================
FILE: pysteps/nowcasts/sseps.py
================================================
"""
pysteps.nowcasts.sseps
======================

Implementation of the Short-space ensemble prediction system (SSEPS) method.

Essentially, SSEPS is a localized version of STEPS. For localization we intend
the use of a subset of the observations in order to estimate model parameters
that are distributed in space.
The short-space approach used in :cite:`NBSG2017` is generalized to the whole nowcasting system. This essentially boils down to a moving window localization of the nowcasting procedure, whereby all parameters are estimated over a subdomain of prescribed size. .. autosummary:: :toctree: ../generated/ forecast """ import time import numpy as np from scipy.ndimage import generate_binary_structure, iterate_structure from pysteps import cascade, extrapolation, noise from pysteps.nowcasts import utils as nowcast_utils from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation from pysteps.utils.check_norain import check_norain try: import dask dask_imported = True except ImportError: dask_imported = False def forecast( precip, metadata, velocity, timesteps, n_ens_members=24, n_cascade_levels=6, win_size=256, overlap=0.1, war_thr=0.1, extrap_method="semilagrangian", decomp_method="fft", bandpass_filter_method="gaussian", noise_method="ssft", ar_order=2, vel_pert_method=None, probmatching_method="cdf", mask_method="incremental", callback=None, fft_method="numpy", return_output=True, seed=None, num_workers=1, extrap_kwargs=None, filter_kwargs=None, noise_kwargs=None, vel_pert_kwargs=None, mask_kwargs=None, measure_time=False, ): """ Generate a nowcast ensemble by using the Short-space ensemble prediction system (SSEPS) method. This is an experimental version of STEPS which allows for localization by means of a window function. Parameters ---------- precip: array-like Array of shape (ar_order+1,m,n) containing the input precipitation fields ordered by timestamp from oldest to newest. The time steps between the inputs are assumed to be regular, and the inputs are required to have finite values. metadata: dict Metadata dictionary containing the accutime, xpixelsize, threshold and zerovalue attributes as described in the documentation of :py:mod:`pysteps.io.importers`. xpixelsize is assumed to be in meters. 
velocity: array-like Array of shape (2,m,n) containing the x- and y-components of the advection field. The velocities are assumed to represent one time step between the inputs. All values are required to be finite. win_size: int or two-element sequence of ints Size-length of the localization window. overlap: float [0,1[ A float between 0 and 1 prescribing the level of overlap between successive windows. If set to 0, no overlap is used. war_thr: float Threshold for the minimum fraction of rain in a given window. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements of the list are required to be in ascending order. n_ens_members: int The number of ensemble members to generate. n_cascade_levels: int The number of cascade levels to use. Defaults to 6, see issue #385 on GitHub. extrap_method: {'semilagrangian'} Name of the extrapolation method to use. See the documentation of pysteps.extrapolation.interface. decomp_method: {'fft'} Name of the cascade decomposition method to use. See the documentation of pysteps.cascade.interface. bandpass_filter_method: {'gaussian', 'uniform'} Name of the bandpass filter method to use with the cascade decomposition. noise_method: {'parametric','nonparametric','ssft','nested',None} Name of the noise generator to use for perturbating the precipitation field. See the documentation of pysteps.noise.interface. If set to None, no noise is generated. ar_order: int The order of the autoregressive model to use. Must be >= 1. vel_pert_method: {'bps',None} Name of the noise generator to use for perturbing the advection field. See the documentation of pysteps.noise.interface. If set to None, the advection field is not perturbed. mask_method: {'incremental', None} The method to use for masking no precipitation areas in the forecast field. The masked pixels are set to the minimum value of the observations. 
'incremental' = iteratively buffer the mask with a certain rate (currently it is 1 km/min), None=no masking. probmatching_method: {'cdf', None} Method for matching the statistics of the forecast field with those of the most recently observed one. 'cdf'=map the forecast CDF to the observed one, None=no matching applied. Using 'mean' requires that mask_method is not None. callback: function Optional function that is called after computation of each time step of the nowcast. The function takes one argument: a three-dimensional array of shape (n_ens_members,h,w), where h and w are the height and width of the input field precip, respectively. This can be used, for instance, writing the outputs into files. return_output: bool Set to False to disable returning the outputs as numpy arrays. This can save memory if the intermediate results are written to output files using the callback function. seed: int Optional seed number for the random generators. num_workers: int The number of workers to use for parallel computation. Applicable if dask is enabled or pyFFTW is used for computing the FFT. When num_workers>1, it is advisable to disable OpenMP by setting the environment variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous threads. fft_method: str A string defining the FFT method to use (see utils.fft.get_method). Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed, the recommended method is 'pyfftw'. extrap_kwargs: dict Optional dictionary containing keyword arguments for the extrapolation method. See the documentation of pysteps.extrapolation. filter_kwargs: dict Optional dictionary containing keyword arguments for the filter method. See the documentation of pysteps.cascade.bandpass_filters.py. noise_kwargs: dict Optional dictionary containing keyword arguments for the initializer of the noise generator. See the documentation of pysteps.noise.fftgenerators. 
vel_pert_kwargs: dict Optional dictionary containing keyword arguments "p_pert_par" and "p_pert_perp" for the initializer of the velocity perturbator. See the documentation of pysteps.noise.motion. mask_kwargs: dict Optional dictionary containing mask keyword arguments 'mask_f' and 'mask_rim', the factor defining the the mask increment and the rim size, respectively. The mask increment is defined as mask_f*timestep/kmperpixel. measure_time: bool If set to True, measure, print and return the computation time. Returns ------- out: ndarray If return_output is True, a four-dimensional array of shape (n_ens_members,num_timesteps,m,n) containing a time series of forecast precipitation fields for each ensemble member. Otherwise, a None value is returned. The time series starts from t0+timestep, where timestep is taken from the input precipitation fields. See also -------- pysteps.extrapolation.interface, pysteps.cascade.interface, pysteps.noise.interface, pysteps.noise.utils.compute_noise_stddev_adjs Notes ----- Please be aware that this represents a (very) experimental implementation. 
References ---------- :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`NBSG2017` """ _check_inputs(precip, velocity, timesteps, ar_order) if extrap_kwargs is None: extrap_kwargs = dict() else: extrap_kwargs = extrap_kwargs.copy() if filter_kwargs is None: filter_kwargs = dict() if noise_kwargs is None: noise_kwargs = {"win_fun": "tukey"} if vel_pert_kwargs is None: vel_pert_kwargs = dict() if mask_kwargs is None: mask_kwargs = dict() if np.any(~np.isfinite(precip)): raise ValueError("precip contains non-finite values") if np.any(~np.isfinite(velocity)): raise ValueError("velocity contains non-finite values") if mask_method not in ["incremental", None]: raise ValueError( "unknown mask method %s: must be 'incremental' or None" % mask_method ) if np.isscalar(win_size): win_size = (int(win_size), int(win_size)) else: win_size = tuple([int(win_size[i]) for i in range(2)]) timestep = metadata["accutime"] kmperpixel = metadata["xpixelsize"] / 1000 print("Computing SSEPS nowcast") print("-----------------------") print("") print("Inputs") print("------") print("input dimensions: %dx%d" % (precip.shape[1], precip.shape[2])) print(f"km/pixel: {kmperpixel}") print(f"time step: {timestep} minutes") print("") print("Methods") print("-------") print(f"extrapolation: {extrap_method}") print(f"bandpass filter: {bandpass_filter_method}") print(f"decomposition: {decomp_method}") print(f"noise generator: {noise_method}") print(f"velocity perturbator: {vel_pert_method}") print(f"precip. 
mask method: {mask_method}") print(f"probability matching: {probmatching_method}") print(f"FFT method: {fft_method}") print("") print("Parameters") print("----------") print(f"localization window: {win_size[0]}x{win_size[1]}") print(f"overlap: {overlap:.1f}") print(f"war thr: {war_thr:.2f}") if isinstance(timesteps, int): print(f"number of time steps: {timesteps}") else: print(f"time steps: {timesteps}") print(f"ensemble size: {n_ens_members}") print(f"number of cascade levels: {n_cascade_levels}") print(f"order of the AR(p) model: {ar_order}") print("dask imported: {}".format(("yes" if dask_imported else "no"))) print(f"num workers: {num_workers}") if vel_pert_method == "bps": vp_par = vel_pert_kwargs.get( "p_pert_par", noise.motion.get_default_params_bps_par() ) vp_perp = vel_pert_kwargs.get( "p_pert_perp", noise.motion.get_default_params_bps_perp() ) print( f"velocity perturbations, parallel: {vp_par[0]},{vp_par[1]},{vp_par[2]}" ) print( f"velocity perturbations, perpendicular: {vp_perp[0]},{vp_perp[1]},{vp_perp[2]}" ) precip_thr = metadata["threshold"] precip_min = metadata["zerovalue"] num_ensemble_workers = n_ens_members if num_workers > n_ens_members else num_workers if measure_time: starttime_init = time.time() else: starttime_init = None # get methods extrapolator_method = extrapolation.get_method(extrap_method) x_values, y_values = np.meshgrid( np.arange(precip.shape[2]), np.arange(precip.shape[1]) ) xy_coords = np.stack([x_values, y_values]) decomp_method, __ = cascade.get_method(decomp_method) filter_method = cascade.get_method(bandpass_filter_method) if noise_method is not None: init_noise, generate_noise = noise.get_method(noise_method) if check_norain( precip, precip_thr, war_thr, noise_kwargs["win_fun"], ): return nowcast_utils.zero_precipitation_forecast( n_ens_members, timesteps, precip, callback, return_output, measure_time, starttime_init, ) # advect the previous precipitation fields to the same position with the # most recent one (i.e. 
transform them into the Lagrangian coordinates) precip = precip[-(ar_order + 1) :, :, :].copy() extrap_kwargs = extrap_kwargs.copy() extrap_kwargs["xy_coords"] = xy_coords extrap_kwargs["allow_nonfinite_values"] = ( True if np.any(~np.isfinite(precip)) else False ) res = [] extrapolate = lambda precip, i: extrapolator_method( precip[i, :, :], velocity, ar_order - i, "min", **extrap_kwargs )[-1] for i in range(ar_order): if not dask_imported: precip[i, :, :] = extrapolate(precip, i) else: res.append(dask.delayed(extrapolate)(precip, i)) if dask_imported: num_workers_ = len(res) if num_workers > len(res) else num_workers precip = np.stack( list(dask.compute(*res, num_workers=num_workers_)) + [precip[-1, :, :]] ) if mask_method == "incremental": # get mask parameters mask_rim = mask_kwargs.get("mask_rim", 10) mask_f = mask_kwargs.get("mask_f", 1.0) # initialize the structuring element struct = generate_binary_structure(2, 1) # iterate it to expand it nxn n = mask_f * timestep / kmperpixel struct = iterate_structure(struct, int((n - 1) / 2.0)) noise_kwargs.update( { "win_size": win_size, "overlap": overlap, "war_thr": war_thr, "rm_rdisc": True, "donorm": True, } ) print("Estimating nowcast parameters...", end="") def estimator(precip, parsglob=None, idxm=None, idxn=None): pars = {} # initialize the perturbation generator for the precipitation field if noise_method is not None and parsglob is None: pert_gen = init_noise(precip, fft_method=fft_method, **noise_kwargs) else: pert_gen = None pars["pert_gen"] = pert_gen # initialize the band-pass filter if parsglob is None: bp_filter = filter_method( precip.shape[1:], n_cascade_levels, **filter_kwargs ) pars["filter"] = bp_filter else: pars["filter"] = None # compute the cascade decompositions of the input precipitation fields if parsglob is None: precip_decomp = [] for i in range(ar_order + 1): precip_decomp_ = decomp_method( precip[i, :, :], bp_filter, fft_method=fft_method, normalize=True, compute_stats=True, ) 
precip_decomp.append(precip_decomp_) precip_decomp_ = None # normalize the cascades and rearrange them into a four-dimensional array # of shape (n_cascade_levels,ar_order+1,m,n) for the autoregressive model if parsglob is None: precip_cascades = nowcast_utils.stack_cascades( precip_decomp, n_cascade_levels ) mu = precip_decomp[-1]["means"] sigma = precip_decomp[-1]["stds"] precip_decomp = None else: precip_cascades = parsglob["precip_cascades"][0][ :, :, idxm.item(0) : idxm.item(1), idxn.item(0) : idxn.item(1) ].copy() mu = np.mean(precip_cascades, axis=(2, 3)) sigma = np.std(precip_cascades, axis=(2, 3)) precip_cascades = (precip_cascades - mu[:, :, None, None]) / sigma[ :, :, None, None ] mu = mu[:, -1] sigma = sigma[:, -1] pars["mu"] = mu pars["sigma"] = sigma # compute lag-l temporal autocorrelation coefficients for each cascade level gamma = np.empty((n_cascade_levels, ar_order)) for i in range(n_cascade_levels): precip_cascades_ = np.stack( [precip_cascades[i, j, :, :] for j in range(ar_order + 1)] ) gamma[i, :] = correlation.temporal_autocorrelation(precip_cascades_) precip_cascades_ = None if ar_order == 2: # adjust the local lag-2 correlation coefficient to ensure that the AR(p) # process is stationary for i in range(n_cascade_levels): gamma[i, 1] = autoregression.adjust_lag2_corrcoef2( gamma[i, 0], gamma[i, 1] ) # estimate the parameters of the AR(p) model from the autocorrelation # coefficients phi = np.empty((n_cascade_levels, ar_order + 1)) for i in range(n_cascade_levels): phi[i, :] = autoregression.estimate_ar_params_yw(gamma[i, :]) pars["phi"] = phi # stack the cascades into a five-dimensional array containing all ensemble # members precip_cascades = [precip_cascades.copy() for _ in range(n_ens_members)] pars["precip_cascades"] = precip_cascades if mask_method is not None and parsglob is None: mask_prec = precip[-1, :, :] >= precip_thr if mask_method == "incremental": # initialize precip mask for each member mask_prec = 
nowcast_utils.compute_dilated_mask( mask_prec, struct, mask_rim ) mask_prec = [mask_prec.copy() for _ in range(n_ens_members)] else: mask_prec = None pars["mask_prec"] = mask_prec return pars # prepare windows M, N = precip.shape[1:] n_windows_M = np.ceil(1.0 * M / win_size[0]).astype(int) n_windows_N = np.ceil(1.0 * N / win_size[1]).astype(int) idxm = np.zeros(2, dtype=int) idxn = np.zeros(2, dtype=int) if measure_time: starttime = time.time() # compute global parameters to be used as defaults parsglob = estimator(precip) # loop windows if n_windows_M > 1 or n_windows_N > 1: war = np.empty((n_windows_M, n_windows_N)) phi = np.empty((n_windows_M, n_windows_N, n_cascade_levels, ar_order + 1)) mu = np.empty((n_windows_M, n_windows_N, n_cascade_levels)) sigma = np.empty((n_windows_M, n_windows_N, n_cascade_levels)) ff = [] rc = [] pp = [] mm = [] for m in range(n_windows_M): ff_ = [] pp_ = [] rc_ = [] mm_ = [] for n in range(n_windows_N): # compute indices of local window idxm[0] = int(np.max((m * win_size[0] - overlap * win_size[0], 0))) idxm[1] = int( np.min((idxm[0] + win_size[0] + overlap * win_size[0], M)) ) idxn[0] = int(np.max((n * win_size[1] - overlap * win_size[1], 0))) idxn[1] = int( np.min((idxn[0] + win_size[1] + overlap * win_size[1], N)) ) mask = np.zeros((M, N), dtype=bool) mask[idxm.item(0) : idxm.item(1), idxn.item(0) : idxn.item(1)] = True precip_ = precip[ :, idxm.item(0) : idxm.item(1), idxn.item(0) : idxn.item(1) ] war[m, n] = ( np.sum(precip_[-1, :, :] >= precip_thr) / precip_[-1, :, :].size ) if war[m, n] > war_thr: # estimate local parameters pars = estimator(precip, parsglob, idxm, idxn) ff_.append(pars["filter"]) pp_.append(pars["pert_gen"]) rc_.append(pars["precip_cascades"]) mm_.append(pars["mask_prec"]) mu[m, n, :] = pars["mu"] sigma[m, n, :] = pars["sigma"] phi[m, n, :, :] = pars["phi"] else: # dry window ff_.append(None) pp_.append(None) rc_.append(None) mm_.append(None) ff.append(ff_) pp.append(pp_) rc.append(rc_) mm.append(mm_) # 
remove unnecessary variables ff_ = None pp_ = None rc_ = None mm_ = None pars = None if measure_time: print(f"{time.time() - starttime:.2f} seconds.") else: print(" done.") # initialize the random generators if noise_method is not None: randgen_prec = [] randgen_motion = [] np.random.seed(seed) for _ in range(n_ens_members): rs = np.random.RandomState(seed) randgen_prec.append(rs) seed = rs.randint(0, high=1e9) rs = np.random.RandomState(seed) randgen_motion.append(rs) seed = rs.randint(0, high=1e9) if vel_pert_method is not None: init_vel_noise, generate_vel_noise = noise.get_method(vel_pert_method) # initialize the perturbation generators for the motion field velocity_perturbators = [] for j in range(n_ens_members): kwargs = { "randstate": randgen_motion[j], "p_par": vp_par, "p_perp": vp_perp, } vp = init_vel_noise(velocity, 1.0 / kmperpixel, timestep, **kwargs) velocity_perturbators.append(vp) D = [None for _ in range(n_ens_members)] precip_forecast = [[] for _ in range(n_ens_members)] if measure_time: init_time = time.time() - starttime_init precip = precip[-1, :, :] print("Starting nowcast computation.") if measure_time: starttime_mainloop = time.time() if isinstance(timesteps, int): timesteps = range(timesteps + 1) timestep_type = "int" else: original_timesteps = [0] + list(timesteps) timesteps = nowcast_utils.binned_timesteps(original_timesteps) timestep_type = "list" extrap_kwargs["return_displacement"] = True precip_forecast_prev = [precip for _ in range(n_ens_members)] t_prev = [0.0 for _ in range(n_ens_members)] t_total = [0.0 for _ in range(n_ens_members)] # iterate each time step for t, subtimestep_idx in enumerate(timesteps): if timestep_type == "list": subtimesteps = [original_timesteps[t_] for t_ in subtimestep_idx] else: subtimesteps = [t] if (timestep_type == "list" and subtimesteps) or ( timestep_type == "int" and t > 0 ): is_nowcast_time_step = True else: is_nowcast_time_step = False if is_nowcast_time_step: print( f"Computing nowcast for time 
step {t}... ", end="", flush=True, ) if measure_time: starttime = time.time() # iterate each ensemble member def worker(j): # first the global step if noise_method is not None: # generate noise field EPS = generate_noise( parsglob["pert_gen"], randstate=randgen_prec[j], fft_method=fft_method, ) # decompose the noise field into a cascade EPS_d = decomp_method( EPS, parsglob["filter"], fft_method=fft_method, normalize=True, compute_stats=True, ) else: EPS_d = None # iterate the AR(p) model for each cascade level precip_cascades = parsglob["precip_cascades"][j].copy() if precip_cascades.shape[1] >= ar_order: precip_cascades = precip_cascades[:, -ar_order:, :, :].copy() for i in range(n_cascade_levels): # normalize the noise cascade if EPS_d is not None: EPS_ = ( EPS_d["cascade_levels"][i, :, :] - EPS_d["means"][i] ) / EPS_d["stds"][i] else: EPS_ = None # apply AR(p) process to cascade level precip_cascades[i, :, :, :] = autoregression.iterate_ar_model( precip_cascades[i, :, :, :], parsglob["phi"][i, :], eps=EPS_ ) EPS_ = None parsglob["precip_cascades"][j] = precip_cascades.copy() EPS = None # compute the recomposed precipitation field(s) from the cascades # obtained from the AR(p) model(s) precip_forecast_new = _recompose_cascade( precip_cascades, parsglob["mu"], parsglob["sigma"] ) precip_cascades = None # then the local steps if n_windows_M > 1 or n_windows_N > 1: idxm = np.zeros(2, dtype=int) idxn = np.zeros(2, dtype=int) precip_l = np.zeros((M, N), dtype=float) M_s = np.zeros((M, N), dtype=float) for m in range(n_windows_M): for n in range(n_windows_N): # compute indices of local window idxm[0] = int( np.max((m * win_size[0] - overlap * win_size[0], 0)) ) idxm[1] = int( np.min((idxm[0] + win_size[0] + overlap * win_size[0], M)) ) idxn[0] = int( np.max((n * win_size[1] - overlap * win_size[1], 0)) ) idxn[1] = int( np.min((idxn[0] + win_size[1] + overlap * win_size[1], N)) ) # build localization mask mask = _get_mask((M, N), idxm, idxn) mask_l = mask[ idxm.item(0) 
: idxm.item(1), idxn.item(0) : idxn.item(1) ] M_s += mask # skip if dry if war[m, n] > war_thr: precip_cascades = rc[m][n][j].copy() if precip_cascades.shape[1] >= ar_order: precip_cascades = precip_cascades[:, -ar_order:, :, :] if noise_method is not None: # extract noise field EPS_d_l = EPS_d["cascade_levels"][ :, idxm.item(0) : idxm.item(1), idxn.item(0) : idxn.item(1), ].copy() mu_ = np.mean(EPS_d_l, axis=(1, 2)) sigma_ = np.std(EPS_d_l, axis=(1, 2)) else: EPS_d_l = None # iterate the AR(p) model for each cascade level for i in range(n_cascade_levels): # normalize the noise cascade if EPS_d_l is not None: EPS_ = ( EPS_d_l[i, :, :] - mu_[i, None, None] ) / sigma_[i, None, None] else: EPS_ = None # apply AR(p) process to cascade level precip_cascades[i, :, :, :] = ( autoregression.iterate_ar_model( precip_cascades[i, :, :, :], phi[m, n, i, :], eps=EPS_, ) ) EPS_ = None rc[m][n][j] = precip_cascades.copy() EPS_d_l = mu_ = sigma_ = None # compute the recomposed precipitation field(s) from the cascades # obtained from the AR(p) model(s) mu_ = mu[m, n, :] sigma_ = sigma[m, n, :] precip_cascades = [ ((precip_cascades[i, -1, :, :] * sigma_[i]) + mu_[i]) * parsglob["sigma"][i] + parsglob["mu"][i] for i in range(len(mu_)) ] precip_l_ = np.sum(np.stack(precip_cascades), axis=0) precip_cascades = mu_ = sigma_ = None # precip_l_ = _recompose_cascade(precip_cascades[:, :, :], mu[m, n, :], sigma[m, n, :]) else: precip_l_ = precip_forecast_new[ idxm.item(0) : idxm.item(1), idxn.item(0) : idxn.item(1) ].copy() if probmatching_method == "cdf": # adjust the CDF of the forecast to match the most recently # observed precipitation field precip_ = precip[ idxm.item(0) : idxm.item(1), idxn.item(0) : idxn.item(1) ].copy() precip_l_ = probmatching.nonparam_match_empirical_cdf( precip_l_, precip_ ) precip_ = None precip_l[ idxm.item(0) : idxm.item(1), idxn.item(0) : idxn.item(1) ] += (precip_l_ * mask_l) precip_l_ = None ind = M_s > 0 precip_l[ind] *= 1 / M_s[ind] precip_l[~ind] = 
precip_min precip_forecast_new = precip_l.copy() precip_l = None if probmatching_method == "cdf": # adjust the CDF of the forecast to match the most recently # observed precipitation field precip_forecast_new[precip_forecast_new < precip_thr] = precip_min precip_forecast_new = probmatching.nonparam_match_empirical_cdf( precip_forecast_new, precip ) if mask_method is not None: # apply the precipitation mask to prevent generation of new # precipitation into areas where it was not originally # observed if mask_method == "incremental": mask_prec = parsglob["mask_prec"][j].copy() precip_forecast_new = ( precip_forecast_new.min() + (precip_forecast_new - precip_forecast_new.min()) * mask_prec ) mask_prec = None if mask_method == "incremental": parsglob["mask_prec"][j] = nowcast_utils.compute_dilated_mask( precip_forecast_new >= precip_thr, struct, mask_rim ) precip_forecast_out = [] extrap_kwargs_ = extrap_kwargs.copy() extrap_kwargs_["xy_coords"] = xy_coords extrap_kwargs_["return_displacement"] = True V_pert = velocity # advect the recomposed precipitation field to obtain the forecast for # the current time step (or subtimesteps if non-integer time steps are # given) for t_sub in subtimesteps: if t_sub > 0: t_diff_prev_int = t_sub - int(t_sub) if t_diff_prev_int > 0.0: precip_forecast_ip = ( 1.0 - t_diff_prev_int ) * precip_forecast_prev[ j ] + t_diff_prev_int * precip_forecast_new else: precip_forecast_ip = precip_forecast_prev[j] t_diff_prev = t_sub - t_prev[j] t_total[j] += t_diff_prev # compute the perturbed motion field if vel_pert_method is not None: V_pert = velocity + generate_vel_noise( velocity_perturbators[j], t_total[j] * timestep ) extrap_kwargs_["displacement_prev"] = D[j] precip_forecast_ep, D[j] = extrapolator_method( precip_forecast_ip, V_pert, [t_diff_prev], **extrap_kwargs_, ) precip_forecast_ep[0][ precip_forecast_ep[0] < precip_thr ] = precip_min precip_forecast_out.append(precip_forecast_ep[0]) t_prev[j] = t_sub # advect the forecast field by one 
time step if no subtimesteps in the # current interval were found if not subtimesteps: t_diff_prev = t + 1 - t_prev[j] t_total[j] += t_diff_prev # compute the perturbed motion field if vel_pert_method is not None: V_pert = velocity + generate_vel_noise( velocity_perturbators[j], t_total[j] * timestep ) extrap_kwargs_["displacement_prev"] = D[j] _, D[j] = extrapolator_method( None, V_pert, [t_diff_prev], **extrap_kwargs_, ) t_prev[j] = t + 1 precip_forecast_prev[j] = precip_forecast_new return precip_forecast_out res = [] for j in range(n_ens_members): if not dask_imported or n_ens_members == 1: res.append(worker(j)) else: res.append(dask.delayed(worker)(j)) precip_forecast_ = ( dask.compute(*res, num_workers=num_ensemble_workers) if dask_imported and n_ens_members > 1 else res ) res = None if is_nowcast_time_step: if measure_time: print(f"{time.time() - starttime:.2f} seconds.") else: print("done.") if callback is not None: precip_forecast_stacked = np.stack(precip_forecast_) if precip_forecast_stacked.shape[1] > 0: callback(precip_forecast_stacked.squeeze()) precip_forecast_ = None if return_output: for j in range(n_ens_members): precip_forecast[j].extend(precip_forecast_[j]) if measure_time: mainloop_time = time.time() - starttime_mainloop if return_output: outarr = np.stack([np.stack(precip_forecast[j]) for j in range(n_ens_members)]) if measure_time: return outarr, init_time, mainloop_time else: return outarr else: return None def _check_inputs(precip, velocity, timesteps, ar_order): if precip.ndim != 3: raise ValueError("precip must be a three-dimensional array") if precip.shape[0] < ar_order + 1: raise ValueError("precip.shape[0] < ar_order+1") if velocity.ndim != 3: raise ValueError("velocity must be a three-dimensional array") if precip.shape[1:3] != velocity.shape[1:3]: raise ValueError( "dimension mismatch between precip and velocity: precip.shape=%s, velocity.shape=%s" % (str(precip.shape), str(precip.shape)) ) if isinstance(timesteps, list) and not 
sorted(timesteps) == timesteps: raise ValueError("timesteps is not in ascending order") # TODO: Use the recomponse_cascade method in the cascade.decomposition module def _recompose_cascade(precip, mu, sigma): precip_rc = [(precip[i, -1, :, :] * sigma[i]) + mu[i] for i in range(len(mu))] precip_rc = np.sum(np.stack(precip_rc), axis=0) return precip_rc def _build_2D_tapering_function(win_size, win_type="flat-hanning"): """ Produces two-dimensional tapering function for rectangular fields. Parameters ---------- win_size: tuple of int Size of the tapering window as two-element tuple of integers. win_type: str Name of the tapering window type (hanning, flat-hanning) Returns ------- w2d: array-like A two-dimensional numpy array containing the 2D tapering function. """ if len(win_size) != 2: raise ValueError("win_size is not a two-element tuple") if win_type == "hanning": w1dr = np.hanning(win_size[0]) w1dc = np.hanning(win_size[1]) elif win_type == "flat-hanning": T = win_size[0] / 4.0 W = win_size[0] / 2.0 B = np.linspace(-W, W, int(2 * W)) R = np.abs(B) - T R[R < 0] = 0.0 A = 0.5 * (1.0 + np.cos(np.pi * R / T)) A[np.abs(B) > (2 * T)] = 0.0 w1dr = A T = win_size[1] / 4.0 W = win_size[1] / 2.0 B = np.linspace(-W, W, int(2 * W)) R = np.abs(B) - T R[R < 0] = 0.0 A = 0.5 * (1.0 + np.cos(np.pi * R / T)) A[np.abs(B) > (2 * T)] = 0.0 w1dc = A elif win_type == "rectangular": w1dr = np.ones(win_size[0]) w1dc = np.ones(win_size[1]) else: raise ValueError("unknown win_type %s" % win_type) # Expand to 2-D # w2d = np.sqrt(np.outer(w1dr,w1dc)) w2d = np.outer(w1dr, w1dc) # Set nans to zero if np.any(np.isnan(w2d)): w2d[np.isnan(w2d)] = np.min(w2d[w2d > 0]) w2d[w2d < 1e-3] = 1e-3 return w2d def _get_mask(Size, idxi, idxj, win_type="flat-hanning"): """Compute a mask of zeros with a window at a given position.""" idxi = np.array(idxi).astype(int) idxj = np.array(idxj).astype(int) win_size = (idxi[1] - idxi[0], idxj[1] - idxj[0]) wind = _build_2D_tapering_function(win_size, win_type) mask 
= np.zeros(Size) mask[idxi.item(0) : idxi.item(1), idxj.item(0) : idxj.item(1)] = wind return mask ================================================ FILE: pysteps/nowcasts/steps.py ================================================ """ pysteps.nowcasts.steps ====================== Implementation of the STEPS stochastic nowcasting method as described in :cite:`Seed2003`, :cite:`BPS2006` and :cite:`SPN2013`. .. autosummary:: :toctree: ../generated/ forecast """ import time from copy import deepcopy from dataclasses import dataclass, field from typing import Any, Callable import numpy as np from scipy.ndimage import generate_binary_structure, iterate_structure from pysteps import cascade, extrapolation, noise, utils from pysteps.nowcasts import utils as nowcast_utils from pysteps.nowcasts.utils import ( compute_percentile_mask, nowcast_main_loop, zero_precipitation_forecast, ) from pysteps.postprocessing import probmatching from pysteps.timeseries import autoregression, correlation from pysteps.utils.check_norain import check_norain try: import dask DASK_IMPORTED = True except ImportError: DASK_IMPORTED = False @dataclass(frozen=True) class StepsNowcasterConfig: """ Parameters ---------- n_ens_members: int, optional The number of ensemble members to generate. n_cascade_levels: int, optional The number of cascade levels to use. Defaults to 6, see issue #385 on GitHub. precip_threshold: float, optional Specifies the threshold value for minimum observable precipitation intensity. Required if mask_method is not None or conditional is True. norain_threshold: float Specifies the threshold value for the fraction of rainy (see above) pixels in the radar rainfall field below which we consider there to be no rain. Depends on the amount of clutter typically present. Standard set to 0.0 kmperpixel: float, optional Spatial resolution of the input data (kilometers/pixel). Required if vel_pert_method is not None or mask_method is 'incremental'. 
@dataclass(frozen=True)
class StepsNowcasterConfig:
    """
    Immutable configuration for a STEPS nowcast run.

    Parameters
    ----------
    n_ens_members: int, optional
        The number of ensemble members to generate.
    n_cascade_levels: int, optional
        The number of cascade levels to use. Defaults to 6, see issue #385
        on GitHub.
    precip_threshold: float, optional
        Specifies the threshold value for minimum observable precipitation
        intensity. Required if mask_method is not None or conditional is True.
    norain_threshold: float
        Specifies the threshold value for the fraction of rainy (see above)
        pixels in the radar rainfall field below which we consider there to be
        no rain. Depends on the amount of clutter typically present.
        Standard set to 0.0
    kmperpixel: float, optional
        Spatial resolution of the input data (kilometers/pixel). Required if
        vel_pert_method is not None or mask_method is 'incremental'.
    timestep: float, optional
        Time step of the motion vectors (minutes). Required if vel_pert_method
        is not None or mask_method is 'incremental'.
    extrapolation_method: str, optional
        Name of the extrapolation method to use. See the documentation of
        pysteps.extrapolation.interface.
    decomposition_method: {'fft'}, optional
        Name of the cascade decomposition method to use. See the documentation
        of pysteps.cascade.interface.
    bandpass_filter_method: {'gaussian', 'uniform'}, optional
        Name of the bandpass filter method to use with the cascade
        decomposition. See the documentation of pysteps.cascade.interface.
    noise_method: {'parametric','nonparametric','ssft','nested',None}, optional
        Name of the noise generator to use for perturbating the precipitation
        field. See the documentation of pysteps.noise.interface. If set to
        None, no noise is generated.
    noise_stddev_adj: {'auto','fixed',None}, optional
        Optional adjustment for the standard deviations of the noise fields
        added to each cascade level. This is done to compensate incorrect std.
        dev. estimates of cascade levels due to presence of no-rain areas.
        'auto'=use the method implemented in
        pysteps.noise.utils.compute_noise_stddev_adjs.
        'fixed'=use the formula given in :cite:`BPS2006` (eq. 6),
        None=disable noise std. dev adjustment.
    ar_order: int, optional
        The order of the autoregressive model to use. Must be >= 1.
    velocity_perturbation_method: {'bps',None}, optional
        Name of the noise generator to use for perturbing the advection field.
        See the documentation of pysteps.noise.interface. If set to None, the
        advection field is not perturbed.
    conditional: bool, optional
        If set to True, compute the statistics of the precipitation field
        conditionally by excluding pixels where the values are below the
        threshold precip_thr.
    mask_method: {'obs','sprog','incremental',None}, optional
        The method to use for masking no precipitation areas in the forecast
        field. The masked pixels are set to the minimum value of the
        observations. 'obs' = apply precip_thr to the most recently observed
        precipitation intensity field, 'sprog' = use the smoothed forecast
        field from S-PROG, where the AR(p) model has been applied,
        'incremental' = iteratively buffer the mask with a certain rate
        (currently it is 1 km/min), None=no masking.
    probmatching_method: {'cdf','mean',None}, optional
        Method for matching the statistics of the forecast field with those of
        the most recently observed one. 'cdf'=map the forecast CDF to the
        observed one, 'mean'=adjust only the conditional mean value of the
        forecast field in precipitation areas, None=no matching applied.
        Using 'mean' requires that precip_thr and mask_method are not None.
    seed: int, optional
        Optional seed number for the random generators.
    num_workers: int, optional
        The number of workers to use for parallel computation. Applicable if
        dask is enabled or pyFFTW is used for computing the FFT. When
        num_workers>1, it is advisable to disable OpenMP by setting the
        environment variable OMP_NUM_THREADS to 1. This avoids slowdown caused
        by too many simultaneous threads.
    fft_method: str, optional
        A string defining the FFT method to use (see utils.fft.get_method).
        Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed,
        the recommended method is 'pyfftw'.
    domain: {"spatial", "spectral"}
        If "spatial", all computations are done in the spatial domain (the
        classical STEPS model). If "spectral", the AR(2) models and stochastic
        perturbations are applied directly in the spectral domain to reduce
        memory footprint and improve performance :cite:`PCH2019b`.
    extrapolation_kwargs: dict, optional
        Optional dictionary containing keyword arguments for the extrapolation
        method. See the documentation of pysteps.extrapolation.
    filter_kwargs: dict, optional
        Optional dictionary containing keyword arguments for the filter
        method. See the documentation of pysteps.cascade.bandpass_filters.py.
    noise_kwargs: dict, optional
        Optional dictionary containing keyword arguments for the initializer
        of the noise generator. See the documentation of
        pysteps.noise.fftgenerators.
    velocity_perturbation_kwargs: dict, optional
        Optional dictionary containing keyword arguments 'p_par' and 'p_perp'
        for the initializer of the velocity perturbator. The choice of the
        optimal parameters depends on the domain and the used optical flow
        method.

        Default parameters from :cite:`BPS2006`:
        p_par  = [10.88, 0.23, -7.68]
        p_perp = [5.76, 0.31, -2.72]

        Parameters fitted to the data (optical flow/domain):

        darts/fmi:
        p_par  = [13.71259667, 0.15658963, -16.24368207]
        p_perp = [8.26550355, 0.17820458, -9.54107834]

        darts/mch:
        p_par  = [24.27562298, 0.11297186, -27.30087471]
        p_perp = [-7.80797846e+01, -3.38641048e-02, 7.56715304e+01]

        darts/fmi+mch:
        p_par  = [16.55447057, 0.14160448, -19.24613059]
        p_perp = [14.75343395, 0.11785398, -16.26151612]

        lucaskanade/fmi:
        p_par  = [2.20837526, 0.33887032, -2.48995355]
        p_perp = [2.21722634, 0.32359621, -2.57402761]

        lucaskanade/mch:
        p_par  = [2.56338484, 0.3330941, -2.99714349]
        p_perp = [1.31204508, 0.3578426, -1.02499891]

        lucaskanade/fmi+mch:
        p_par  = [2.31970635, 0.33734287, -2.64972861]
        p_perp = [1.90769947, 0.33446594, -2.06603662]

        vet/fmi:
        p_par  = [0.25337388, 0.67542291, 11.04895538]
        p_perp = [0.02432118, 0.99613295, 7.40146505]

        vet/mch:
        p_par  = [0.5075159, 0.53895212, 7.90331791]
        p_perp = [0.68025501, 0.41761289, 4.73793581]

        vet/fmi+mch:
        p_par  = [0.29495222, 0.62429207, 8.6804131]
        p_perp = [0.23127377, 0.59010281, 5.98180004]

        fmi=Finland, mch=Switzerland, fmi+mch=both pooled into the same data
        set.

        The above parameters have been fitted by using run_vel_pert_analysis.py
        and fit_vel_pert_params.py located in the scripts directory.

        See pysteps.noise.motion for additional documentation.
    mask_kwargs: dict
        Optional dictionary containing mask keyword arguments 'mask_f' and
        'mask_rim', the factor defining the mask increment and the rim size,
        respectively. The mask increment is defined as
        mask_f*timestep/kmperpixel.
    measure_time: bool
        If set to True, measure, print and return the computation time.
    callback: function, optional
        Optional function that is called after computation of each time step
        of the nowcast. The function takes one argument: a three-dimensional
        array of shape (n_ens_members,h,w), where h and w are the height and
        width of the input precipitation fields, respectively. This can be
        used, for instance, writing the outputs into files.
    return_output: bool, optional
        Set to False to disable returning the outputs as numpy arrays. This
        can save memory if the intermediate results are written to output
        files using the callback function.
    """

    n_ens_members: int = 24
    n_cascade_levels: int = 6
    precip_threshold: float | None = None
    norain_threshold: float = 0.0
    kmperpixel: float | None = None
    timestep: float | None = None
    extrapolation_method: str = "semilagrangian"
    decomposition_method: str = "fft"
    bandpass_filter_method: str = "gaussian"
    noise_method: str | None = "nonparametric"
    noise_stddev_adj: str | None = None
    ar_order: int = 2
    velocity_perturbation_method: str | None = "bps"
    conditional: bool = False
    probmatching_method: str | None = "cdf"
    mask_method: str | None = "incremental"
    seed: int | None = None
    num_workers: int = 1
    fft_method: str = "numpy"
    domain: str = "spatial"
    extrapolation_kwargs: dict[str, Any] = field(default_factory=dict)
    filter_kwargs: dict[str, Any] = field(default_factory=dict)
    noise_kwargs: dict[str, Any] = field(default_factory=dict)
    velocity_perturbation_kwargs: dict[str, Any] = field(default_factory=dict)
    mask_kwargs: dict[str, Any] = field(default_factory=dict)
    measure_time: bool = False
    callback: Callable[[Any], None] | None = None
    return_output: bool = True
@dataclass
class StepsNowcasterParams:
    # Method handles and derived quantities resolved during initialization
    # and treated as read-mostly during the main nowcast loop.
    fft: Any = None
    bandpass_filter: Any = None
    extrapolation_method: Any = None
    decomposition_method: Any = None
    recomposition_method: Any = None
    noise_generator: Callable | None = None
    perturbation_generator: Callable | None = None
    noise_std_coefficients: np.ndarray | None = None
    ar_model_coefficients: np.ndarray | None = None  # Corresponds to phi
    autocorrelation_coefficients: np.ndarray | None = None  # Corresponds to gamma
    domain_mask: np.ndarray | None = None
    structuring_element: np.ndarray | None = None
    precipitation_mean: float | None = None
    wet_area_ratio: float | None = None
    mask_rim: int | None = None
    num_ensemble_workers: int = 1
    xy_coordinates: np.ndarray | None = None
    # 'p_par' / 'p_perp' coefficient triplets for the velocity perturbator
    velocity_perturbation_parallel: list[float] | None = None
    velocity_perturbation_perpendicular: list[float] | None = None
    # Deep copies of the user-supplied kwargs dicts (normalized from None)
    filter_kwargs: dict | None = None
    noise_kwargs: dict | None = None
    velocity_perturbation_kwargs: dict | None = None
    mask_kwargs: dict | None = None


@dataclass
class StepsNowcasterState:
    # Mutable per-run state updated while the nowcast is computed.
    precip_forecast: list[Any] | None = field(default_factory=list)
    # One cascade decomposition per ensemble member
    precip_cascades: list[list[np.ndarray]] | None = field(default_factory=list)
    precip_decomposed: list[dict[str, Any]] | None = field(default_factory=list)
    # The observation mask (where the radar can observe the precipitation)
    precip_mask: list[Any] | None = field(default_factory=list)
    precip_mask_decomposed: dict[str, Any] | None = field(default_factory=dict)
    # The mask around the precipitation fields (to get only non-zero values)
    mask_precip: np.ndarray | None = None
    mask_threshold: np.ndarray | None = None
    # One independent random state per ensemble member for the precipitation
    # noise and for the motion perturbations, respectively
    random_generator_precip: list[np.random.RandomState] | None = field(
        default_factory=list
    )
    random_generator_motion: list[np.random.RandomState] | None = field(
        default_factory=list
    )
    velocity_perturbations: list[Callable] | None = field(default_factory=list)
    fft_objects: list[Any] | None = field(default_factory=list)
    extrapolation_kwargs: dict[str, Any] | None = field(default_factory=dict)
    def compute_forecast(self):
        """
        Generate a nowcast ensemble by using the Short-Term Ensemble
        Prediction System (STEPS) method.

        Notes
        -----
        The following inputs are supplied to the constructor of
        StepsNowcaster, not to this method:

        precip: array-like
            Array of shape (ar_order+1,m,n) containing the input precipitation
            fields ordered by timestamp from oldest to newest. The time steps
            between the inputs are assumed to be regular.
        velocity: array-like
            Array of shape (2,m,n) containing the x- and y-components of the
            advection field. The velocities are assumed to represent one time
            step between the inputs. All values are required to be finite.
        timesteps: int or list of floats
            Number of time steps to forecast or a list of time steps for which
            the forecasts are computed (relative to the input time step). The
            elements of the list are required to be in ascending order.
        config: StepsNowcasterConfig
            Provides a set of configuration parameters for the nowcast
            ensemble generation.

        Returns
        -------
        out: ndarray
            If return_output is True, a four-dimensional array of shape
            (n_ens_members,num_timesteps,m,n) containing a time series of
            forecast precipitation fields for each ensemble member. Otherwise,
            a None value is returned. The time series starts from t0+timestep,
            where timestep is taken from the input precipitation fields. If
            measure_time is True, the return value is a three-element tuple
            containing the nowcast array, the initialization time of the
            nowcast generator and the time used in the main loop (seconds).

        See also
        --------
        pysteps.extrapolation.interface, pysteps.cascade.interface,
        pysteps.noise.interface, pysteps.noise.utils.compute_noise_stddev_adjs

        References
        ----------
        :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b`
        """
        # Validate inputs and normalize kwargs (also populates state/params)
        self.__check_inputs()
        self.__print_forecast_info()
        # Measure time for initialization
        if self.__config.measure_time:
            self.__start_time_init = time.time()

        # Slice the precipitation field to only use the last ar_order + 1 fields
        self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy()
        self.__initialize_nowcast_components()
        # Short-circuit: when there is (almost) no rain in the input, return a
        # trivial zero-precipitation forecast instead of running the full model
        if check_norain(
            self.__precip,
            self.__config.precip_threshold,
            self.__config.norain_threshold,
            self.__params.noise_kwargs["win_fun"],
        ):
            return zero_precipitation_forecast(
                self.__config.n_ens_members,
                self.__time_steps,
                self.__precip,
                self.__config.callback,
                self.__config.return_output,
                self.__config.measure_time,
                self.__start_time_init,
            )

        self.__perform_extrapolation()
        self.__apply_noise_and_ar_model()
        self.__initialize_velocity_perturbations()
        self.__initialize_precipitation_mask()
        self.__initialize_fft_objects()
        # Measure and print initialization time
        if self.__config.measure_time:
            self.__init_time = self.__measure_time(
                "Initialization", self.__start_time_init
            )

        # Run the main nowcast loop
        self.__nowcast_main()

        # Unstack nowcast output if return_output is True
        if self.__config.measure_time:
            # NOTE(review): with measure_time, the main loop appears to return
            # a (forecast, mainloop_time) pair — unpacked here; confirm against
            # nowcast_main_loop's contract.
            (
                self.__state.precip_forecast,
                self.__mainloop_time,
            ) = self.__state.precip_forecast

        # Stack and return the forecast output
        if self.__config.return_output:
            # per-member lists of 2-D fields -> (n_ens_members,num_timesteps,m,n)
            self.__state.precip_forecast = np.stack(
                [
                    np.stack(self.__state.precip_forecast[j])
                    for j in range(self.__config.n_ens_members)
                ]
            )
            if self.__config.measure_time:
                return (
                    self.__state.precip_forecast,
                    self.__init_time,
                    self.__mainloop_time,
                )
            else:
                return self.__state.precip_forecast
        else:
            return None
""" # Isolate the last time slice of observed precipitation precip = self.__precip[ -1, :, : ] # Extract the last available precipitation field # Prepare state and params dictionaries, these need to be formatted a specific way for the nowcast_main_loop state = self.__return_state_dict() params = self.__return_params_dict(precip) print("Starting nowcast computation.") # Run the nowcast main loop self.__state.precip_forecast = nowcast_main_loop( precip, self.__velocity, state, self.__time_steps, self.__config.extrapolation_method, self.__update_state, # Reference to the update function extrap_kwargs=self.__state.extrapolation_kwargs, velocity_pert_gen=self.__state.velocity_perturbations, params=params, ensemble=True, num_ensemble_members=self.__config.n_ens_members, callback=self.__config.callback, return_output=self.__config.return_output, num_workers=self.__params.num_ensemble_workers, measure_time=self.__config.measure_time, ) def __check_inputs(self): """ Validate the inputs to ensure consistency and correct shapes. """ if self.__precip.ndim != 3: raise ValueError("precip must be a three-dimensional array") if self.__precip.shape[0] < self.__config.ar_order + 1: raise ValueError( f"precip.shape[0] must be at least ar_order+1, " f"but found {self.__precip.shape[0]}" ) if self.__velocity.ndim != 3: raise ValueError("velocity must be a three-dimensional array") if self.__precip.shape[1:3] != self.__velocity.shape[1:3]: raise ValueError( f"Dimension mismatch between precip and velocity: " f"shape(precip)={self.__precip.shape}, shape(velocity)={self.__velocity.shape}" ) if ( isinstance(self.__time_steps, list) and not sorted(self.__time_steps) == self.__time_steps ): raise ValueError("timesteps must be in ascending order") if np.any(~np.isfinite(self.__velocity)): raise ValueError("velocity contains non-finite values") if self.__config.mask_method not in ["obs", "sprog", "incremental", None]: raise ValueError( f"Unknown mask method '{self.__config.mask_method}'. 
" "Must be 'obs', 'sprog', 'incremental', or None." ) if self.__config.precip_threshold is None: if self.__config.conditional: raise ValueError("conditional=True but precip_thr is not specified.") if self.__config.mask_method is not None: raise ValueError("mask_method is set but precip_thr is not specified.") if self.__config.probmatching_method == "mean": raise ValueError( "probmatching_method='mean' but precip_thr is not specified." ) if ( self.__config.noise_method is not None and self.__config.noise_stddev_adj == "auto" ): raise ValueError( "noise_stddev_adj='auto' but precip_thr is not specified." ) if self.__config.noise_stddev_adj not in ["auto", "fixed", None]: raise ValueError( f"Unknown noise_stddev_adj method '{self.__config.noise_stddev_adj}'. " "Must be 'auto', 'fixed', or None." ) if self.__config.kmperpixel is None: if self.__config.velocity_perturbation_method is not None: raise ValueError("vel_pert_method is set but kmperpixel=None") if self.__config.mask_method == "incremental": raise ValueError("mask_method='incremental' but kmperpixel=None") if self.__config.timestep is None: if self.__config.velocity_perturbation_method is not None: raise ValueError("vel_pert_method is set but timestep=None") if self.__config.mask_method == "incremental": raise ValueError("mask_method='incremental' but timestep=None") # Handle None values for various kwargs if self.__config.extrapolation_kwargs is None: self.__state.extrapolation_kwargs = dict() else: self.__state.extrapolation_kwargs = deepcopy( self.__config.extrapolation_kwargs ) if self.__config.filter_kwargs is None: self.__params.filter_kwargs = dict() else: self.__params.filter_kwargs = deepcopy(self.__config.filter_kwargs) if self.__config.noise_kwargs is None: self.__params.noise_kwargs = {"win_fun": "tukey"} else: self.__params.noise_kwargs = deepcopy(self.__config.noise_kwargs) if self.__config.velocity_perturbation_kwargs is None: self.__params.velocity_perturbation_kwargs = dict() else: 
self.__params.velocity_perturbation_kwargs = deepcopy( self.__config.velocity_perturbation_kwargs ) if self.__config.mask_kwargs is None: self.__params.mask_kwargs = dict() else: self.__params.mask_kwargs = deepcopy(self.__config.mask_kwargs) print("Inputs validated and initialized successfully.") def __print_forecast_info(self): """ Print information about the forecast setup, including inputs, methods, and parameters. """ print("Computing STEPS nowcast") print("-----------------------") print("") print("Inputs") print("------") print(f"input dimensions: {self.__precip.shape[1]}x{self.__precip.shape[2]}") if self.__config.kmperpixel is not None: print(f"km/pixel: {self.__config.kmperpixel}") if self.__config.timestep is not None: print(f"time step: {self.__config.timestep} minutes") print("") print("Methods") print("-------") print(f"extrapolation: {self.__config.extrapolation_method}") print(f"bandpass filter: {self.__config.bandpass_filter_method}") print(f"decomposition: {self.__config.decomposition_method}") print(f"noise generator: {self.__config.noise_method}") print( "noise adjustment: {}".format( ("yes" if self.__config.noise_stddev_adj else "no") ) ) print(f"velocity perturbator: {self.__config.velocity_perturbation_method}") print( "conditional statistics: {}".format( ("yes" if self.__config.conditional else "no") ) ) print(f"precip. 
mask method: {self.__config.mask_method}") print(f"probability matching: {self.__config.probmatching_method}") print(f"FFT method: {self.__config.fft_method}") print(f"domain: {self.__config.domain}") print("") print("Parameters") print("----------") if isinstance(self.__time_steps, int): print(f"number of time steps: {self.__time_steps}") else: print(f"time steps: {self.__time_steps}") print(f"ensemble size: {self.__config.n_ens_members}") print(f"parallel threads: {self.__config.num_workers}") print(f"number of cascade levels: {self.__config.n_cascade_levels}") print(f"order of the AR(p) model: {self.__config.ar_order}") if self.__config.velocity_perturbation_method == "bps": self.__params.velocity_perturbation_parallel = ( self.__params.velocity_perturbation_kwargs.get( "p_par", noise.motion.get_default_params_bps_par() ) ) self.__params.velocity_perturbation_perpendicular = ( self.__params.velocity_perturbation_kwargs.get( "p_perp", noise.motion.get_default_params_bps_perp() ) ) print( f"velocity perturbations, parallel: {self.__params.velocity_perturbation_parallel[0]},{self.__params.velocity_perturbation_parallel[1]},{self.__params.velocity_perturbation_parallel[2]}" ) print( f"velocity perturbations, perpendicular: {self.__params.velocity_perturbation_perpendicular[0]},{self.__params.velocity_perturbation_perpendicular[1]},{self.__params.velocity_perturbation_perpendicular[2]}" ) if self.__config.precip_threshold is not None: print(f"precip. intensity threshold: {self.__config.precip_threshold}") def __initialize_nowcast_components(self): """ Initialize the FFT, bandpass filters, decomposition methods, and extrapolation method. 
""" # Initialize number of ensemble workers self.__params.num_ensemble_workers = min( self.__config.n_ens_members, self.__config.num_workers ) M, N = self.__precip.shape[1:] # Extract the spatial dimensions (height, width) # Initialize FFT method self.__params.fft = utils.get_method( self.__config.fft_method, shape=(M, N), n_threads=self.__config.num_workers ) # Initialize the band-pass filter for the cascade decomposition filter_method = cascade.get_method(self.__config.bandpass_filter_method) self.__params.bandpass_filter = filter_method( (M, N), self.__config.n_cascade_levels, **(self.__params.filter_kwargs or {}), ) # Get the decomposition method (e.g., FFT) ( self.__params.decomposition_method, self.__params.recomposition_method, ) = cascade.get_method(self.__config.decomposition_method) # Get the extrapolation method (e.g., semilagrangian) self.__params.extrapolation_method = extrapolation.get_method( self.__config.extrapolation_method ) # Generate the mesh grid for spatial coordinates x_values, y_values = np.meshgrid(np.arange(N), np.arange(M)) self.__params.xy_coordinates = np.stack([x_values, y_values]) # Determine the domain mask from non-finite values in the precipitation data self.__params.domain_mask = np.logical_or.reduce( [~np.isfinite(self.__precip[i, :]) for i in range(self.__precip.shape[0])] ) print("Nowcast components initialized successfully.") def __perform_extrapolation(self): """ Extrapolate (advect) precipitation fields based on the velocity field to align them in time. This prepares the precipitation fields for autoregressive modeling. 
""" # Determine the precipitation threshold mask if conditional is set if self.__config.conditional: self.__state.mask_threshold = np.logical_and.reduce( [ self.__precip[i, :, :] >= self.__config.precip_threshold for i in range(self.__precip.shape[0]) ] ) else: self.__state.mask_threshold = None extrap_kwargs = self.__state.extrapolation_kwargs.copy() extrap_kwargs["xy_coords"] = self.__params.xy_coordinates extrap_kwargs["allow_nonfinite_values"] = ( True if np.any(~np.isfinite(self.__precip)) else False ) res = [] def __extrapolate_single_field(precip, i): # Extrapolate a single precipitation field using the velocity field return self.__params.extrapolation_method( precip[i, :, :], self.__velocity, self.__config.ar_order - i, "min", **extrap_kwargs, )[-1] for i in range(self.__config.ar_order): if ( not DASK_IMPORTED ): # If Dask is not available, perform sequential extrapolation self.__precip[i, :, :] = __extrapolate_single_field(self.__precip, i) else: # If Dask is available, accumulate delayed computations for parallel execution res.append(dask.delayed(__extrapolate_single_field)(self.__precip, i)) # If Dask is available, perform the parallel computation if DASK_IMPORTED and res: num_workers_ = min(self.__params.num_ensemble_workers, len(res)) self.__precip = np.stack( list(dask.compute(*res, num_workers=num_workers_)) + [self.__precip[-1, :, :]] ) print("Extrapolation complete and precipitation fields aligned.") def __apply_noise_and_ar_model(self): """ Apply noise and autoregressive (AR) models to precipitation cascades. This method applies the AR model to the decomposed precipitation cascades and adds noise perturbations if necessary. 
""" # Make a copy of the precipitation data and replace non-finite values precip = self.__precip.copy() for i in range(self.__precip.shape[0]): # Replace non-finite values with the minimum finite value of the precipitation field precip[i, ~np.isfinite(precip[i, :])] = np.nanmin(precip[i, :]) # Store the precipitation data back in the object self.__precip = precip # Initialize the noise generator if the noise_method is provided if self.__config.noise_method is not None: np.random.seed( self.__config.seed ) # Set the random seed for reproducibility init_noise, generate_noise = noise.get_method(self.__config.noise_method) self.__params.noise_generator = generate_noise self.__params.perturbation_generator = init_noise( self.__precip, fft_method=self.__params.fft, **self.__params.noise_kwargs, ) # Handle noise standard deviation adjustments if necessary if self.__config.noise_stddev_adj == "auto": print("Computing noise adjustment coefficients... ", end="", flush=True) if self.__config.measure_time: starttime = time.time() # Compute noise adjustment coefficients self.__params.noise_std_coefficients = ( noise.utils.compute_noise_stddev_adjs( self.__precip[-1, :, :], self.__config.precip_threshold, np.min(self.__precip), self.__params.bandpass_filter, self.__params.decomposition_method, self.__params.perturbation_generator, self.__params.noise_generator, 20, conditional=self.__config.conditional, num_workers=self.__config.num_workers, seed=self.__config.seed, ) ) # Measure and print time taken if self.__config.measure_time: __ = self.__measure_time( "Noise adjustment coefficient computation", starttime ) else: print("done.") elif self.__config.noise_stddev_adj == "fixed": # Set fixed noise adjustment coefficients func = lambda k: 1.0 / (0.75 + 0.09 * k) self.__params.noise_std_coefficients = [ func(k) for k in range(1, self.__config.n_cascade_levels + 1) ] else: # Default to no adjustment self.__params.noise_std_coefficients = np.ones( self.__config.n_cascade_levels ) if 
self.__config.noise_stddev_adj is not None: # Print noise std deviation coefficients if adjustments were made print( f"noise std. dev. coeffs: {str(self.__params.noise_std_coefficients)}" ) else: # No noise, so set perturbation generator and noise_std_coefficients to None self.__params.perturbation_generator = None self.__params.noise_std_coefficients = np.ones( self.__config.n_cascade_levels ) # Keep default as 1.0 to avoid breaking AR model # Decompose the input precipitation fields self.__state.precip_decomposed = [] for i in range(self.__config.ar_order + 1): precip_ = self.__params.decomposition_method( self.__precip[i, :, :], self.__params.bandpass_filter, mask=self.__state.mask_threshold, fft_method=self.__params.fft, output_domain=self.__config.domain, normalize=True, compute_stats=True, compact_output=True, ) self.__state.precip_decomposed.append(precip_) # Normalize the cascades and rearrange them into a 4D array self.__state.precip_cascades = nowcast_utils.stack_cascades( self.__state.precip_decomposed, self.__config.n_cascade_levels ) self.__state.precip_decomposed = self.__state.precip_decomposed[-1] self.__state.precip_decomposed = [ self.__state.precip_decomposed.copy() for _ in range(self.__config.n_ens_members) ] # Compute temporal autocorrelation coefficients for each cascade level self.__params.autocorrelation_coefficients = np.empty( (self.__config.n_cascade_levels, self.__config.ar_order) ) for i in range(self.__config.n_cascade_levels): self.__params.autocorrelation_coefficients[i, :] = ( correlation.temporal_autocorrelation( self.__state.precip_cascades[i], mask=self.__state.mask_threshold ) ) nowcast_utils.print_corrcoefs(self.__params.autocorrelation_coefficients) # Adjust the lag-2 correlation coefficient if AR(2) model is used if self.__config.ar_order == 2: for i in range(self.__config.n_cascade_levels): self.__params.autocorrelation_coefficients[i, 1] = ( autoregression.adjust_lag2_corrcoef2( 
self.__params.autocorrelation_coefficients[i, 0], self.__params.autocorrelation_coefficients[i, 1], ) ) # Estimate the parameters of the AR model using auto-correlation coefficients self.__params.ar_model_coefficients = np.empty( (self.__config.n_cascade_levels, self.__config.ar_order + 1) ) for i in range(self.__config.n_cascade_levels): self.__params.ar_model_coefficients[i, :] = ( autoregression.estimate_ar_params_yw( self.__params.autocorrelation_coefficients[i, :] ) ) nowcast_utils.print_ar_params(self.__params.ar_model_coefficients) # Discard all except the last ar_order cascades for AR model self.__state.precip_cascades = [ self.__state.precip_cascades[i][-self.__config.ar_order :] for i in range(self.__config.n_cascade_levels) ] # Stack the cascades into a list containing all ensemble members self.__state.precip_cascades = [ [ self.__state.precip_cascades[j].copy() for j in range(self.__config.n_cascade_levels) ] for _ in range(self.__config.n_ens_members) ] # Initialize random generators if noise_method is provided if self.__config.noise_method is not None: self.__state.random_generator_precip = [] self.__state.random_generator_motion = [] seed = self.__config.seed for _ in range(self.__config.n_ens_members): # Create random state for precipitation noise generator rs = np.random.RandomState(seed) self.__state.random_generator_precip.append(rs) seed = rs.randint(0, high=int(1e9)) # Create random state for motion perturbations generator rs = np.random.RandomState(seed) self.__state.random_generator_motion.append(rs) seed = rs.randint(0, high=int(1e9)) else: self.__state.random_generator_precip = None self.__state.random_generator_motion = None print("AR model and noise applied to precipitation cascades.") def __initialize_velocity_perturbations(self): """ Initialize the velocity perturbators for each ensemble member if the velocity perturbation method is specified. 
""" if self.__config.velocity_perturbation_method is not None: init_vel_noise, generate_vel_noise = noise.get_method( self.__config.velocity_perturbation_method ) self.__state.velocity_perturbations = [] for j in range(self.__config.n_ens_members): kwargs = { "randstate": self.__state.random_generator_motion[j], "p_par": self.__params.velocity_perturbation_kwargs.get( "p_par", self.__params.velocity_perturbation_parallel ), "p_perp": self.__params.velocity_perturbation_kwargs.get( "p_perp", self.__params.velocity_perturbation_perpendicular ), } vp = init_vel_noise( self.__velocity, 1.0 / self.__config.kmperpixel, self.__config.timestep, **kwargs, ) self.__state.velocity_perturbations.append( lambda t, vp=vp: generate_vel_noise(vp, t * self.__config.timestep) ) else: self.__state.velocity_perturbations = None print("Velocity perturbations initialized successfully.") def __initialize_precipitation_mask(self): """ Initialize the precipitation mask and handle different mask methods (sprog, incremental). 
""" self.__state.precip_forecast = [[] for _ in range(self.__config.n_ens_members)] if self.__config.probmatching_method == "mean": self.__params.precipitation_mean = np.mean( self.__precip[-1, :, :][ self.__precip[-1, :, :] >= self.__config.precip_threshold ] ) else: self.__params.precipitation_mean = None if self.__config.mask_method is not None: self.__state.mask_precip = ( self.__precip[-1, :, :] >= self.__config.precip_threshold ) if self.__config.mask_method == "sprog": # Compute the wet area ratio and the precipitation mask self.__params.wet_area_ratio = np.sum(self.__state.mask_precip) / ( self.__precip.shape[1] * self.__precip.shape[2] ) self.__state.precip_mask = [ self.__state.precip_cascades[0][i].copy() for i in range(self.__config.n_cascade_levels) ] self.__state.precip_mask_decomposed = self.__state.precip_decomposed[ 0 ].copy() elif self.__config.mask_method == "incremental": # Get mask parameters self.__params.mask_rim = self.__params.mask_kwargs.get("mask_rim", 10) mask_f = self.__params.mask_kwargs.get("mask_f", 1.0) # Initialize the structuring element self.__params.structuring_element = generate_binary_structure(2, 1) # Expand the structuring element based on mask factor and timestep n = mask_f * self.__config.timestep / self.__config.kmperpixel self.__params.structuring_element = iterate_structure( self.__params.structuring_element, int((n - 1) / 2.0) ) # Compute and apply the dilated mask for each ensemble member self.__state.mask_precip = nowcast_utils.compute_dilated_mask( self.__state.mask_precip, self.__params.structuring_element, self.__params.mask_rim, ) self.__state.mask_precip = [ self.__state.mask_precip.copy() for _ in range(self.__config.n_ens_members) ] else: self.__state.mask_precip = None if self.__config.noise_method is None and self.__state.precip_mask is None: self.__state.precip_mask = [ self.__state.precip_cascades[0][i].copy() for i in range(self.__config.n_cascade_levels) ] print("Precipitation mask initialized 
successfully.") def __initialize_fft_objects(self): """ Initialize FFT objects for each ensemble member. """ self.__state.fft_objs = [] for _ in range(self.__config.n_ens_members): fft_obj = utils.get_method( self.__config.fft_method, shape=self.__precip.shape[1:] ) self.__state.fft_objs.append(fft_obj) print("FFT objects initialized successfully.") def __return_state_dict(self): """ Initialize the state dictionary used during the nowcast iteration. """ return { "fft_objs": self.__state.fft_objs, "mask_prec": self.__state.mask_precip, "precip_cascades": self.__state.precip_cascades, "precip_decomp": self.__state.precip_decomposed, "precip_m": self.__state.precip_mask, "precip_m_d": self.__state.precip_mask_decomposed, "randgen_prec": self.__state.random_generator_precip, } def __return_params_dict(self, precip): """ Initialize the params dictionary used during the nowcast iteration. """ return { "decomp_method": self.__params.decomposition_method, "domain": self.__config.domain, "domain_mask": self.__params.domain_mask, "filter": self.__params.bandpass_filter, "fft": self.__params.fft, "generate_noise": self.__params.noise_generator, "mask_method": self.__config.mask_method, "mask_rim": self.__params.mask_rim, "mu_0": self.__params.precipitation_mean, "n_cascade_levels": self.__config.n_cascade_levels, "n_ens_members": self.__config.n_ens_members, "noise_method": self.__config.noise_method, "noise_std_coeffs": self.__params.noise_std_coefficients, "num_ensemble_workers": self.__params.num_ensemble_workers, "phi": self.__params.ar_model_coefficients, "pert_gen": self.__params.perturbation_generator, "probmatching_method": self.__config.probmatching_method, "precip": precip, "precip_thr": self.__config.precip_threshold, "recomp_method": self.__params.recomposition_method, "struct": self.__params.structuring_element, "war": self.__params.wet_area_ratio, } def __update_state(self, state, params): """ Update the state during the nowcasting loop. 
This function handles the AR model iteration, noise generation, recomposition, and mask application for each ensemble member. """ precip_forecast_out = [None] * params["n_ens_members"] # Update the deterministic AR(p) model if noise or sprog mask is used if params["noise_method"] is None or params["mask_method"] == "sprog": self.__update_deterministic_ar_model(state, params) # Worker function for each ensemble member def worker(j): self.__apply_ar_model_to_cascades(j, state, params) precip_forecast_out[j] = self.__recompose_and_apply_mask(j, state, params) # Use Dask for parallel execution if available if ( DASK_IMPORTED and params["n_ens_members"] > 1 and params["num_ensemble_workers"] > 1 ): res = [] for j in range(params["n_ens_members"]): res.append(dask.delayed(worker)(j)) dask.compute(*res, num_workers=params["num_ensemble_workers"]) else: for j in range(params["n_ens_members"]): worker(j) return np.stack(precip_forecast_out), state def __update_deterministic_ar_model(self, state, params): """ Update the deterministic AR(p) model for each cascade level if noise is disabled or if the sprog mask is used. """ for i in range(params["n_cascade_levels"]): state["precip_m"][i] = autoregression.iterate_ar_model( state["precip_m"][i], params["phi"][i, :] ) state["precip_m_d"]["cascade_levels"] = [ state["precip_m"][i][-1] for i in range(params["n_cascade_levels"]) ] if params["domain"] == "spatial": state["precip_m_d"]["cascade_levels"] = np.stack( state["precip_m_d"]["cascade_levels"] ) precip_m_ = params["recomp_method"](state["precip_m_d"]) if params["domain"] == "spectral": precip_m_ = params["fft"].irfft2(precip_m_) if params["mask_method"] == "sprog": state["mask_prec"] = compute_percentile_mask(precip_m_, params["war"]) def __apply_ar_model_to_cascades(self, j, state, params): """ Apply the AR(p) model to the cascades for each ensemble member, including noise generation and normalization. 
""" # Generate noise if enabled if params["noise_method"] is not None: eps = self.__generate_and_decompose_noise(j, state, params) else: eps = None # Iterate the AR(p) model for each cascade level for i in range(params["n_cascade_levels"]): if eps is not None: eps_ = eps["cascade_levels"][i] eps_ *= params["noise_std_coeffs"][i] else: eps_ = None # Apply the AR(p) model with or without perturbations if eps is not None or params["vel_pert_method"] is not None: state["precip_cascades"][j][i] = autoregression.iterate_ar_model( state["precip_cascades"][j][i], params["phi"][i, :], eps=eps_ ) else: # use the deterministic AR(p) model computed above if # perturbations are disabled state["precip_cascades"][j][i] = state["precip_m"][i] eps = None eps_ = None def __generate_and_decompose_noise(self, j, state, params): """ Generate and decompose the noise field into cascades for a given ensemble member. """ eps = params["generate_noise"]( params["pert_gen"], randstate=state["randgen_prec"][j], fft_method=state["fft_objs"][j], domain=params["domain"], ) eps = params["decomp_method"]( eps, params["filter"], fft_method=state["fft_objs"][j], input_domain=params["domain"], output_domain=params["domain"], compute_stats=True, normalize=True, compact_output=True, ) return eps def __recompose_and_apply_mask(self, j, state, params): """ Recompose the precipitation field from cascades and apply the precipitation mask. 
""" state["precip_decomp"][j]["cascade_levels"] = [ state["precip_cascades"][j][i][-1, :] for i in range(params["n_cascade_levels"]) ] if params["domain"] == "spatial": state["precip_decomp"][j]["cascade_levels"] = np.stack( state["precip_decomp"][j]["cascade_levels"] ) precip_forecast = params["recomp_method"](state["precip_decomp"][j]) if params["domain"] == "spectral": precip_forecast = state["fft_objs"][j].irfft2(precip_forecast) # Apply the precipitation mask if params["mask_method"] is not None: precip_forecast = self.__apply_precipitation_mask( precip_forecast, j, state, params ) # Adjust the CDF of the forecast to match the observed precipitation field if params["probmatching_method"] == "cdf": precip_forecast = probmatching.nonparam_match_empirical_cdf( precip_forecast, params["precip"] ) # Adjust the mean of the forecast to match the observed mean elif params["probmatching_method"] == "mean": mask = precip_forecast >= params["precip_thr"] mu_fct = np.mean(precip_forecast[mask]) precip_forecast[mask] = precip_forecast[mask] - mu_fct + params["mu_0"] # Update the mask for incremental method if params["mask_method"] == "incremental": state["mask_prec"][j] = nowcast_utils.compute_dilated_mask( precip_forecast >= params["precip_thr"], params["struct"], params["mask_rim"], ) # Apply the domain mask (set masked areas to NaN) precip_forecast[params["domain_mask"]] = np.nan return precip_forecast def __apply_precipitation_mask(self, precip_forecast, j, state, params): """ Apply the precipitation mask to prevent new precipitation from generating in areas where it was not observed. 
""" precip_forecast_min = precip_forecast.min() if params["mask_method"] == "incremental": precip_forecast = ( precip_forecast_min + (precip_forecast - precip_forecast_min) * state["mask_prec"][j] ) mask_prec_ = precip_forecast > precip_forecast_min else: mask_prec_ = state["mask_prec"] # Set to min value outside the mask precip_forecast[~mask_prec_] = precip_forecast_min return precip_forecast def __measure_time(self, label, start_time): """ Measure and print the time taken for a specific part of the process. Parameters: - label: A description of the part of the process being measured. - start_time: The timestamp when the process started (from time.time()). """ if self.__config.measure_time: elapsed_time = time.time() - start_time print(f"{label} took {elapsed_time:.2f} seconds.") return elapsed_time return None def reset_states_and_params(self): """ Reset the internal state and parameters of the nowcaster to allow multiple forecasts. This method resets the state and params to their initial conditions without reinitializing the inputs like precip, velocity, time_steps, or config. 
""" # Re-initialize the state and parameters self.__state = StepsNowcasterState() self.__params = StepsNowcasterParams() # Reset time measurement variables self.__start_time_init = None self.__init_time = None self.__mainloop_time = None # Wrapper function to preserve backward compatibility def forecast( precip, velocity, timesteps, n_ens_members=24, n_cascade_levels=6, precip_thr=None, norain_thr=0.0, kmperpixel=None, timestep=None, extrap_method="semilagrangian", decomp_method="fft", bandpass_filter_method="gaussian", noise_method="nonparametric", noise_stddev_adj=None, ar_order=2, vel_pert_method="bps", conditional=False, probmatching_method="cdf", mask_method="incremental", seed=None, num_workers=1, fft_method="numpy", domain="spatial", extrap_kwargs=None, filter_kwargs=None, noise_kwargs=None, vel_pert_kwargs=None, mask_kwargs=None, measure_time=False, callback=None, return_output=True, ): """ Generate a nowcast ensemble by using the Short-Term Ensemble Prediction System (STEPS) method. Parameters ---------- precip: array-like Array of shape (ar_order+1,m,n) containing the input precipitation fields ordered by timestamp from oldest to newest. The time steps between the inputs are assumed to be regular. velocity: array-like Array of shape (2,m,n) containing the x- and y-components of the advection field. The velocities are assumed to represent one time step between the inputs. All values are required to be finite. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements of the list are required to be in ascending order. n_ens_members: int, optional The number of ensemble members to generate. n_cascade_levels: int, optional The number of cascade levels to use. Defaults to 6, see issue #385 on GitHub. precip_thr: float, optional Specifies the threshold value for minimum observable precipitation intensity. 
Required if mask_method is not None or conditional is True. norain_thr: float Specifies the threshold value for the fraction of rainy (see above) pixels in the radar rainfall field below which we consider there to be no rain. Depends on the amount of clutter typically present. Standard set to 0.0 kmperpixel: float, optional Spatial resolution of the input data (kilometers/pixel). Required if vel_pert_method is not None or mask_method is 'incremental'. timestep: float, optional Time step of the motion vectors (minutes). Required if vel_pert_method is not None or mask_method is 'incremental'. extrap_method: str, optional Name of the extrapolation method to use. See the documentation of pysteps.extrapolation.interface. decomp_method: {'fft'}, optional Name of the cascade decomposition method to use. See the documentation of pysteps.cascade.interface. bandpass_filter_method: {'gaussian', 'uniform'}, optional Name of the bandpass filter method to use with the cascade decomposition. See the documentation of pysteps.cascade.interface. noise_method: {'parametric','nonparametric','ssft','nested',None}, optional Name of the noise generator to use for perturbating the precipitation field. See the documentation of pysteps.noise.interface. If set to None, no noise is generated. noise_stddev_adj: {'auto','fixed',None}, optional Optional adjustment for the standard deviations of the noise fields added to each cascade level. This is done to compensate incorrect std. dev. estimates of casace levels due to presence of no-rain areas. 'auto'=use the method implemented in pysteps.noise.utils.compute_noise_stddev_adjs. 'fixed'= use the formula given in :cite:`BPS2006` (eq. 6), None=disable noise std. dev adjustment. ar_order: int, optional The order of the autoregressive model to use. Must be >= 1. vel_pert_method: {'bps',None}, optional Name of the noise generator to use for perturbing the advection field. See the documentation of pysteps.noise.interface. 
If set to None, the advection field is not perturbed. conditional: bool, optional If set to True, compute the statistics of the precipitation field conditionally by excluding pixels where the values are below the threshold precip_thr. mask_method: {'obs','sprog','incremental',None}, optional The method to use for masking no precipitation areas in the forecast field. The masked pixels are set to the minimum value of the observations. 'obs' = apply precip_thr to the most recently observed precipitation intensity field, 'sprog' = use the smoothed forecast field from S-PROG, where the AR(p) model has been applied, 'incremental' = iteratively buffer the mask with a certain rate (currently it is 1 km/min), None=no masking. probmatching_method: {'cdf','mean',None}, optional Method for matching the statistics of the forecast field with those of the most recently observed one. 'cdf'=map the forecast CDF to the observed one, 'mean'=adjust only the conditional mean value of the forecast field in precipitation areas, None=no matching applied. Using 'mean' requires that precip_thr and mask_method are not None. seed: int, optional Optional seed number for the random generators. num_workers: int, optional The number of workers to use for parallel computation. Applicable if dask is enabled or pyFFTW is used for computing the FFT. When num_workers>1, it is advisable to disable OpenMP by setting the environment variable OMP_NUM_THREADS to 1. This avoids slowdown caused by too many simultaneous threads. fft_method: str, optional A string defining the FFT method to use (see utils.fft.get_method). Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed, the recommended method is 'pyfftw'. domain: {"spatial", "spectral"} If "spatial", all computations are done in the spatial domain (the classical STEPS model). 
If "spectral", the AR(2) models and stochastic perturbations are applied directly in the spectral domain to reduce memory footprint and improve performance :cite:`PCH2019b`. extrap_kwargs: dict, optional Optional dictionary containing keyword arguments for the extrapolation method. See the documentation of pysteps.extrapolation. filter_kwargs: dict, optional Optional dictionary containing keyword arguments for the filter method. See the documentation of pysteps.cascade.bandpass_filters.py. noise_kwargs: dict, optional Optional dictionary containing keyword arguments for the initializer of the noise generator. See the documentation of pysteps.noise.fftgenerators. vel_pert_kwargs: dict, optional Optional dictionary containing keyword arguments 'p_par' and 'p_perp' for the initializer of the velocity perturbator. The choice of the optimal parameters depends on the domain and the used optical flow method. Default parameters from :cite:`BPS2006`: p_par = [10.88, 0.23, -7.68] p_perp = [5.76, 0.31, -2.72] Parameters fitted to the data (optical flow/domain): darts/fmi: p_par = [13.71259667, 0.15658963, -16.24368207] p_perp = [8.26550355, 0.17820458, -9.54107834] darts/mch: p_par = [24.27562298, 0.11297186, -27.30087471] p_perp = [-7.80797846e+01, -3.38641048e-02, 7.56715304e+01] darts/fmi+mch: p_par = [16.55447057, 0.14160448, -19.24613059] p_perp = [14.75343395, 0.11785398, -16.26151612] lucaskanade/fmi: p_par = [2.20837526, 0.33887032, -2.48995355] p_perp = [2.21722634, 0.32359621, -2.57402761] lucaskanade/mch: p_par = [2.56338484, 0.3330941, -2.99714349] p_perp = [1.31204508, 0.3578426, -1.02499891] lucaskanade/fmi+mch: p_par = [2.31970635, 0.33734287, -2.64972861] p_perp = [1.90769947, 0.33446594, -2.06603662] vet/fmi: p_par = [0.25337388, 0.67542291, 11.04895538] p_perp = [0.02432118, 0.99613295, 7.40146505] vet/mch: p_par = [0.5075159, 0.53895212, 7.90331791] p_perp = [0.68025501, 0.41761289, 4.73793581] vet/fmi+mch: p_par = [0.29495222, 0.62429207, 8.6804131 ] 
p_perp = [0.23127377, 0.59010281, 5.98180004] fmi=Finland, mch=Switzerland, fmi+mch=both pooled into the same data set The above parameters have been fitten by using run_vel_pert_analysis.py and fit_vel_pert_params.py located in the scripts directory. See pysteps.noise.motion for additional documentation. mask_kwargs: dict Optional dictionary containing mask keyword arguments 'mask_f' and 'mask_rim', the factor defining the the mask increment and the rim size, respectively. The mask increment is defined as mask_f*timestep/kmperpixel. measure_time: bool If set to True, measure, print and return the computation time. callback: function, optional Optional function that is called after computation of each time step of the nowcast. The function takes one argument: a three-dimensional array of shape (n_ens_members,h,w), where h and w are the height and width of the input precipitation fields, respectively. This can be used, for instance, writing the outputs into files. return_output: bool, optional Set to False to disable returning the outputs as numpy arrays. This can save memory if the intermediate results are written to output files using the callback function. Returns ------- out: ndarray If return_output is True, a four-dimensional array of shape (n_ens_members,num_timesteps,m,n) containing a time series of forecast precipitation fields for each ensemble member. Otherwise, a None value is returned. The time series starts from t0+timestep, where timestep is taken from the input precipitation fields. If measure_time is True, the return value is a three-element tuple containing the nowcast array, the initialization time of the nowcast generator and the time used in the main loop (seconds). 
See also -------- pysteps.extrapolation.interface, pysteps.cascade.interface, pysteps.noise.interface, pysteps.noise.utils.compute_noise_stddev_adjs References ---------- :cite:`Seed2003`, :cite:`BPS2006`, :cite:`SPN2013`, :cite:`PCH2019b` """ nowcaster_config = StepsNowcasterConfig( n_ens_members=n_ens_members, n_cascade_levels=n_cascade_levels, precip_threshold=precip_thr, norain_threshold=norain_thr, kmperpixel=kmperpixel, timestep=timestep, extrapolation_method=extrap_method, decomposition_method=decomp_method, bandpass_filter_method=bandpass_filter_method, noise_method=noise_method, noise_stddev_adj=noise_stddev_adj, ar_order=ar_order, velocity_perturbation_method=vel_pert_method, conditional=conditional, probmatching_method=probmatching_method, mask_method=mask_method, seed=seed, num_workers=num_workers, fft_method=fft_method, domain=domain, extrapolation_kwargs=extrap_kwargs, filter_kwargs=filter_kwargs, noise_kwargs=noise_kwargs, velocity_perturbation_kwargs=vel_pert_kwargs, mask_kwargs=mask_kwargs, measure_time=measure_time, callback=callback, return_output=return_output, ) # Create an instance of the new class with all the provided arguments nowcaster = StepsNowcaster( precip, velocity, timesteps, steps_config=nowcaster_config ) forecast_steps_nowcast = nowcaster.compute_forecast() nowcaster.reset_states_and_params() # Call the appropriate methods within the class return forecast_steps_nowcast ================================================ FILE: pysteps/nowcasts/utils.py ================================================ """ pysteps.nowcasts.utils ====================== Module with common utilities used by nowcasts methods. .. 
autosummary:: :toctree: ../generated/ binned_timesteps compute_dilated_mask compute_percentile_mask nowcast_main_loop print_ar_params print_corrcoefs stack_cascades """ import time import numpy as np from scipy.ndimage import binary_dilation, generate_binary_structure from pysteps import extrapolation try: import dask DASK_IMPORTED = True except ImportError: DASK_IMPORTED = False def binned_timesteps(timesteps): """ Compute a binning of the given irregular time steps. Parameters ---------- timesteps: array_like List or one-dimensional array containing the time steps in ascending order. Returns ------- out: list List of length int(np.ceil(timesteps[-1]))+1 containing the bins. Each element is a list containing the indices of the time steps falling in the bin (excluding the right edge). """ timesteps = list(timesteps) if not sorted(timesteps) == timesteps: raise ValueError("timesteps is not in ascending order") if np.any(np.array(timesteps) < 0): raise ValueError("negative time steps are not allowed") num_bins = int(np.ceil(timesteps[-1])) timestep_range = np.arange(num_bins + 1) bin_idx = np.digitize(timesteps, timestep_range, right=False) out = [[] for _ in range(num_bins + 1)] for i, bi in enumerate(bin_idx): out[bi - 1].append(i) return out def compute_dilated_mask(input_mask, kr, r): """Buffer the input rain mask using the given kernel. Add a grayscale rim for smooth rain/no-rain transition by iteratively dilating the mask. Parameters ---------- input_mask : array_like Two-dimensional boolean array containing the input mask. kr : array_like Structuring element for the dilation. r : int The number of iterations for the dilation. Returns ------- out : array_like The dilated mask normalized to the range [0,1]. 
""" # buffer the input mask input_mask = np.ndarray.astype(input_mask.copy(), "uint8") mask_dilated = binary_dilation(input_mask, kr) # add grayscale rim kr1 = generate_binary_structure(2, 1) mask = mask_dilated.astype(float) for _ in range(r): mask_dilated = binary_dilation(mask_dilated, kr1) mask += mask_dilated # normalize between 0 and 1 return mask / mask.max() def compute_percentile_mask(precip, pct): """Compute a precipitation mask, where True/False values are assigned for pixels above/below the given percentile. .. _ndarray:\ https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html Parameters ---------- precip: array_like Two-dimensional array of shape (m,n) containing the input precipitation field. pct: float The percentile value. Returns ------- out: ndarray_ Array of shape (m,n), where True/False values are assigned for pixels above/below the precipitation intensity corresponding to the given percentile. """ # obtain the CDF from the input precipitation field precip_s = precip.flatten() # compute the precipitation intensity threshold corresponding to the given # percentile precip_s.sort(kind="quicksort") x = 1.0 * np.arange(1, len(precip_s) + 1)[::-1] / len(precip_s) i = np.argmin(np.abs(x - pct)) # handle ties if precip_s[i] == precip_s[i + 1]: i = np.where(precip_s == precip_s[i])[0][-1] precip_pct_thr = precip_s[i] # determine the mask using the above threshold value return precip >= precip_pct_thr def zero_precipitation_forecast( n_ens_members, timesteps, precip, callback, return_output, measure_time, start_time_init, ): """ Generate a zero-precipitation forecast (filled with the minimum precip value) when no precipitation above the threshold is detected. The forecast is optionally returned or passed to a callback. Parameters ---------- n_ens_members: int, optional The number of ensemble members to generate. 
timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements of the list are required to be in ascending order. precip: array-like Array of shape (ar_order+1,m,n) containing the input precipitation fields ordered by timestamp from oldest to newest. The time steps between the inputs are assumed to be regular. callback: function, optional Optional function that is called after computation of each time step of the nowcast. The function takes one argument: a three-dimensional array of shape (n_ens_members,h,w), where h and w are the height and width of the input precipitation fields, respectively. This can be used, for instance, writing the outputs into files. return_output: bool, optional Set to False to disable returning the outputs as numpy arrays. This can save memory if the intermediate results are written to output files using the callback function. measure_time: bool If set to True, measure, print and return the computation time. start_time_init: float The value of the start time counter used to compute total run time. Returns ------- out: ndarray If return_output is True, a four-dimensional array of shape (n_ens_members,num_timesteps,m,n) containing a time series of forecast precipitation fields for each ensemble member. Otherwise, a None value is returned. The time series starts from t0+timestep, where timestep is taken from the input precipitation fields. If measure_time is True, the return value is a three-element tuple containing the nowcast array, the initialization time of the nowcast generator and the time used in the main loop (seconds). 
""" print("No precipitation above the threshold found in the radar field") print("The resulting forecast will contain only zeros") return_single_member = False if n_ens_members is None: n_ens_members = 1 return_single_member = True # Create the output list precip_forecast = [[] for j in range(n_ens_members)] # Save per time step to ensure the array does not become too large if # no return_output is requested and callback is not None. timesteps, _, __ = create_timestep_range(timesteps) for t, subtimestep_idx in enumerate(timesteps): # If the timestep is not the first one, we need to provide the zero forecast if t > 0: # Create an empty np array with shape [n_ens_members, rows, cols] # and fill it with the minimum value from precip (corresponding to # zero precipitation) N, M = precip.shape[1:] precip_forecast_workers = np.full((n_ens_members, N, M), np.nanmin(precip)) if subtimestep_idx: if callback is not None: if precip_forecast_workers.shape[1] > 0: callback(precip_forecast_workers.squeeze()) if return_output: for j in range(n_ens_members): precip_forecast[j].append(precip_forecast_workers[j]) precip_forecast_workers = None if measure_time: zero_precip_time = time.time() - start_time_init if return_output: precip_forecast_all_members_all_times = np.stack( [np.stack(precip_forecast[j]) for j in range(n_ens_members)] ) if return_single_member: precip_forecast_all_members_all_times = ( precip_forecast_all_members_all_times[0] ) if measure_time: return ( precip_forecast_all_members_all_times, zero_precip_time, zero_precip_time, ) else: return precip_forecast_all_members_all_times else: return None def create_timestep_range(timesteps): """ create a range of time steps if an integer time step is given, create a simple range iterator otherwise, assing the time steps to integer bins so that each bin contains a list of time steps belonging to that bin """ if isinstance(timesteps, int): timesteps = range(timesteps + 1) timestep_type = "int" original_timesteps = None else: 
        # prepend t=0 so the analysis time gets its own bin in the binning
        original_timesteps = [0] + list(timesteps)
        timesteps = binned_timesteps(original_timesteps)
        timestep_type = "list"

    return timesteps, original_timesteps, timestep_type


def nowcast_main_loop(
    precip,
    velocity,
    state,
    timesteps,
    extrap_method,
    func,
    extrap_kwargs=None,
    velocity_pert_gen=None,
    params=None,
    ensemble=False,
    num_ensemble_members=1,
    callback=None,
    return_output=True,
    num_workers=1,
    measure_time=False,
):
    """Utility method for advection-based nowcast models that are applied in
    the Lagrangian coordinates. In addition, this method allows the case,
    where one or more components of the model (e.g. an autoregressive process)
    require using regular integer time steps but the user-supplied values are
    irregular or non-integer.

    Parameters
    ----------
    precip : array_like
        Array of shape (m,n) containing the most recently observed
        precipitation field.
    velocity : array_like
        Array of shape (2,m,n) containing the x- and y-components of the
        advection field.
    state : object
        The initial state of the nowcast model.
    timesteps : int or list of floats
        Number of time steps to forecast or a list of time steps for which the
        forecasts are computed. The elements of the list are required to be in
        ascending order.
    extrap_method : str, optional
        Name of the extrapolation method to use. See the documentation of
        :py:mod:`pysteps.extrapolation.interface`.
    ensemble : bool
        Set to True to produce a nowcast ensemble.
    num_ensemble_members : int
        Number of ensemble members. Applicable if ensemble is set to True.
    func : function
        A function that takes the current state of the nowcast model and its
        parameters and returns a forecast field and the new state. The shape
        of the forecast field is expected to be (m,n) for a deterministic
        nowcast and (n_ens_members,m,n) for an ensemble.
    extrap_kwargs : dict, optional
        Optional dictionary containing keyword arguments for the extrapolation
        method. See the documentation of pysteps.extrapolation.
velocity_pert_gen : list, optional Optional list of functions that generate velocity perturbations. The length of the list is expected to be n_ens_members. The functions are expected to take lead time (relative to timestep index) as input argument and return a perturbation field of shape (2,m,n). params : dict, optional Optional dictionary containing keyword arguments for func. callback : function, optional Optional function that is called after computation of each time step of the nowcast. The function takes one argument: the nowcast array. This can be used, for instance, writing output files. return_output : bool, optional Set to False to disable returning the output forecast fields and return None instead. This can save memory if the intermediate results are instead written to files using the callback function. num_workers : int, optional Number of parallel workers to use. Applicable if a nowcast ensemble is generated. measure_time : bool, optional If set to True, measure, print and return the computation time. Returns ------- out : list List of forecast fields for the given time steps. If measure_time is True, return a pair, where the second element is the total computation time in the loop. 
""" precip_forecast_out = None timesteps, original_timesteps, timestep_type = create_timestep_range(timesteps) state_cur = state if not ensemble: precip_forecast_prev = precip[np.newaxis, :] else: precip_forecast_prev = np.stack([precip for _ in range(num_ensemble_members)]) displacement = None t_prev = 0.0 t_total = 0.0 # initialize the extrapolator extrapolator = extrapolation.get_method(extrap_method) x_values, y_values = np.meshgrid( np.arange(precip.shape[1]), np.arange(precip.shape[0]) ) xy_coords = np.stack([x_values, y_values]) if extrap_kwargs is None: extrap_kwargs = dict() else: extrap_kwargs = extrap_kwargs.copy() extrap_kwargs["xy_coords"] = xy_coords extrap_kwargs["return_displacement"] = True if measure_time: starttime_total = time.time() # loop through the integer time steps or bins if non-integer time steps # were given for t, subtimestep_idx in enumerate(timesteps): if timestep_type == "list": subtimesteps = [original_timesteps[t_] for t_ in subtimestep_idx] else: subtimesteps = [t] if (timestep_type == "list" and subtimesteps) or ( timestep_type == "int" and t > 0 ): is_nowcast_time_step = True else: is_nowcast_time_step = False # print a message if nowcasts are computed for the current integer time # step (this is not necessarily the case, since the current bin might # not contain any time steps) if is_nowcast_time_step: print( f"Computing nowcast for time step {t}... 
", end="", flush=True, ) if measure_time: starttime = time.time() # call the function to iterate the integer-timestep part of the model # for one time step precip_forecast_new, state_new = func(state_cur, params) if not ensemble: precip_forecast_new = precip_forecast_new[np.newaxis, :] # advect the currect forecast field to the subtimesteps in the current # timestep bin and append the results to the output list # apply temporal interpolation to the forecasts made between the # previous and the next integer time steps for t_sub in subtimesteps: if t_sub > 0: t_diff_prev_int = t_sub - int(t_sub) if t_diff_prev_int > 0.0: precip_forecast_ip = ( 1.0 - t_diff_prev_int ) * precip_forecast_prev + t_diff_prev_int * precip_forecast_new else: precip_forecast_ip = precip_forecast_prev t_diff_prev = t_sub - t_prev t_total += t_diff_prev if displacement is None: displacement = [None for _ in range(precip_forecast_ip.shape[0])] if precip_forecast_out is None and return_output: precip_forecast_out = [ [] for _ in range(precip_forecast_ip.shape[0]) ] precip_forecast_out_cur = [ None for _ in range(precip_forecast_ip.shape[0]) ] def worker1(i): extrap_kwargs_ = extrap_kwargs.copy() extrap_kwargs_["displacement_prev"] = displacement[i] extrap_kwargs_["allow_nonfinite_values"] = ( True if np.any(~np.isfinite(precip_forecast_ip[i])) else False ) if velocity_pert_gen is not None: velocity_ = velocity + velocity_pert_gen[i](t_total) else: velocity_ = velocity precip_forecast_ep, displacement[i] = extrapolator( precip_forecast_ip[i], velocity_, [t_diff_prev], **extrap_kwargs_, ) precip_forecast_out_cur[i] = precip_forecast_ep[0] if return_output: precip_forecast_out[i].append(precip_forecast_ep[0]) if DASK_IMPORTED and ensemble and num_ensemble_members > 1: res = [] for i in range(precip_forecast_ip.shape[0]): res.append(dask.delayed(worker1)(i)) dask.compute(*res, num_workers=num_workers) else: for i in range(precip_forecast_ip.shape[0]): worker1(i) if callback is not None: 
precip_forecast_out_cur = np.stack(precip_forecast_out_cur) callback(precip_forecast_out_cur) precip_forecast_out_cur = None t_prev = t_sub # advect the forecast field by one time step if no subtimesteps in the # current interval were found if not subtimesteps: t_diff_prev = t + 1 - t_prev t_total += t_diff_prev if displacement is None: displacement = [None for _ in range(precip_forecast_new.shape[0])] def worker2(i): extrap_kwargs_ = extrap_kwargs.copy() extrap_kwargs_["displacement_prev"] = displacement[i] if velocity_pert_gen is not None: velocity_ = velocity + velocity_pert_gen[i](t_total) else: velocity_ = velocity _, displacement[i] = extrapolator( None, velocity_, [t_diff_prev], **extrap_kwargs_, ) if DASK_IMPORTED and ensemble and num_ensemble_members > 1: res = [] for i in range(precip_forecast_new.shape[0]): res.append(dask.delayed(worker2)(i)) dask.compute(*res, num_workers=num_workers) else: for i in range(precip_forecast_new.shape[0]): worker2(i) t_prev = t + 1 precip_forecast_prev = precip_forecast_new state_cur = state_new if is_nowcast_time_step: if measure_time: print(f"{time.time() - starttime:.2f} seconds.") else: print("done.") if return_output: precip_forecast_out = np.stack(precip_forecast_out) if not ensemble: precip_forecast_out = precip_forecast_out[0, :] if measure_time: return precip_forecast_out, time.time() - starttime_total else: return precip_forecast_out def print_ar_params(phi): """ Print the parameters of an AR(p) model. Parameters ---------- phi: array_like Array of shape (n, p) containing the AR(p) parameters for n cascade levels. 
""" print("****************************************") print("* AR(p) parameters for cascade levels: *") print("****************************************") n = phi.shape[1] hline_str = "---------" for _ in range(n): hline_str += "---------------" title_str = "| Level |" for i in range(n - 1): title_str += " Phi-%d |" % (i + 1) title_str += " Phi-0 |" print(hline_str) print(title_str) print(hline_str) fmt_str = "| %-5d |" for _ in range(n): fmt_str += " %-12.6f |" for i in range(phi.shape[0]): print(fmt_str % ((i + 1,) + tuple(phi[i, :]))) print(hline_str) def print_corrcoefs(gamma): """ Print the parameters of an AR(p) model. Parameters ---------- gamma: array_like Array of shape (m, n) containing n correlation coefficients for m cascade levels. """ print("************************************************") print("* Correlation coefficients for cascade levels: *") print("************************************************") m = gamma.shape[0] n = gamma.shape[1] hline_str = "---------" for _ in range(n): hline_str += "----------------" title_str = "| Level |" for i in range(n): title_str += " Lag-%d |" % (i + 1) print(hline_str) print(title_str) print(hline_str) fmt_str = "| %-5d |" for _ in range(n): fmt_str += " %-13.6f |" for i in range(m): print(fmt_str % ((i + 1,) + tuple(gamma[i, :]))) print(hline_str) def stack_cascades(precip_decomp, n_levels, convert_to_full_arrays=False): """ Stack the given cascades into a larger array. Parameters ---------- precip_decomp: list List of cascades obtained by calling a method implemented in pysteps.cascade.decomposition. n_levels: int The number of cascade levels. Returns ------- out: tuple A list of three-dimensional arrays containing the stacked cascade levels. 
""" out = [] n_inputs = len(precip_decomp) for i in range(n_levels): precip_cur_level = [] for j in range(n_inputs): precip_cur_input = precip_decomp[j]["cascade_levels"][i] if precip_decomp[j]["compact_output"] and convert_to_full_arrays: precip_tmp = np.zeros( precip_decomp[j]["weight_masks"].shape[1:], dtype=complex ) precip_tmp[precip_decomp[j]["weight_masks"][i]] = precip_cur_input precip_cur_input = precip_tmp precip_cur_level.append(precip_cur_input) out.append(np.stack(precip_cur_level)) if not np.any( [precip_decomp[i]["compact_output"] for i in range(len(precip_decomp))] ): out = np.stack(out) return out ================================================ FILE: pysteps/postprocessing/__init__.py ================================================ # -*- coding: utf-8 -*- """Methods for post-processing of forecasts.""" from . import ensemblestats from .diagnostics import * from .interface import * from .ensemblestats import * ================================================ FILE: pysteps/postprocessing/diagnostics.py ================================================ """ pysteps.postprocessing.diagnostics ==================== Methods for applying diagnostics postprocessing. The methods in this module implement the following interface:: diagnostic_xxx(optional arguments) where **xxx** is the name of the diagnostic to be applied. Available Diagnostics Postprocessors ------------------------ .. autosummary:: :toctree: ../generated/ """ # Add your diagnostic_ function here AND add this method to the _diagnostics_methods # dictionary in postprocessing.interface.py ================================================ FILE: pysteps/postprocessing/ensemblestats.py ================================================ # -*- coding: utf-8 -*- """ pysteps.postprocessing.ensemblestats ==================================== Methods for the computation of ensemble statistics. .. 
autosummary:: :toctree: ../generated/ mean excprob banddepth """ import numpy as np from scipy.special import comb def mean(X, ignore_nan=False, X_thr=None): """ Compute the mean value from a forecast ensemble field. Parameters ---------- X: array_like Array of shape (k, m, n) containing a k-member ensemble of forecast fields of shape (m, n). ignore_nan: bool If True, ignore nan values. X_thr: float Optional threshold for computing the ensemble mean. Values below **X_thr** are ignored. Returns ------- out: ndarray Array of shape (m, n) containing the ensemble mean. """ X = np.asanyarray(X) X_ndim = X.ndim if X_ndim > 3 or X_ndim <= 1: raise Exception( "Number of dimensions of X should be 2 or 3." + "It was: {}".format(X_ndim) ) elif X.ndim == 2: X = X[None, ...] if ignore_nan or X_thr is not None: if X_thr is not None: X = X.copy() X[X < X_thr] = np.nan return np.nanmean(X, axis=0) else: return np.mean(X, axis=0) def excprob(X, X_thr, ignore_nan=False): """ For a given forecast ensemble field, compute exceedance probabilities for the given intensity thresholds. Parameters ---------- X: array_like Array of shape (k, m, n, ...) containing an k-member ensemble of forecasts with shape (m, n, ...). X_thr: float or a sequence of floats Intensity threshold(s) for which the exceedance probabilities are computed. ignore_nan: bool If True, ignore nan values. Returns ------- out: ndarray Array of shape (len(X_thr), m, n) containing the exceedance probabilities for the given intensity thresholds. If len(X_thr)=1, the first dimension is dropped. """ # Checks X = np.asanyarray(X) X_ndim = X.ndim if X_ndim < 3: raise Exception( f"Number of dimensions of X should be 3 or more. 
It was: {X_ndim}" ) P = [] if np.isscalar(X_thr): X_thr = [X_thr] scalar_thr = True else: scalar_thr = False for x in X_thr: X_ = np.zeros(X.shape) X_[X >= x] = 1.0 X_[~np.isfinite(X)] = np.nan if ignore_nan: P.append(np.nanmean(X_, axis=0)) else: P.append(np.mean(X_, axis=0)) if not scalar_thr: return np.stack(P) else: return P[0] def banddepth(X, thr=None, norm=False): """ Compute the modified band depth (Lopez-Pintado and Romo, 2009) for a k-member ensemble data set. Implementation of the exact fast algorithm for computing the modified band depth as described in Sun et al (2012). Parameters ---------- X: array_like Array of shape (k, m, ...) representing an ensemble of *k* members (i.e., samples) with shape (m, ...). thr: float Optional threshold for excluding pixels that have no samples equal or above the **thr** value. Returns ------- out: array_like Array of shape *k* containing the (normalized) band depth values for each ensemble member. References ---------- Lopez-Pintado, Sara, and Juan Romo. 2009. "On the Concept of Depth for Functional Data." Journal of the American Statistical Association 104 (486): 718–34. https://doi.org/10.1198/jasa.2009.0108. Sun, Ying, Marc G. Genton, and Douglas W. Nychka. 2012. "Exact Fast Computation of Band Depth for Large Functional Datasets: How Quickly Can One Million Curves Be Ranked?" Stat 1 (1): 68–74. https://doi.org/10.1002/sta4.8. 
""" # mask invalid pixels if thr is None: thr = np.nanmin(X) mask = np.logical_and(np.all(np.isfinite(X), axis=0), np.any(X >= thr, axis=0)) n = X.shape[0] p = np.sum(mask) depth = np.zeros(n) # assign ranks b = np.random.random((n, p)) order = np.lexsort((b, X[:, mask]), axis=0) # random rank for ties rank = order.argsort(axis=0) + 1 # compute band depth nabove = n - rank nbelow = rank - 1 match = nabove * nbelow nchoose2 = comb(n, 2) proportion = np.sum(match, axis=1) / p depth = (proportion + n - 1) / nchoose2 # normalize depth between 0 and 1 if norm: depth = (depth - depth.min()) / (depth.max() - depth.min()) return depth ================================================ FILE: pysteps/postprocessing/interface.py ================================================ # -*- coding: utf-8 -*- """ pysteps.postprocessing.interface ==================== Interface for the postprocessing module. Support postprocessing types: - ensmeblestats - diagnostics .. currentmodule:: pysteps.postprocessing.interface .. autosummary:: :toctree: ../generated/ get_method """ import importlib from importlib.metadata import entry_points from pysteps.postprocessing import diagnostics, ensemblestats from pprint import pprint import warnings _diagnostics_methods = dict() _ensemblestats_methods = dict( mean=ensemblestats.mean, excprob=ensemblestats.excprob, banddepth=ensemblestats.banddepth, ) def add_postprocessor( postprocessors_function_name, _postprocessors, module, attributes ): """ Add the postprocessor to the appropriate _methods dictionary and to the module. Parameters ---------- postprocessors_function_name: str for example, e.g. diagnostic_example1 _postprocessors: function the function to be added @param module: the module where the function is added, e.g. 
'diagnostics' @param attributes: the existing functions in the selected module """ # the dictionary where the function is added methods_dict = ( _diagnostics_methods if "diagnostic" in module else _ensemblestats_methods ) # get funtion name without mo short_name = postprocessors_function_name.replace(f"{module}_", "") if short_name not in methods_dict: methods_dict[short_name] = _postprocessors else: warnings.warn( f"The {module} identifier '{short_name}' is already available in " f"'pysteps.postprocessing.interface_{module}_methods'.\n" f"Skipping {module}:{'.'.join(attributes)}", RuntimeWarning, ) if hasattr(globals()[module], postprocessors_function_name): warnings.warn( f"The {module} function '{short_name}' is already an attribute" f"of 'pysteps.postprocessing.{module}'.\n" f"Skipping {module}:{'.'.join(attributes)}", RuntimeWarning, ) else: setattr(globals()[module], postprocessors_function_name, _postprocessors) def discover_postprocessors(): """ Search for installed postprocessing plugins in the entrypoint 'pysteps.plugins.postprocessors' The postprocessors found are added to the appropriate `_methods` dictionary in 'pysteps.postprocessing.interface' containing the available postprocessors. """ # Discover the postprocessors available in the plugins for plugintype in ["diagnostic", "ensemblestat"]: for entry_point in entry_points(group=f"pysteps.plugins.{plugintype}"): _postprocessors = entry_point.load() postprocessors_function_name = _postprocessors.__name__ if plugintype in entry_point.module: add_postprocessor( postprocessors_function_name, _postprocessors, f"{plugintype}s", entry_point.attr, ) def print_postprocessors_info(module_name, interface_methods, module_methods): """ Helper function to print the postprocessors available in the module and in the interface. Parameters ---------- module_name: str Name of the module, for example 'pysteps.postprocessing.diagnostics'. 
interface_methods: dict Dictionary of the postprocessors declared in the interface, for example _diagnostics_methods. module_methods: list List of the postprocessors available in the module, for example 'diagnostic_example1'. """ print(f"\npostprocessors available in the {module_name} module") pprint(module_methods) print( "\npostprocessors available in the pysteps.postprocessing.get_method interface" ) pprint([(short_name, f.__name__) for short_name, f in interface_methods.items()]) module_methods_set = set(module_methods) interface_methods_set = set(interface_methods.keys()) difference = module_methods_set ^ interface_methods_set if len(difference) > 0: # print("\nIMPORTANT:") _diff = module_methods_set - interface_methods_set if len(_diff) > 0: print( f"\nIMPORTANT:\nThe following postprocessors are available in {module_name} module but not in the pysteps.postprocessing.get_method interface" ) pprint(_diff) _diff = interface_methods_set - module_methods_set if len(_diff) > 0: print( "\nWARNING:\n" f"The following postprocessors are available in the pysteps.postprocessing.get_method interface but not in the {module_name} module" ) pprint(_diff) def postprocessors_info(): """Print all the available postprocessors.""" available_postprocessors = set() postprocessors_in_the_interface = set() # List the plugins that have been added to the postprocessing.[plugintype] module for plugintype in ["diagnostics", "ensemblestats"]: # in the dictionary and found by get_methods() function interface_methods = ( _diagnostics_methods if plugintype == "diagnostics" else _ensemblestats_methods ) # in the pysteps.postprocessing module module_name = f"pysteps.postprocessing.{plugintype}" available_module_methods = [ attr for attr in dir(importlib.import_module(module_name)) if attr.startswith(plugintype[:-1]) ] # add the pre-existing ensemblestats functions (see _ensemblestats_methods above) # that do not follow the convention to start with "ensemblestat_" as the plugins if 
"ensemblestats" in plugintype: available_module_methods += [ em for em in _ensemblestats_methods.keys() if not em.startswith("ensemblestat_") ] print_postprocessors_info( module_name, interface_methods, available_module_methods ) available_postprocessors = available_postprocessors.union( available_module_methods ) postprocessors_in_the_interface = postprocessors_in_the_interface.union( interface_methods.keys() ) return available_postprocessors, postprocessors_in_the_interface def get_method(name, method_type): """ Return a callable function for the method corresponding to the given name. Parameters ---------- name: str Name of the method. The available options are:\n diagnostics: [nothing pre-installed] ensemblestats: pre-installed: mean, excprob, banddepth Additional options might exist if plugins are installed. method_type: {'diagnostics', 'ensemblestats'} Type of the method (see tables above). """ if isinstance(method_type, str): method_type = method_type.lower() else: raise TypeError( "Only strings supported for for the method_type" + " argument\n" + "The available types are: 'diagnostics', 'ensemblestats'" ) from None if isinstance(name, str): name = name.lower() else: raise TypeError( "Only strings supported for the method's names.\n" + "\nAvailable diagnostics names:" + str(list(_diagnostics_methods.keys())) + "\nAvailable ensemblestats names:" + str(list(_ensemblestats_methods.keys())) ) from None if method_type == "diagnostics": methods_dict = _diagnostics_methods elif method_type == "ensemblestats": methods_dict = _ensemblestats_methods else: raise ValueError( "Unknown method type {}\n".format(method_type) + "The available types are: 'diagnostics', 'ensemblestats'" ) from None try: return methods_dict[name] except KeyError: raise ValueError( "Unknown {} method {}\n".format(method_type, name) + "The available methods are:" + str(list(methods_dict.keys())) ) from None ================================================ FILE: 
# -*- coding: utf-8 -*-
"""
pysteps.postprocessing.probmatching
===================================

Methods for matching the probability distribution of two data sets.

.. autosummary::
    :toctree: ../generated/

    compute_empirical_cdf
    nonparam_match_empirical_cdf
    pmm_init
    pmm_compute
    shift_scale
    resample_distributions
"""

import numpy as np
from scipy import interpolate as sip
from scipy import optimize as sop


def compute_empirical_cdf(bin_edges, hist):
    """
    Compute an empirical cumulative distribution function from the given
    histogram.

    Parameters
    ----------
    bin_edges: array_like
        Coordinates of left edges of the histogram bins.
    hist: array_like
        Histogram counts for each bin.

    Returns
    -------
    out: ndarray
        CDF values corresponding to the bin edges.
    """
    cdf_values = []
    running_total = 0.0
    # Accumulate bin_width * count over the bins, recording the running total
    # at each left edge; the final total normalizes the CDF to [0, 1].
    for (left, right), count in zip(zip(bin_edges[:-1], bin_edges[1:]), hist):
        cdf_values.append(running_total)
        running_total += (right - left) * count
    cdf_values.append(running_total)
    return np.array(cdf_values) / running_total


def nonparam_match_empirical_cdf(initial_array, target_array, ignore_indices=None):
    """
    Match the empirical CDF of ``initial_array`` with the empirical CDF of
    ``target_array``. The ranks of the initial values are preserved, but their
    empirical distribution becomes that of the target. Zero-pixels (pixels at
    the minimum value) of the initial array remain zero-valued.

    Parameters
    ----------
    initial_array: array_like
        The initial array whose CDF is to be matched with the target.
    target_array: array_like
        The target array.
    ignore_indices: array_like, optional
        Indices of pixels in ``initial_array`` to be left untouched, either as
        an index array or as a boolean mask (True = ignore).

    Returns
    -------
    output_array: ndarray
        The matched array of the same shape as the initial array.

    Raises
    ------
    ValueError
        If the arrays have mismatching sizes, the initial array is all-NaN, or
        non-finite values remain outside the ignored pixels.
    """
    if np.all(np.isnan(initial_array)):
        raise ValueError("Initial array contains only nans.")
    if initial_array.size != target_array.size:
        raise ValueError(
            "dimension mismatch between initial_array and target_array: "
            f"initial_array.shape={initial_array.shape}, target_array.shape={target_array.shape}"
        )
    source = np.array(initial_array, dtype=float)
    target = np.array(target_array, dtype=float)

    # The minimum of the source acts as its "zero" (dry) value.
    zvalue = np.nanmin(source)
    if ignore_indices is not None:
        source[ignore_indices] = zvalue

    # After masking the ignored pixels, no non-finite values may remain.
    if np.any(~np.isfinite(source)):
        raise ValueError(
            "Initial array contains non-finite values outside ignore_indices mask."
        )

    dry_mask = source == zvalue

    # Same for the target: its minimum is the dry value; NaNs become dry.
    zvalue_trg = np.nanmin(target)
    target = np.where(np.isnan(target), zvalue_trg, target)

    # If the target has a larger wet area than the source, set its lowest
    # values to dry so that both arrays have the same wet-area fraction.
    if np.sum(target > zvalue_trg) > np.sum(source > zvalue):
        wet_fraction = np.sum(source > zvalue) / source.size
        cutoff = np.percentile(target, 100 * (1 - wet_fraction))
        target[target < cutoff] = zvalue_trg

    original_shape = source.shape
    target = target.reshape(-1)
    source = source.reshape(-1)

    # Sort the target values and compute the rank of every source value.
    sorted_target = target[target.argsort()]
    source_order = source.argsort()
    source_ranks = np.empty(len(source), int)
    source_ranks[source_order] = np.arange(len(source))

    # Each source pixel receives the target value of equal rank.
    output_array = sorted_target[source_ranks].reshape(original_shape)

    # Restore dry pixels and the ignored pixels (original, untouched values).
    output_array[dry_mask] = zvalue_trg
    if ignore_indices is not None:
        output_array[ignore_indices] = initial_array[ignore_indices]

    return output_array


# TODO: A more detailed explanation of the PMM method + references.
def pmm_init(bin_edges_1, cdf_1, bin_edges_2, cdf_2):
    """
    Initialize a probability matching method (PMM) object from binned
    cumulative distribution functions (CDF).

    Parameters
    ----------
    bin_edges_1: array_like
        Coordinates of the left bin edges of the source cdf.
    cdf_1: array_like
        Values of the source CDF at the bin edges.
    bin_edges_2: array_like
        Coordinates of the left bin edges of the target cdf.
    cdf_2: array_like
        Values of the target CDF at the bin edges.
    """
    # Store copies so later mutation of the inputs cannot corrupt the object;
    # the interpolator maps source coordinates to source CDF values.
    return {
        "bin_edges_1": bin_edges_1.copy(),
        "cdf_1": cdf_1.copy(),
        "bin_edges_2": bin_edges_2.copy(),
        "cdf_2": cdf_2.copy(),
        "cdf_interpolator": sip.interp1d(bin_edges_1, cdf_1, kind="linear"),
    }


def pmm_compute(pmm, x):
    """
    For a given PMM object and x-coordinate, compute the probability matched
    value (i.e. the x-coordinate for which the target CDF has the same value as
    the source CDF).

    Parameters
    ----------
    pmm: dict
        A PMM object returned by pmm_init.
    x: float
        The coordinate for which to compute the probability matched value.
    """
    # Only coordinates inside the source bin range can be matched; the rest
    # become NaN.
    inside = np.logical_and(x >= pmm["bin_edges_1"][0], x <= pmm["bin_edges_1"][-1])
    probs = pmm["cdf_interpolator"](x[inside])
    matched = np.ones(len(inside)) * np.nan
    # Invert the target CDF at the source CDF values.
    matched[inside] = _invfunc(probs, pmm["bin_edges_2"], pmm["cdf_2"])
    return matched


def shift_scale(R, f, rain_fraction_trg, second_moment_trg, **kwargs):
    """
    Find the shift and scale that return the required second moment and rain
    area. The optimization is performed with the Nelder-Mead algorithm
    available in scipy. It assumes a forward transformation
    ln_rain = ln(rain) - ln(min_rain) if rain > min_rain, else 0.

    Parameters
    ----------
    R: array_like
        The initial array to be shifted and scaled.
    f: function
        The inverse transformation that is applied after the shift and scale.
    rain_fraction_trg: float
        The required rain fraction to be matched by shifting.
    second_moment_trg: float
        The required second moment to be matched by scaling.
        The second moment is defined as second_moment = var + mean^2.

    Other Parameters
    ----------------
    scale: float
        Optional initial value of the scale parameter for the Nelder-Mead
        optimisation. Typically, this would be the scale parameter estimated
        the previous time step. Default: 1.
    max_iterations: int
        Maximum allowed number of iterations and function evaluations. More
        details:
        https://docs.scipy.org/doc/scipy/reference/optimize.minimize-neldermead.html
        Default: 100.
    tol: float
        Tolerance for termination. More details:
        https://docs.scipy.org/doc/scipy/reference/optimize.minimize-neldermead.html
        Default: 0.05*second_moment_trg, i.e. terminate the search if the error
        is less than 5% since the second moment is a bit unstable.

    Returns
    -------
    shift: float
        The shift value that produces the required rain fraction.
    scale: float
        The scale value that produces the required second_moment.
    R: array_like
        The shifted, scaled and back-transformed array.
    """
    original_shape = R.shape
    R = R.flatten()

    # Optional tuning parameters with their defaults.
    scale = kwargs.get("scale", 1.0)
    max_iterations = kwargs.get("max_iterations", 100)
    tol = kwargs.get("tol", 0.05 * second_moment_trg)

    # The shift is the quantile that leaves exactly rain_fraction_trg of the
    # pixels above it (i.e. "wet").
    shift = np.percentile(R, 100 * (1 - rain_fraction_trg))
    idx_wet = R > shift

    def _get_error(scale):
        # Objective: absolute difference between the second moment obtained
        # with this scale and the target second moment.
        R_ = np.zeros_like(R)
        R_[idx_wet] = f((R[idx_wet] - shift) * scale)
        R_[~idx_wet] = 0
        second_moment = np.nanstd(R_) ** 2 + np.nanmean(R_) ** 2
        return np.abs(second_moment - second_moment_trg)

    # Nelder-Mead optimisation of the scale parameter.
    nm_result = sop.minimize(
        _get_error,
        scale,
        method="Nelder-Mead",
        tol=tol,
        options={"disp": False, "maxiter": max_iterations},
    )
    scale = nm_result["x"][0]

    # Apply the optimal shift/scale and back-transform the wet pixels.
    R[idx_wet] = f((R[idx_wet] - shift) * scale)
    R[~idx_wet] = 0

    return shift, scale, R.reshape(original_shape)


def resample_distributions(
    first_array, second_array, probability_first_array, randgen=np.random
):
    """
    Merge two distributions (e.g., from the extrapolation nowcast and NWP in
    the blending module) to effectively combine two distributions for
    probability matching without losing extremes. Entries for which one array
    has a nan will not be included from the other array either.

    Parameters
    ----------
    first_array: array_like
        One of the two arrays from which the distribution should be sampled
        (e.g., the extrapolation cascade). It must be of the same shape as
        `second_array`. Input must not contain NaNs.
    second_array: array_like
        One of the two arrays from which the distribution should be sampled
        (e.g., the NWP (model) cascade). It must be of the same shape as
        `first_array`. Input must not contain NaNs.
    probability_first_array: float
        The weight that `first_array` should get (a value between 0 and 1).
        This determines the likelihood of selecting elements from
        `first_array` over `second_array`.
    randgen: numpy.random or numpy.RandomState
        The random number generator to be used for the binomial distribution.
        You can pass a seeded random state here for reproducibility.
        Default is numpy.random.

    Returns
    -------
    csort: array_like
        The combined output distribution. This is an array of the same shape
        as the input arrays, where each element is chosen from either
        `first_array` or `second_array` based on the specified probability,
        and then sorted in descending order.

    Raises
    ------
    ValueError
        If `first_array` and `second_array` do not have the same shape.
    """
    # Validate inputs.
    if first_array.shape != second_array.shape:
        raise ValueError("first_array and second_array must have the same shape")
    probability_first_array = np.clip(probability_first_array, 0.0, 1.0)

    # Propagate NaNs between the arrays so both mask the same entries;
    # convert to float to make sure NaN assignment works.
    nanmask = np.isnan(first_array) | np.isnan(second_array)
    if np.any(nanmask):
        first_array = first_array.astype(float)
        first_array[nanmask] = np.nan
        second_array = second_array.astype(float)
        second_array[nanmask] = np.nan

    # Flatten and sort both arrays in descending order (NaNs sort to front).
    asort = np.sort(first_array, axis=None)[::-1]
    bsort = np.sort(second_array, axis=None)[::-1]
    n = asort.shape[0]

    # For each rank, draw from the first array with the given probability,
    # otherwise from the second; then re-sort descending.
    take_first = randgen.binomial(1, probability_first_array, n).astype(bool)
    combined = np.where(take_first, asort, bsort)
    return np.sort(combined)[::-1]


def _invfunc(y, fx, fy):
    # Piecewise-linear inverse lookup: for each y, find the bracketing values
    # in fy and linearly interpolate the corresponding fx coordinates.
    # Values of y outside the range of fy map to NaN.
    if len(y) == 0:
        return np.array([])

    idx = np.digitize(y, fy)
    valid = np.logical_and(idx > 0, idx < len(fy))
    weight = (y[valid] - fy[idx[valid] - 1]) / (fy[idx[valid]] - fy[idx[valid] - 1])
    result = np.ones(len(y)) * np.nan
    result[valid] = weight * fx[idx[valid]] + (1.0 - weight) * fx[idx[valid] - 1]
    return result
(figures, forecasts, etc) "path_outputs": "./" }, "plot": { // "motion_plot" : "streamplot" or "quiver" "motion_plot": "quiver", // "colorscale" : "BOM-RF3", "pysteps" or "STEPS-BE" "colorscale": "pysteps" }, "data_sources": { "bom": { "root_path": "./radar/bom", "path_fmt": "prcp-cscn/2/%Y/%m/%d", "fn_pattern": "2_%Y%m%d_%H%M00.prcp-cscn", "fn_ext": "nc", "importer": "bom_rf3", "timestep": 6, "importer_kwargs": { "gzipped": true } }, "dwd": { "root_path": "./radar/dwd/RY", "path_fmt": "%Y/%m/%d", "fn_pattern": "%Y%m%d_%H%M_RY", "fn_ext": "h5", "importer": "dwd_hdf5", "timestep": 5, "importer_kwargs": { "qty": "RATE" } }, "fmi": { "root_path": "./radar/fmi/pgm", "path_fmt": "%Y%m%d", "fn_pattern": "%Y%m%d%H%M_fmi.radar.composite.lowest_FIN_SUOMI1", "fn_ext": "pgm.gz", "importer": "fmi_pgm", "timestep": 5, "importer_kwargs": { "gzipped": true } }, "fmi_geotiff": { "root_path": "./radar/fmi/geotiff", "path_fmt": "%Y%m%d", "fn_pattern": "%Y%m%d%H%M_FINUTM.tif", "fn_ext": "tif", "importer": "geotiff", "timestep": 5, "importer_kwargs": {} }, "mch": { "root_path": "./radar/mch", "path_fmt": "%Y%m%d", "fn_pattern": "AQC%y%j%H%M?_00005.801", "fn_ext": "gif", "importer": "mch_gif", "timestep": 5, "importer_kwargs": { "product": "AQC", "unit": "mm", "accutime": 5 } }, "mrms": { "root_path": "./mrms", "path_fmt": "%Y/%m/%d", "fn_pattern": "PrecipRate_00.00_%Y%m%d-%H%M%S", "fn_ext": "grib2", "importer": "mrms_grib", "timestep": 2, "importer_kwargs": {} }, "opera": { "root_path": "./radar/OPERA", "path_fmt": "%Y%m%d", "fn_pattern": "T_PAAH21_C_EUOC_%Y%m%d%H%M%S", "fn_ext": "hdf", "importer": "opera_hdf5", "timestep": 15, "importer_kwargs": {} }, "knmi": { "root_path": "./radar/KNMI", "path_fmt": "%Y/%m", "fn_pattern": "RAD_NL25_RAP_5min_%Y%m%d%H%M", "fn_ext": "h5", "importer": "knmi_hdf5", "timestep": 5, "importer_kwargs": { "accutime": 5, "qty": "ACRR", "pixelsize": 1000.0 } }, "rmi": { "root_path": "./radar/rmi/radqpe", "path_fmt": "%Y%m%d", "fn_pattern": 
"%Y%m%d%H%M00.rad.best.comp.rate.qpe", "fn_ext": "hdf", "importer": "odim_hdf5", "timestep": 5, "importer_kwargs": { "accutime": 5.0 } }, "saf": { "root_path": "./saf", "path_fmt": "%Y%m%d/CRR", "fn_pattern": "S_NWC_CRR_MSG4_Europe-VISIR_%Y%m%dT%H%M00Z", "fn_ext": "nc", "importer": "saf_crri", "timestep": 15, "importer_kwargs": { "gzipped": true } }, "bom_nwp": { "root_path": "./nwp/bom", "path_fmt": "%Y/%m/%d", "fn_pattern": "%Y%m%d_%H00_regrid_short", "fn_ext": "nc", "importer": "bom_nwp", "timestep": 10, "importer_kwargs": { "gzipped": true } }, "dwd_nwp": { "root_path": "./nwp/dwd", "path_fmt": "%Y/%m/%d", "fn_ext": "grib2", "fn_pattern": "%Y%m%d_%H%M_PR_GSP_060_120", "importer": "dwd_nwp", "timestep": 5, "importer_kwargs": { "varname": "lsprate", "grid_file_path": "./aux/grid_files/dwd/icon/R19B07/icon_grid_0047_R19B07_L.nc" } }, "knmi_nwp": { "root_path": "./nwp/knmi", "path_fmt": "%Y/%m/%d", "fn_pattern": "%Y%m%d_%H00_Pforecast_Harmonie", "fn_ext": "nc", "importer": "knmi_nwp", "timestep": 60, "importer_kwargs": { "gzipped": true } }, "rmi_nwp": { "root_path": "./nwp/rmi", "path_fmt": "%Y/%m/%d", "fn_pattern": "ao13_%Y%m%d%H_native_5min", "fn_ext": "nc", "importer": "rmi_nwp", "timestep": 5, "importer_kwargs": { "gzipped": true } } } } ================================================ FILE: pysteps/pystepsrc_schema.json ================================================ { "title": "pystepsrc params", "description": "Pysteps default parameters", "required": [ "outputs", "plot", "data_sources" ], "type": "object", "properties": { "outputs": { "type": "object", "required": [ "path_outputs" ], "properties": { "path_outputs": { "type": "string" } } }, "plot": { "type": "object", "required": [ "motion_plot", "colorscale" ], "properties": { "motion_plot": { "type": "string" }, "colorscale": { "type": "string" } } }, "data_sources": { "type": "object", "patternProperties": { "": { "type": "object", "required": [ "root_path", "path_fmt", "fn_pattern", "fn_ext", 
"importer", "timestep", "importer_kwargs" ], "properties": { "root_path": { "type": "string" }, "path_fmt": { "type": "string" }, "fn_pattern": { "type": "string" }, "fn_ext": { "type": "string" }, "importer": { "type": "string" }, "timestep": { "type": "number" }, "importer_kwargs": { "type": "object" } } } } } } } ================================================ FILE: pysteps/scripts/__init__.py ================================================ # -*- coding: utf-8 -*- """ Standalone utility scripts for pysteps (e.g. parameter estimation from the given data). """ ================================================ FILE: pysteps/scripts/fit_vel_pert_params.py ================================================ # -*- coding: utf-8 -*- """Fit STEPS motion perturbation parameters to the output of run_vel_pert_analysis.py and optionally plots the results. For a description of the method, see :cite:`BPS2006`.""" import argparse import pickle from matplotlib import pyplot import numpy as np from scipy.optimize import curve_fit description = ( "Fit STEPS motion perturbation parameters to the results produced" " by run_vel_pert_analysis.py and optionally plot the results." 
) argparser = argparse.ArgumentParser(description=description) argparser.add_argument("inputfile", type=str, help="name of the input file") argparser.add_argument( "--plot", nargs="?", type=str, metavar="filename", help="plot the results and save the figure to ", ) args = argparser.parse_args() with open(args.inputfile, "rb") as f: results = pickle.load(f) f = lambda t, a, b, c: a * pow(t, b) + c leadtimes = sorted(results.keys()) std_par = [] std_perp = [] for lt in leadtimes: dp_par_sum = results[lt]["dp_par_sum"] dp_par_sq_sum = results[lt]["dp_par_sq_sum"] dp_par_n = results[lt]["n_samples"] mu = dp_par_sum / dp_par_n std_par.append( np.sqrt((dp_par_sq_sum - 2 * mu * dp_par_sum + dp_par_n * mu**2) / dp_par_n) ) dp_perp_sum = results[lt]["dp_perp_sum"] dp_perp_sq_sum = results[lt]["dp_perp_sq_sum"] dp_perp_n = results[lt]["n_samples"] mu = dp_perp_sum / dp_perp_n std_perp.append( np.sqrt((dp_perp_sq_sum - 2 * mu * dp_perp_sum + dp_perp_n * mu**2) / dp_perp_n) ) try: p_par = curve_fit(f, leadtimes, std_par)[0] p_perp = curve_fit(f, leadtimes, std_perp)[0] fit_succeeded = True print("p_par = %s" % str(p_par)) print("p_perp = %s" % str(p_perp)) except RuntimeError: fit_succeeded = False print("Parameter fitting failed.") if args.plot is not None: pyplot.figure() pyplot.scatter(leadtimes, std_par, c="r") t = np.linspace(0.5 * leadtimes[0], 1.025 * leadtimes[-1], 200) pyplot.scatter(leadtimes, std_perp, c="g") if fit_succeeded: (l1,) = pyplot.plot(t, f(t, *p_par), "r-") (l2,) = pyplot.plot(t, f(t, *p_perp), "g-") p_str_1 = lambda p: "%.2f\\cdot t^{%.2f}+%.2f" % (p[0], p[1], p[2]) p_str_2 = lambda p: "%.2f\\cdot t^{%.2f}%.2f" % (p[0], p[1], p[2]) if fit_succeeded: lbl = lambda p: p_str_1(p) if p[2] > 0.0 else p_str_2(p) pyplot.legend( [l1, l2], [ "Parallel: $f(t)=%s$" % lbl(p_par), "Perpendicular: $f(t)=%s$" % lbl(p_perp), ], fontsize=12, ) pyplot.xlim(0.5 * leadtimes[0], 1.025 * leadtimes[-1]) pyplot.xlabel("Lead time (minutes)", fontsize=12) pyplot.ylabel("Standard 
# -*- coding: utf-8 -*-
"""Analyze uncertainty of motion field with increasing lead time. The analyses
are done by comparing initial motion fields to those estimated in the future.
For a description of the method, see :cite:`BPS2006`."""

import argparse
from datetime import datetime, timedelta
import pickle

import numpy as np
from scipy import linalg as la

from pysteps import io, motion
from pysteps import rcparams
from pysteps.utils import transformation

# TODO: Don't hard-code these.
num_prev_files = 9
use_precip_mask = False
R_min = 0.1

argparser = argparse.ArgumentParser(
    description="Estimate motion perturbation parameters for STEPS."
)
argparser.add_argument("startdate", type=str, help="start date (YYYYmmDDHHMM)")
argparser.add_argument("enddate", type=str, help="end date (YYYYmmDDHHMM)")
argparser.add_argument("datasource", type=str, help="data source to use")
argparser.add_argument(
    "oflow", type=str, help="optical flow method to use (darts, lucaskanade or vet)"
)
argparser.add_argument(
    "maxleadtime", type=int, help="maximum lead time for the analyses (minutes)"
)
argparser.add_argument("outfile", type=str, help="output file name")
argparser.add_argument(
    "--accum",
    nargs="?",
    type=str,
    metavar="filename",
    help="accumulate statistics to previously computed file ",
)
args = argparser.parse_args()

# Resolve the data source configuration from the pystepsrc parameters.
datasource = rcparams["data_sources"][args.datasource]
startdate = datetime.strptime(args.startdate, "%Y%m%d%H%M")
enddate = datetime.strptime(args.enddate, "%Y%m%d%H%M")

importer = io.get_method(datasource["importer"], "importer")

# Motion fields estimated at each analysis time, keyed by datetime.
motionfields = {}

oflow = motion.get_method(args.oflow)

# compute motion fields
# ---------------------

# TODO: This keeps all motion fields in memory during the analysis period, which
# can take a lot of memory.

curdate = startdate
while curdate <= enddate:
    # Fetch the current composite plus the preceding files needed by the
    # optical flow method; skip this time step if the archive lookup fails.
    try:
        fns = io.archive.find_by_date(
            curdate,
            datasource["root_path"],
            datasource["path_fmt"],
            datasource["fn_pattern"],
            datasource["fn_ext"],
            datasource["timestep"],
            num_prev_files=9,
        )
    except IOError:
        curdate += timedelta(minutes=datasource["timestep"])
        continue

    # Skip if any file in the sequence is missing.
    if any([fn[0] is None for fn in fns]):
        curdate += timedelta(minutes=datasource["timestep"])
        continue

    R, _, metadata = io.readers.read_timeseries(
        fns, importer, **datasource["importer_kwargs"]
    )

    # Velocity scale factor: converts pixels/timestep to km/h
    # (assumes xpixelsize is in meters -- TODO confirm for all importers).
    # TODO: Here we assume that metadata["xpixelsize"] = metadata["ypixelsize"]
    vsf = 60.0 / datasource["timestep"] * metadata["xpixelsize"] / 1000.0

    # Reject the whole sequence if any frame contains no finite values.
    missing_data = False
    for i in range(R.shape[0]):
        if not np.any(np.isfinite(R[i, :, :])):
            missing_data = True
            break

    if missing_data:
        curdate += timedelta(minutes=datasource["timestep"])
        continue

    R[~np.isfinite(R)] = metadata["zerovalue"]
    if use_precip_mask:
        # Mask pixels that are dry in any frame of the sequence.
        MASK = np.any(R < R_min, axis=0)
    R = transformation.dB_transform(R)[0]

    # VET only uses the last two frames.
    if args.oflow == "vet":
        R_ = R[-2:, :, :]
    else:
        R_ = R

    # TODO: Allow the user to supply parameters for the optical flow.
    V = oflow(R_) * vsf

    # discard the motion field if the mean velocity is abnormally large
    if np.nanmean(np.linalg.norm(V, axis=0)) > 0.5 * R.shape[1]:
        curdate += timedelta(minutes=datasource["timestep"])
        continue

    if use_precip_mask:
        V[0, :, :][MASK] = np.nan
        V[1, :, :][MASK] = np.nan

    # Store as float32 to reduce the memory footprint.
    motionfields[curdate] = V.astype(np.float32)

    curdate += timedelta(minutes=datasource["timestep"])

# compare initial and future motion fields
# ----------------------------------------

dates = sorted(motionfields.keys())

# Start fresh or accumulate onto statistics from a previous run.
if args.accum is None:
    results = {}
else:
    with open(args.accum, "rb") as f:
        results = pickle.load(f)

for i, date1 in enumerate(dates):
    V1 = motionfields[date1].astype(float)
    # Unit vectors parallel and perpendicular to the initial motion field;
    # N is the per-pixel magnitude used for normalization.
    if not use_precip_mask:
        N = la.norm(V1, axis=0)
    else:
        N = np.ones(V1.shape[1:]) * np.nan
        MASK = np.isfinite(V1[0, :, :])
        N[MASK] = la.norm(V1[:, MASK], axis=0)
    V1_par = V1 / N
    V1_perp = np.stack([-V1_par[1, :, :], V1_par[0, :, :]])

    # Only use initial times for which the full lead-time window fits in the
    # analysis period.
    if date1 + timedelta(minutes=args.maxleadtime) > enddate:
        continue

    for date2 in dates[i + 1 :]:
        lt = (date2 - date1).total_seconds() / 60
        if lt > args.maxleadtime:
            continue

        V2 = motionfields[date2].astype(float)

        # Project the velocity difference onto the parallel/perpendicular
        # directions of the initial field.
        DV = V2 - V1
        DP_par = DV[0, :, :] * V1_par[0, :, :] + DV[1, :, :] * V1_par[1, :, :]
        DP_perp = DV[0, :, :] * V1_perp[0, :, :] + DV[1, :, :] * V1_perp[1, :, :]

        # Initialize the accumulators for this lead time on first use.
        if not lt in results.keys():
            results[lt] = {}
            results[lt]["dp_par_sum"] = 0.0
            results[lt]["dp_par_sq_sum"] = 0.0
            results[lt]["dp_perp_sum"] = 0.0
            results[lt]["dp_perp_sq_sum"] = 0.0
            results[lt]["n_samples"] = 0

        if use_precip_mask:
            # Restrict the statistics to pixels valid in both fields.
            MASK = np.logical_and(np.isfinite(V1[0, :, :]), np.isfinite(V2[0, :, :]))
            DP_par = DP_par[MASK]
            DP_perp = DP_perp[MASK]
            n_samples = np.sum(MASK)
        else:
            n_samples = DP_par.size

        # Accumulate sums and squared sums so that means and standard
        # deviations can be computed later (see fit_vel_pert_params.py).
        results[lt]["dp_par_sum"] += np.sum(DP_par)
        results[lt]["dp_par_sq_sum"] += np.sum(DP_par**2)
        results[lt]["dp_perp_sum"] += np.sum(DP_perp)
        results[lt]["dp_perp_sq_sum"] += np.sum(DP_perp**2)
        results[lt]["n_samples"] += n_samples

with open("%s" % args.outfile, "wb") as f:
    pickle.dump(results, f)
"""
Testing helper functions
=======================

Collection of helper functions for the testing suite.
"""

from datetime import datetime

import numpy as np
import pytest

import pysteps as stp
from pysteps import io, rcparams
from pysteps.utils import aggregate_fields_space

# Reference timestamps of the archived sample events, one per data source.
_reference_dates = dict()
_reference_dates["bom"] = datetime(2018, 6, 16, 10, 0)
_reference_dates["fmi"] = datetime(2016, 9, 28, 16, 0)
_reference_dates["knmi"] = datetime(2010, 8, 26, 0, 0)
_reference_dates["mch"] = datetime(2015, 5, 15, 16, 30)
_reference_dates["dwd"] = datetime(2025, 6, 4, 17, 0)
_reference_dates["opera"] = datetime(2018, 8, 24, 18, 0)
_reference_dates["saf"] = datetime(2018, 6, 1, 7, 0)
_reference_dates["mrms"] = datetime(2019, 6, 10, 0, 0)


def get_precipitation_fields(
    num_prev_files=0,
    num_next_files=0,
    return_raw=False,
    metadata=False,
    upscale=None,
    source="mch",
    log_transform=True,
    clip=None,
    **importer_kwargs,
):
    """
    Get a precipitation field from the archive to be used as reference.

    Source: bom
    Reference time: 2018/06/16 10000 UTC

    Source: fmi
    Reference time: 2016/09/28 1600 UTC

    Source: knmi
    Reference time: 2010/08/26 0000 UTC

    Source: mch
    Reference time: 2015/05/15 1630 UTC

    Source: dwd
    Reference time: 2025/06/04 1700 UTC

    Source: opera
    Reference time: 2018/08/24 1800 UTC

    Source: saf
    Reference time: 2018/06/01 0700 UTC

    Source: mrms
    Reference time: 2019/06/10 0000 UTC

    Parameters
    ----------
    num_prev_files: int, optional
        Number of previous times (files) to return with respect to the
        reference time.
    num_next_files: int, optional
        Number of future times (files) to return with respect to the
        reference time.
    return_raw: bool, optional
        Do not preprocess the precipitation fields. False by default.
        The pre-processing steps are: 1) Convert to mm/h,
        2) Mask invalid values, 3) Log-transform the data [dBR].
    metadata: bool, optional
        If True, also return file metadata.
    upscale: float or None, optional
        Upscale fields in space during the pre-processing steps.
        If it is None, the precipitation field is not modified.
        If it is a float, represents the length of the space window that is
        used to upscale the fields.
    source: {"bom", "fmi" , "knmi", "mch", "opera", "saf", "mrms"}, optional
        Name of the data source to be used.
    log_transform: bool
        Whether to transform the output to dB.
    clip: scalars (left, right, bottom, top), optional
        The extent of the bounding box in data coordinates to be used to clip
        the data.

    Other Parameters
    ----------------
    importer_kwargs : dict
        Additional keyword arguments passed to the importer.

    Returns
    -------
    reference_field : array
    metadata : dict
    """
    # Skip the test if the importer backend needed by this source is not
    # installed in the current environment.
    if source == "bom":
        pytest.importorskip("netCDF4")

    if source == "fmi":
        pytest.importorskip("pyproj")

    if source == "knmi":
        pytest.importorskip("h5py")

    if source == "mch":
        pytest.importorskip("PIL")

    if source == "dwd":
        pytest.importorskip("h5py")

    if source == "opera":
        pytest.importorskip("h5py")

    if source == "saf":
        pytest.importorskip("netCDF4")

    if source == "mrms":
        pytest.importorskip("pygrib")

    try:
        date = _reference_dates[source]
    except KeyError:
        raise ValueError(
            f"Unknown source name '{source}'\n"
            "The available data sources are: "
            f"{str(list(_reference_dates.keys()))}"
        )

    # Look up the archive layout and importer settings from pystepsrc.
    data_source = rcparams.data_sources[source]
    root_path = data_source["root_path"]
    path_fmt = data_source["path_fmt"]
    fn_pattern = data_source["fn_pattern"]
    fn_ext = data_source["fn_ext"]
    importer_name = data_source["importer"]
    # Copy before updating so the rcparams defaults are not mutated.
    _importer_kwargs = data_source["importer_kwargs"].copy()
    _importer_kwargs.update(**importer_kwargs)
    timestep = data_source["timestep"]

    # Find the input files from the archive
    fns = io.archive.find_by_date(
        date,
        root_path,
        path_fmt,
        fn_pattern,
        fn_ext,
        timestep=timestep,
        num_prev_files=num_prev_files,
        num_next_files=num_next_files,
    )

    # Read the radar composites
    importer = io.get_method(importer_name, "importer")

    reference_field, __, ref_metadata = io.read_timeseries(
        fns, importer, **_importer_kwargs
    )

    if not return_raw:
        if (num_prev_files == 0) and (num_next_files == 0):
            # Remove time dimension
            reference_field = np.squeeze(reference_field)

        # Convert to mm/h
        reference_field, ref_metadata = stp.utils.to_rainrate(
            reference_field, ref_metadata
        )

        # Clip domain
        reference_field, ref_metadata = stp.utils.clip_domain(
            reference_field, ref_metadata, clip
        )

        # Upscale data
        reference_field, ref_metadata = aggregate_fields_space(
            reference_field, ref_metadata, upscale
        )

        # Mask invalid values
        reference_field = np.ma.masked_invalid(reference_field)

        if log_transform:
            # Log-transform the data [dBR]
            reference_field, ref_metadata = stp.utils.dB_transform(
                reference_field, ref_metadata, threshold=0.1, zerovalue=-15.0
            )

        # Set missing values with the fill value
        np.ma.set_fill_value(reference_field, ref_metadata["zerovalue"])
        reference_field.data[reference_field.mask] = ref_metadata["zerovalue"]

    if metadata:
        return reference_field, ref_metadata

    return reference_field


def smart_assert(actual_value, expected, tolerance=None):
    """
    Assert by equality for non-numeric values, or by approximation otherwise.

    If the precision keyword is None, assert by equality.
    When the precision is not None, assert that two numeric values (or two
    sets of numbers) are equal to each other within the tolerance.
    """
    if tolerance is None:
        assert actual_value == expected
    else:
        # Compare numbers up to a certain precision
        assert actual_value == pytest.approx(
            expected, rel=tolerance, abs=tolerance, nan_ok=True
        )
""" if isinstance(input_array, np.ma.MaskedArray): invalid_mask = np.ma.getmaskarray(input_array) else: if fillna is np.nan: invalid_mask = ~np.isfinite(input_array) else: invalid_mask = input_array == fillna return invalid_mask ================================================ FILE: pysteps/tests/test_archive.py ================================================ # -*- coding: utf-8 -*- import pytest from datetime import datetime from pysteps.io.archive import _generate_path test_argvalues = [ ("20190130_1200", "%Y/foo/%m", "./2019/foo/01"), ("20190225_1200", "%Y/foo/%m", "./2019/foo/02"), ("20190122_2222", "%Y/foo/%m", "./2019/foo/01"), ("20190130_1200", "%Y/foo/%m", "./2019/foo/01"), ("20190130_1205", "%Y%m%d/foo/bar/%H%M", "./20190130/foo/bar/1205"), ("20190130_1205", "foo/bar/%H%M", "./foo/bar/1205"), ] @pytest.mark.parametrize("timestamp, path_fmt, expected_path", test_argvalues) def test_generate_path(timestamp, path_fmt, expected_path): date = datetime.strptime(timestamp, "%Y%m%d_%H%M") assert _generate_path(date, "./", path_fmt) == expected_path ================================================ FILE: pysteps/tests/test_blending_clim.py ================================================ # -*- coding: utf-8 -*- from datetime import datetime, timedelta from os.path import join, exists import pickle import random import numpy as np from numpy.testing import assert_array_equal import pytest from pysteps.blending.clim import save_skill, calc_clim_skill random.seed(12356) n_cascade_levels = 7 model_names = ["alaro13", "arome13"] default_start_skill = [0.8, 0.5] # Helper functions def generate_fixed_skill(n_cascade_levels, n_models=1): """ Generate skill starting at default_start_skill which decay exponentially with scale. 
""" start_skill = np.resize(default_start_skill, n_models) powers = np.arange(1, n_cascade_levels + 1) return pow(start_skill[:, np.newaxis], powers) # Test arguments clim_arg_names = ("startdatestr", "enddatestr", "n_models", "expected_skill_today") test_enddates = ["20210701235500", "20210702000000", "20200930235500"] clim_arg_values = [ ( "20210701230000", "20210701235500", 1, { "mean_skill": generate_fixed_skill(n_cascade_levels), "n": 12, "last_validtime": datetime.strptime(test_enddates[0], "%Y%m%d%H%M%S"), }, ), ( "20210701235500", "20210702000000", 1, { "mean_skill": generate_fixed_skill(n_cascade_levels), "n": 1, "last_validtime": datetime.strptime(test_enddates[1], "%Y%m%d%H%M%S"), }, ), ( "20200801000000", "20200930235500", 1, { "mean_skill": generate_fixed_skill(n_cascade_levels), "n": 288, "last_validtime": datetime.strptime(test_enddates[2], "%Y%m%d%H%M%S"), }, ), ( "20210701230000", "20210701235500", 2, { "mean_skill": generate_fixed_skill(n_cascade_levels, 2), "n": 12, "last_validtime": datetime.strptime(test_enddates[0], "%Y%m%d%H%M%S"), }, ), ( "20210701230000", "20210702000000", 2, { "mean_skill": generate_fixed_skill(n_cascade_levels, 2), "n": 1, "last_validtime": datetime.strptime(test_enddates[1], "%Y%m%d%H%M%S"), }, ), ( "20200801000000", "20200930235500", 2, { "mean_skill": generate_fixed_skill(n_cascade_levels, 2), "n": 288, "last_validtime": datetime.strptime(test_enddates[2], "%Y%m%d%H%M%S"), }, ), ] @pytest.mark.parametrize(clim_arg_names, clim_arg_values) def test_save_skill(startdatestr, enddatestr, n_models, expected_skill_today, tmpdir): """Test if the skill are saved correctly and the daily average is computed""" # get validtime currentdate = datetime.strptime(startdatestr, "%Y%m%d%H%M%S") enddate = datetime.strptime(enddatestr, "%Y%m%d%H%M%S") timestep = timedelta(minutes=5) outdir_path = tmpdir while currentdate <= enddate: current_skill = generate_fixed_skill(n_cascade_levels, n_models) print("Saving skill: ", current_skill, 
currentdate, outdir_path) save_skill( current_skill, currentdate, outdir_path, n_models=n_models, window_length=2 ) currentdate += timestep skill_today_file = join(outdir_path, "NWP_skill_today.pkl") assert exists(skill_today_file) with open(skill_today_file, "rb") as f: skill_today = pickle.load(f) # Check type assert isinstance(skill_today, dict) assert "mean_skill" in skill_today assert "n" in skill_today assert "last_validtime" in skill_today assert_array_equal(skill_today["mean_skill"], expected_skill_today["mean_skill"]) assert skill_today["n"] == expected_skill_today["n"] assert skill_today["last_validtime"] == expected_skill_today["last_validtime"] # Finally, check if the clim skill calculation returns an array of values clim_skill = calc_clim_skill( outdir_path=tmpdir, n_cascade_levels=n_cascade_levels, n_models=n_models, window_length=2, ) assert clim_skill.shape[0] == n_models assert clim_skill.shape[1] == n_cascade_levels if __name__ == "__main__": save_skill( generate_fixed_skill(n_cascade_levels, 1), datetime.strptime("20200801000000", "%Y%m%d%H%M%S"), "./tmp/", ) ================================================ FILE: pysteps/tests/test_blending_linear_blending.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from pysteps.blending.linear_blending import forecast, _get_ranked_salience, _get_ws from numpy.testing import assert_array_almost_equal from pysteps.utils import transformation # Test function arguments linear_arg_values = [ (5, 30, 60, 20, 45, "eulerian", None, 1, False, True, False), (5, 30, 60, 20, 45, "eulerian", None, 2, False, False, False), (5, 30, 60, 20, 45, "eulerian", None, 0, False, False, False), (4, 23, 33, 9, 28, "eulerian", None, 1, False, False, False), (3, 18, 36, 13, 27, "eulerian", None, 1, False, False, False), (7, 30, 68, 11, 49, "eulerian", None, 1, False, False, False), (7, 30, 68, 11, 49, "eulerian", None, 1, False, False, True), (10, 100, 160, 25, 130, 
"eulerian", None, 1, False, False, False), (6, 60, 180, 22, 120, "eulerian", None, 1, False, False, False), (5, 100, 200, 40, 150, "eulerian", None, 1, False, False, False), ( 5, 30, 60, 20, 45, "extrapolation", np.zeros((2, 200, 200)), 1, False, False, False, ), ( 4, 23, 33, 9, 28, "extrapolation", np.zeros((2, 200, 200)), 1, False, False, False, ), ( 3, 18, 36, 13, 27, "extrapolation", np.zeros((2, 200, 200)), 1, False, False, False, ), ( 7, 30, 68, 11, 49, "extrapolation", np.zeros((2, 200, 200)), 1, False, False, False, ), ( 10, 100, 160, 25, 130, "extrapolation", np.zeros((2, 200, 200)), 1, False, False, False, ), ( 6, 60, 180, 22, 120, "extrapolation", np.zeros((2, 200, 200)), 1, False, False, False, ), ( 5, 100, 200, 40, 150, "extrapolation", np.zeros((2, 200, 200)), 1, False, False, False, ), ( 5, 100, 200, 40, 150, "extrapolation", np.zeros((2, 200, 200)), 1, False, False, True, ), (5, 30, 60, 20, 45, "eulerian", None, 1, True, True, False), (5, 30, 60, 20, 45, "eulerian", None, 2, True, False, False), (5, 30, 60, 20, 45, "eulerian", None, 0, True, False, False), ( 5, 30, 60, 20, 45, "extrapolation", np.zeros((2, 200, 200)), 1, True, False, False, ), (4, 23, 33, 9, 28, "extrapolation", np.zeros((2, 200, 200)), 1, True, False, False), ( 3, 18, 36, 13, 27, "extrapolation", np.zeros((2, 200, 200)), 1, True, False, False, ), ] @pytest.mark.parametrize( "timestep, start_blending, end_blending, n_timesteps, controltime, nowcast_method, V, n_models, salient_blending, squeeze_nwp_array, fill_nwp", linear_arg_values, ) def test_linear_blending( timestep, start_blending, end_blending, n_timesteps, controltime, nowcast_method, V, n_models, salient_blending, squeeze_nwp_array, fill_nwp, ): """Tests if the linear blending function is correct. For the nowcast data a precipitation field which is constant over time is taken. One half of the field has no rain and the other half has a set value. 
For the NWP data a similar field is taken, the only difference being that now the other half of the field is zero. The blended field should have a constant value over the entire field at the timestep right in the middle between the start of the blending and the end of the blending. This assertion is checked to see if the linear blending function works well.""" # The argument controltime gives the timestep at which the field is assumed to be # entirely constant # Assert that the control time step is in the range of the forecasted time steps assert controltime <= ( n_timesteps * timestep ), "Control time needs to be within reach of forecasts, controltime = {} and n_timesteps = {}".format( controltime, n_timesteps ) # Assert that the start time of the blending comes before the end time of the blending assert ( start_blending < end_blending ), "Start time of blending needs to be smaller than end time of blending" # Assert that the control time is a multiple of the time step assert ( not controltime % timestep ), "Control time needs to be a multiple of the time step" # Initialise dummy NWP data if n_models == 0: r_nwp = None else: r_nwp = np.zeros((n_models, n_timesteps, 200, 200)) for i in range(100): r_nwp[:, :, i, :] = 11.0 if squeeze_nwp_array: r_nwp = np.squeeze(r_nwp) # Define nowcast input data (alternate between 2D and 3D arrays for testing) if timestep % 2 == 0: r_input = np.zeros((4, 200, 200)) for i in range(100, 200): r_input[:, i, :] = 11.0 else: r_input = np.zeros((200, 200)) for i in range(100, 200): r_input[i, :] = 11.0 # Transform from mm/h to dB r_input, _ = transformation.dB_transform( r_input, None, threshold=0.1, zerovalue=-15.0 ) # Calculate the blended field r_blended = forecast( r_input, dict({"unit": "mm/h", "transform": "dB"}), V, n_timesteps, timestep, nowcast_method, r_nwp, dict({"unit": "mm/h", "transform": None}), start_blending=start_blending, end_blending=end_blending, fill_nwp=fill_nwp, saliency=salient_blending, ) # Assert that the 
blended field has the expected dimension if n_models > 1: assert r_blended.shape == ( n_models, n_timesteps, 200, 200, ), "The shape of the blended array does not have the expected value. The shape is {}".format( r_blended.shape ) else: assert r_blended.shape == ( n_timesteps, 200, 200, ), "The shape of the blended array does not have the expected value. The shape is {}".format( r_blended.shape ) # Assert that the blended field at the control time step is equal to # a constant field with the expected value. if salient_blending == False: if n_models > 1: assert_array_almost_equal( r_blended[0, controltime // timestep - 1], np.ones((200, 200)) * 5.5, err_msg="The blended array does not have the expected value", ) elif n_models > 0: assert_array_almost_equal( r_blended[controltime // timestep - 1], np.ones((200, 200)) * 5.5, err_msg="The blended array does not have the expected value", ) ranked_salience_values = [ (np.ones((200, 200)), np.ones((200, 200)), 0.9), (np.zeros((200, 200)), np.random.rand(200, 200), 0.7), (np.random.rand(200, 200), np.random.rand(200, 200), 0.5), ] @pytest.mark.parametrize( "nowcast, nwp, weight_nowcast", ranked_salience_values, ) def test_salient_weight( nowcast, nwp, weight_nowcast, ): ranked_salience = _get_ranked_salience(nowcast, nwp) ws = _get_ws(weight_nowcast, ranked_salience) assert np.min(ws) >= 0, "Negative value for the ranked saliency output" assert np.max(ws) <= 1, "Too large value for the ranked saliency output" assert ws.shape == ( 200, 200, ), "The shape of the ranked salience array does not have the expected value. 
The shape is {}".format( ws.shape ) ================================================ FILE: pysteps/tests/test_blending_pca_ens_kalman_filter.py ================================================ # -*- coding: utf-8 -*- import datetime import numpy as np import pytest from pysteps import blending, motion, utils # fmt: off pca_enkf_arg_values = [ # Standard setting (20,30,0,-60,False,False,5,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",False,False,0,False), # Smooth radar mask (20,30,0,-60,False,False,5,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",False,False,20,False), # Coarser NWP temporal resolution (20,30,0,-60,False,False,5,15,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",False,False,0,False), # Coarser Obs temporal resolution (20,30,0,-60,False,False,10,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",False,False,0,False), # Larger shift of the NWP init (20,30,0,-30,False,False,5,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",False,False,0,False), # Zero rain case in observation (20,30,0,-60,True,False,5,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",False,False,0,False), # Zero rain case in NWP (20,30,0,-60,False,True,5,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",False,False,0,False), # Zero rain in both (20,30,0,-60,True,True,5,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",False,False,0,False), # Accumulated sampling probability (20,30,0,-60,False,False,5,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",True,False,0,False), # Use full NWP weight (20,30,0,-60,False,False,5,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",False,True,0,False), # Both (20,30,0,-60,False,False,5,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",True,True,0,False), # Explained variance as sampling probability source (20,30,0,-60,False,False,5,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"explained_var",False,False,0,False), # No combination 
(20,30,0,-60,False,False,5,5,0.05,0.01,"ssft","masked_enkf",False,None,1.0,"ensemble",False,False,0,False), # Standard deviation adjustment (20,30,0,-60,False,False,5,5,0.05,0.01,"ssft","masked_enkf",True,"auto",1.0,"ensemble",False,False,0,False), # Other number of ensemble members (10,30,0,-60,False,False,5,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",False,False,0,False), # Other forecast length (20,35,0,-60,False,False,5,5,0.05,0.01,"ssft","masked_enkf",True,None,1.0,"ensemble",False,False,0,False), # Other noise method (20,30,0,-60,False,False,5,5,0.05,0.01,"nonparametric","masked_enkf",True,None,1.0,"ensemble",False,False,0,False), # Verbose output (20,30,0,-60,False,False,5,5,0.05,0.01,"nonparametric","masked_enkf",True,None,1.0,"ensemble",False,False,0,True),] # fmt: on pca_enkf_arg_names = ( "n_ens_members", "forecast_length", "forecast_shift_radar", "forecast_shift_nwp", "zero_radar", "zero_nwp", "temporal_res_radar", "temporal_res_nwp", "thr_prec", "norain_thr", "noise_method", "enkf_method", "enable_combination", "noise_stddev_adj", "inflation_factor_bg", "sampling_prob_source", "use_accum_sampling_prob", "ensure_full_nwp_weight", "smooth_radar_mask_range", "verbose_output", ) @pytest.mark.parametrize(pca_enkf_arg_names, pca_enkf_arg_values) def test_pca_enkf_combination( n_ens_members, forecast_length, forecast_shift_radar, forecast_shift_nwp, zero_radar, zero_nwp, temporal_res_radar, temporal_res_nwp, thr_prec, norain_thr, noise_method, enkf_method, enable_combination, noise_stddev_adj, inflation_factor_bg, sampling_prob_source, use_accum_sampling_prob, ensure_full_nwp_weight, smooth_radar_mask_range, verbose_output, ): pytest.importorskip("sklearn") # Set forecast init forecast_init = datetime.datetime(2025, 6, 4, 17, 0) # Initialize dummy radar data radar_precip = np.zeros((2, 200, 200)) if not zero_radar: for i in range(radar_precip.shape[0]): a = 5 * i radar_precip[i, 5 + a : 100 - a, 30 + a : 180 - a] = 0.1 radar_precip[i, 10 + a : 
105 - a, 35 + a : 178 - a] = 0.5 radar_precip[i, 15 + a : 110 - a, 40 + a : 176 - a] = 0.5 radar_precip[i, 20 + a : 115 - a, 45 + a : 174 - a] = 5.0 radar_precip[i, 25 + a : 120 - a, 50 + a : 172 - a] = 5.0 radar_precip[i, 30 + a : 125 - a, 55 + a : 170 - a] = 4.5 radar_precip[i, 35 + a : 130 - a, 60 + a : 168 - a] = 4.5 radar_precip[i, 40 + a : 135 - a, 65 + a : 166 - a] = 4.0 radar_precip[i, 45 + a : 140 - a, 70 + a : 164 - a] = 1.0 radar_precip[i, 50 + a : 145 - a, 75 + a : 162 - a] = 0.5 radar_precip[i, 55 + a : 150 - a, 80 + a : 160 - a] = 0.5 radar_precip[i, 60 + a : 155 - a, 85 + a : 158 - a] = 0.1 radar_precip_timestamps = np.array( sorted( [ forecast_init + datetime.timedelta(minutes=forecast_shift_radar) - datetime.timedelta(minutes=i * temporal_res_radar) for i in range(radar_precip.shape[0]) ] ) ) # Initialize dummy NWP data nwp_precip = np.zeros((n_ens_members, 20, 200, 200)) if not zero_nwp: for n_model in range(n_ens_members): for i in range(nwp_precip.shape[1]): a = 2 * n_model b = 2 * i nwp_precip[n_model, i, 20 + b : 160 - b, 30 + a : 180 - a] = 0.1 nwp_precip[n_model, i, 22 + b : 162 - b, 35 + a : 178 - a] = 0.1 nwp_precip[n_model, i, 24 + b : 164 - b, 40 + a : 176 - a] = 1.0 nwp_precip[n_model, i, 26 + b : 166 - b, 45 + a : 174 - a] = 5.0 nwp_precip[n_model, i, 28 + b : 168 - b, 50 + a : 172 - a] = 5.0 nwp_precip[n_model, i, 30 + b : 170 - b, 35 + a : 170 - a] = 4.5 nwp_precip[n_model, i, 32 + b : 172 - b, 40 + a : 168 - a] = 4.5 nwp_precip[n_model, i, 34 + b : 174 - b, 45 + a : 166 - a] = 4.0 nwp_precip[n_model, i, 36 + b : 176 - b, 50 + a : 164 - a] = 2.0 nwp_precip[n_model, i, 38 + b : 178 - b, 55 + a : 162 - a] = 1.0 nwp_precip[n_model, i, 40 + b : 180 - b, 60 + a : 160 - a] = 0.5 nwp_precip[n_model, i, 42 + b : 182 - b, 65 + a : 158 - a] = 0.1 nwp_precip_timestamps = np.array( sorted( [ forecast_init + datetime.timedelta(minutes=forecast_shift_nwp) + datetime.timedelta(minutes=i * temporal_res_nwp) for i in range(nwp_precip.shape[1]) ] ) ) 
# Metadata of dummy data is necessary for data conversion metadata = dict() metadata["unit"] = "mm" metadata["transformation"] = "dB" metadata["accutime"] = 5.0 metadata["transform"] = None metadata["zerovalue"] = 0.0 metadata["threshold"] = thr_prec metadata["zr_a"] = 200.0 metadata["zr_b"] = 1.6 # Converting the input data # Thresholding radar_precip[radar_precip < metadata["threshold"]] = 0.0 nwp_precip[nwp_precip < metadata["threshold"]] = 0.0 # Convert the data converter = utils.get_method("mm/h") radar_precip, _ = converter(radar_precip, metadata) nwp_precip, metadata = converter(nwp_precip, metadata) # Transform the data transformer = utils.get_method(metadata["transformation"]) radar_precip, _ = transformer(radar_precip, metadata) nwp_precip, metadata = transformer(nwp_precip, metadata) # Set NaN equal to zero radar_precip[~np.isfinite(radar_precip)] = metadata["zerovalue"] nwp_precip[~np.isfinite(nwp_precip)] = metadata["zerovalue"] assert ( np.any(~np.isfinite(radar_precip)) == False ), "There are still infinite values in the input radar data" assert ( np.any(~np.isfinite(nwp_precip)) == False ), "There are still infinite values in the NWP data" # Initialize radar velocity oflow_method = motion.get_method("LK") radar_velocity = oflow_method(radar_precip) # Set the combination kwargs combination_kwargs = dict( n_tapering=0, non_precip_mask=True, n_ens_prec=1, lien_criterion=True, n_lien=10, prob_matching="iterative", inflation_factor_bg=inflation_factor_bg, inflation_factor_obs=1.0, offset_bg=0.0, offset_obs=0.0, nwp_hres_eff=14.0, sampling_prob_source=sampling_prob_source, use_accum_sampling_prob=use_accum_sampling_prob, ensure_full_nwp_weight=ensure_full_nwp_weight, ) # Call the reduced-spaced ensemble Kalman filter approach. 
combined_forecast = blending.pca_ens_kalman_filter.forecast( obs_precip=radar_precip, obs_timestamps=radar_precip_timestamps, nwp_precip=nwp_precip, nwp_timestamps=nwp_precip_timestamps, velocity=radar_velocity, forecast_horizon=forecast_length, issuetime=forecast_init, n_ens_members=n_ens_members, precip_mask_dilation=1, smooth_radar_mask_range=smooth_radar_mask_range, n_cascade_levels=6, precip_thr=metadata["threshold"], norain_thr=norain_thr, extrap_method="semilagrangian", decomp_method="fft", bandpass_filter_method="gaussian", noise_method=noise_method, enkf_method=enkf_method, enable_combination=enable_combination, noise_stddev_adj=noise_stddev_adj, ar_order=1, callback=None, return_output=True, seed=None, num_workers=1, fft_method="numpy", domain="spatial", extrap_kwargs=None, filter_kwargs=None, noise_kwargs=None, combination_kwargs=combination_kwargs, measure_time=False, verbose_output=verbose_output, ) if verbose_output: assert len(combined_forecast) == 2, "Wrong amount of output data" combined_forecast = combined_forecast[0] assert combined_forecast.ndim == 4, "Wrong amount of dimensions in forecast output" assert ( combined_forecast.shape[0] == n_ens_members ), "Wrong amount of output ensemble members in forecast output" assert ( combined_forecast.shape[1] == forecast_length // temporal_res_radar + 1 ), "Wrong amount of output time steps in forecast output" # Transform the data back into mm/h combined_forecast, _ = converter(combined_forecast, metadata) assert ( combined_forecast.ndim == 4 ), "Wrong amount of dimensions in converted forecast output" assert ( combined_forecast.shape[0] == n_ens_members ), "Wrong amount of output ensemble members in converted forecast output" assert ( combined_forecast.shape[1] == forecast_length // temporal_res_radar + 1 ), "Wrong amount of output time steps in converted forecast output" return ================================================ FILE: pysteps/tests/test_blending_skill_scores.py 
================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from numpy.testing import assert_array_almost_equal from pysteps.blending.skill_scores import ( spatial_correlation, lt_dependent_cor_nwp, lt_dependent_cor_extrapolation, clim_regr_values, ) # Set the climatological correlations values clim_cor_values_8lev = np.array( [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040] ) clim_cor_values_6lev = np.array([0.848, 0.537, 0.237, 0.065, 0.020, 0.0044]) clim_cor_values_9lev = np.array( [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040, 1e-4] ) # Set the regression values regr_pars_8lev = np.array( [ [130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0], [155.0, 220.0, 200.0, 75.0, 10e4, 10e4, 10e4, 10e4], ] ) regr_pars_6lev = np.array( [ [130.0, 165.0, 120.0, 55.0, 50.0, 15.0], [155.0, 220.0, 200.0, 75.0, 10e4, 10e4], ] ) regr_pars_9lev = np.array( [ [130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0, 10.0], [155.0, 220.0, 200.0, 75.0, 10e4, 10e4, 10e4, 10e4, 10e4], ] ) # Set the dummy observation and model values dummy_2d_array = np.array([[1.0, 2.0], [3.0, 4.0]]) obs_8lev = np.repeat(dummy_2d_array[None, :, :], 8, axis=0) obs_6lev = np.repeat(dummy_2d_array[None, :, :], 6, axis=0) obs_9lev = np.repeat(dummy_2d_array[None, :, :], 9, axis=0) mod_8lev = np.repeat(dummy_2d_array[None, :, :], 8, axis=0) mod_6lev = np.repeat(dummy_2d_array[None, :, :], 6, axis=0) mod_9lev = np.repeat(dummy_2d_array[None, :, :], 9, axis=0) # Gives some dummy values to PHI dummy_phi = np.array([0.472650, 0.523825, 0.103454]) PHI_8lev = np.repeat(dummy_phi[None, :], 8, axis=0) PHI_6lev = np.repeat(dummy_phi[None, :], 6, axis=0) PHI_9lev = np.repeat(dummy_phi[None, :], 9, axis=0) # Test function arguments skill_scores_arg_names = ( "obs", "mod", "lt", "PHI", "cor_prev", "clim_cor_values", "regr_pars", "n_cascade_levels", "expected_cor_t0", "expected_cor_nwp_lt", "expected_cor_nowcast_lt", "n_model", "number_of_models", ) # Test 
function values skill_scores_arg_values = [ ( obs_8lev, mod_8lev, 60, PHI_8lev, None, clim_cor_values_8lev, regr_pars_8lev, 8, np.repeat(1.0, 8), np.array( [ 0.97455941, 0.9356775, 0.81972779, 0.55202975, 0.31534738, 0.02264599, 0.02343133, 0.00647032, ] ), np.array( [ 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, ] ), 0, None, ), ( obs_6lev, mod_6lev, 60, PHI_6lev, None, clim_cor_values_6lev, regr_pars_6lev, 6, np.repeat(1.0, 6), np.array( [0.97455941, 0.9356775, 0.81972779, 0.55202975, 0.31534738, 0.02264599] ), np.array([0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475]), 0, 1, ), ( obs_9lev, mod_9lev, 60, PHI_9lev, None, clim_cor_values_9lev, regr_pars_9lev, 9, np.repeat(1.0, 9), np.array( [ 0.97455941, 0.9356775, 0.81972779, 0.55202975, 0.31534738, 0.02264599, 0.02343133, 0.00647032, 0.00347776, ] ), np.array( [ 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, ] ), 0, 1, ), ( obs_8lev, mod_8lev, 0, PHI_8lev, None, clim_cor_values_8lev, regr_pars_8lev, 8, np.repeat(1.0, 8), np.repeat(1.0, 8), np.array( [ 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, ] ), 0, 1, ), ( obs_8lev, mod_8lev, 0, PHI_8lev, None, clim_cor_values_8lev, regr_pars_8lev, 8, np.repeat(1.0, 8), np.repeat(1.0, 8), np.array( [ 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475, ] ), 1, 2, ), ] # The test @pytest.mark.parametrize(skill_scores_arg_names, skill_scores_arg_values) # The test function to be used def test_blending_skill_scores( obs, mod, lt, PHI, cor_prev, clim_cor_values, regr_pars, n_cascade_levels, expected_cor_t0, expected_cor_nwp_lt, expected_cor_nowcast_lt, n_model, number_of_models, ): """Tests if the skill_score functions behave correctly. A dummy gridded model and observation field should be given for n_cascade_levels, which leads to a given spatial correlation per cascade level. 
Then, the function tests if the correlation regresses towards the climatological values given lead time lt for the NWP fields or given the PHI-values for the extrapolation field. """ if number_of_models != None: skill_kwargs = {"n_models": number_of_models} else: skill_kwargs = None domain_mask = np.full(obs[0, :, :].shape, False, dtype=bool) # Calculate the spatial correlation of the given model field correlations_t0 = np.array(spatial_correlation(obs, mod, domain_mask)) # Check if the field has the same number of cascade levels as the model # field and as the given n_cascade_levels assert ( correlations_t0.shape[0] == mod.shape[0] ), "Number of cascade levels should be the same as in the model field" assert ( correlations_t0.shape[0] == n_cascade_levels ), "Number of cascade levels should be the same as n_cascade_levels" # Check if the returned values are as expected assert_array_almost_equal( correlations_t0, expected_cor_t0, decimal=3, err_msg="Returned spatial correlation is not the same as the expected value", ) # Test if the NWP correlation regresses towards the correct value given # a lead time in minutes # First, check if the climatological values are returned correctly correlations_clim, regr_clim = clim_regr_values( n_cascade_levels=n_cascade_levels, outdir_path="./tmp/", n_model=n_model, skill_kwargs=skill_kwargs, ) assert ( correlations_clim.shape[0] == n_cascade_levels ), "Number of cascade levels should be the same as n_cascade_levels" assert_array_almost_equal( correlations_clim, clim_cor_values, decimal=3, err_msg="Not the correct climatological correlations were returned", ) assert_array_almost_equal( regr_clim, regr_pars, decimal=3, err_msg="Not the correct regression parameters were returned", ) # Then, check the regression of the correlation values correlations_nwp_lt = lt_dependent_cor_nwp( lt=lt, correlations=correlations_t0, outdir_path="./tmp/" ) assert ( correlations_nwp_lt.shape[0] == mod.shape[0] ), "Number of cascade levels should be 
the same as in the model field" assert ( correlations_nwp_lt.shape[0] == n_cascade_levels ), "Number of cascade levels should be the same as n_cascade_levels" assert_array_almost_equal( correlations_nwp_lt, expected_cor_nwp_lt, decimal=3, err_msg="Correlations of NWP not equal to the expected correlations", ) # Finally, make sure nowcast correlation regresses towards the correct # value given some PHI-values. correlations_nowcast_lt, __ = lt_dependent_cor_extrapolation( PHI, correlations_t0, cor_prev ) print(correlations_nowcast_lt) assert ( correlations_nowcast_lt.shape[0] == mod.shape[0] ), "Number of cascade levels should be the same as in the model field" assert ( correlations_nowcast_lt.shape[0] == n_cascade_levels ), "Number of cascade levels should be the same as n_cascade_levels" assert_array_almost_equal( correlations_nowcast_lt, expected_cor_nowcast_lt, decimal=3, err_msg="Correlations of nowcast not equal to the expected correlations", ) ================================================ FILE: pysteps/tests/test_blending_steps.py ================================================ # -*- coding: utf-8 -*- import datetime import numpy as np import pytest import pysteps from pysteps import blending, cascade # fmt:off steps_arg_values = [ (1, 3, 4, 8, 'steps', None, None, False, "spn", True, 4, False, False, 0, False, None, None, None), (1, 3, 4, 8,'steps', "obs", None, False, "spn", True, 4, False, False, 0, False, None, None, None), (1, 3, 4, 8,'steps', "incremental", None, False, "spn", True, 4, False, False, 0, False, None, None, None), (1, 3, 4, 8,'steps', None, "mean", False, "spn", True, 4, False, False, 0, False, None, None, None), (1, 3, 4, 8,'steps', None, "mean", False, "spn", True, 4, False, False, 0, True, None, None, None), (1, 3, 4, 8,'steps', None, "cdf", False, "spn", True, 4, False, False, 0, False, None, None, None), (1, [1, 2, 3], 4, 8,'steps', None, "cdf", False, "spn", True, 4, False, False, 0, False, None, None, None), (1, 3, 4, 8,'steps', 
"incremental", "cdf", False, "spn", True, 4, False, False, 0, False, None, None, None), (1, 3, 4, 6,'steps', "incremental", "cdf", False, "bps", True, 4, False, False, 0, False, None, None, None), (1, 3, 4, 6,'steps', "incremental", "cdf", False, "bps", False, 4, False, False, 0, False, None, None, None), (1, 3, 4, 6,'steps', "incremental", "cdf", False, "bps", False, 4, False, False, 0, True, None, None, None), (1, 3, 4, 9,'steps', "incremental", "cdf", False, "spn", True, 4, False, False, 0, False, None, None, None), (2, 3, 10, 8,'steps', "incremental", "cdf", False, "spn", True, 10, False, False, 0, False, None, None, None), (5, 3, 5, 8,'steps', "incremental", "cdf", False, "spn", True, 5, False, False, 0, False, None, None, None), (1, 10, 1, 8,'steps', "incremental", "cdf", False, "spn", True, 1, False, False, 0, False, None, None, None), (2, 3, 2, 8,'steps', "incremental", "cdf", True, "spn", True, 2, False, False, 0, False, None, None, None), (1, 3, 6, 8,'steps', None, None, False, "spn", True, 6, False, False, 0, False, None, None, None), (1, 3, 6, 8,'steps', None, None, False, "spn", True, 6, False, False, 0, False, "bps", None, None), # Test the case where the radar image contains no rain. (1, 3, 6, 8,'steps', None, None, False, "spn", True, 6, True, False, 0, False, None, None, None), (5, 3, 5, 6,'steps', "incremental", "cdf", False, "spn", False, 5, True, False, 0, False, None, None, None), (5, 3, 5, 6,'steps', "incremental", "cdf", False, "spn", False, 5, True, False, 0, True, None, None, None), # Test the case where the NWP fields contain no rain. (1, 3, 6, 8,'steps', None, None, False, "spn", True, 6, False, True, 0, False, None, None, None), (5, 3, 5, 6,'steps', "incremental", "cdf", False, "spn", False, 5, False, True, 0, True, None, None, None), # Test the case where both the radar image and the NWP fields contain no rain. 
(1, 3, 6, 8,'steps', None, None, False, "spn", True, 6, True, True, 0, False, None, None, None), (5, 3, 5, 6,'steps', "incremental", "cdf", False, "spn", False, 5, True, True, 0, False, None, None, None), (5, 3, 5, 6,'steps', "obs", "mean", True, "spn", True, 5, True, True, 0, False, None, None, None), # Test cases where we apply timestep_start_full_nwp_weight (1, 10, 2, 6,'steps', "incremental", "cdf", False, "bps", False, 2, False, False, 0, True, None, None, 5), (1, 10, 2, 6,'steps', "incremental", "cdf", False, "spn", False, 2, False, False, 0, False, None, None, 5), # Test for smooth radar mask (1, 3, 6, 8,'steps', None, None, False, "spn", True, 6, False, False, 80, False, None, None, None), (5, 3, 5, 6,'steps', "incremental", "cdf", False, "spn", False, 5, False, False, 80, False, None, None, None), (5, 3, 5, 6,'steps', "obs", "mean", False, "spn", False, 5, False, False, 80, False, None, None, None), (1, 3, 6, 8,'steps', None, None, False, "spn", True, 6, False, True, 80, False, None, None, None), (5, 3, 5, 6,'steps', "incremental", "cdf", False, "spn", False, 5, True, False, 80, True, None, None, None), (5, 3, 5, 6,'steps', "obs", "mean", False, "spn", False, 5, True, True, 80, False, None, None, None), (5, [1, 2, 3], 5, 6,'steps', "obs", "mean", False, "spn", False, 5, True, True, 80, False, None, None, None), (5, [1, 3], 5, 6,'steps', "obs", "mean", False, "spn", False, 5, True, True, 80, False, None, None, None), # Test the usage of a max_mask_rim in the mask_kwargs (1, 3, 6, 8,'steps', None, None, False, "bps", True, 6, False, False, 80, False, None, 40, None), (5, 3, 5, 6,'steps', "obs", "mean", False, "bps", False, 5, False, False, 80, False, None, 40, None), (5, 3, 5, 6,'steps', "incremental", "cdf", False, "bps", False, 5, False, False, 80, False, None, 25, None), (5, 3, 5, 6,'steps', "incremental", "cdf", False, "bps", False, 5, False, False, 80, False, None, 40, None), (5, 3, 5, 6,'steps', "incremental", "cdf", False, "bps", False, 5, False, 
False, 80, False, None, 60, None), #Test the externally provided nowcast (1, 10, 1, 8,'external_nowcast_det', None, None, False, "spn", True, 1, False, False, 0, False, None, None, None), (1, 10, 1, 8,'external_nowcast_det', "incremental", None, False, "bps", True, 1, False, False, 0, False, None, None, None), (1, 10, 1, 8,'external_nowcast_det', "incremental", None, False, "spn", True, 1, False, False, 80, False, None, None, None), (1, 10, 1, 8,'external_nowcast_det', "incremental", None, False, "bps", True, 1, True, False, 0, False, None, None, None), (1, 10, 1, 8,'external_nowcast_det', "incremental", None, False, "spn", True, 1, False, True, 0, False, None, None, None), (1, 10, 1, 8,'external_nowcast_det', "incremental", None, False, "bps", True, 1, True, True, 0, False, None, None, None), (1, 10, 1, 8,'external_nowcast_det', "incremental", "cdf", False, "spn", True, 1, False, False, 0, True, None, None, None), (1, 10, 1, 8,'external_nowcast_det', "incremental", "obs", False, "bps", True, 1, False, False, 0, False, None, None, None), (1, 10, 1, 8,'external_nowcast_det', "incremental", None, False, "bps", True, 1, False, False, 0, False, None, None, 5), (5, 10, 5, 8,'external_nowcast_ens', "incremental", None, False, "spn", True, 5, False, False, 0, False, None, None, None), (5, 10, 5, 8,'external_nowcast_ens', "incremental", None, False, "spn", True, 5, False, False, 0, False, None, None, None), (1, 10, 5, 8,'external_nowcast_ens', "incremental", None, False, "spn", True, 5, False, False, 0, False, None, None, None), (1, 10, 1, 8,'external_nowcast_ens', "incremental", "cdf", False, "bps", True, 5, False, False, 0, False, None, None, None), (5, 10, 1, 8,'external_nowcast_ens', "incremental", "obs", False, "spn", True, 5, False, False, 0, False, None, None, None), (1, 10, 5, 8,'external_nowcast_ens', "incremental", "cdf", False, "bps", True, 5, False, False, 0, False, None, None, 5) ] # fmt:on steps_arg_names = ( "n_models", "timesteps", "n_ens_members", 
"n_cascade_levels", "nowcasting_method", "mask_method", "probmatching_method", "blend_nwp_members", "weights_method", "decomposed_nwp", "expected_n_ens_members", "zero_radar", "zero_nwp", "smooth_radar_mask_range", "resample_distribution", "vel_pert_method", "max_mask_rim", "timestep_start_full_nwp_weight", ) @pytest.mark.parametrize(steps_arg_names, steps_arg_values) def test_steps_blending( n_models, timesteps, n_ens_members, n_cascade_levels, nowcasting_method, mask_method, probmatching_method, blend_nwp_members, weights_method, decomposed_nwp, expected_n_ens_members, zero_radar, zero_nwp, smooth_radar_mask_range, resample_distribution, vel_pert_method, max_mask_rim, timestep_start_full_nwp_weight, ): pytest.importorskip("cv2") ### # The input data ### # Initialise dummy NWP data if not isinstance(timesteps, int): n_timesteps = len(timesteps) last_timestep = timesteps[-1] else: n_timesteps = timesteps last_timestep = timesteps nwp_precip = np.zeros((n_models, last_timestep + 1, 200, 200)) if not zero_nwp: for n_model in range(n_models): for i in range(nwp_precip.shape[1]): nwp_precip[n_model, i, 30:185, 30 + 1 * (i + 1) * n_model] = 0.1 nwp_precip[n_model, i, 30:185, 31 + 1 * (i + 1) * n_model] = 0.1 nwp_precip[n_model, i, 30:185, 32 + 1 * (i + 1) * n_model] = 1.0 nwp_precip[n_model, i, 30:185, 33 + 1 * (i + 1) * n_model] = 5.0 nwp_precip[n_model, i, 30:185, 34 + 1 * (i + 1) * n_model] = 5.0 nwp_precip[n_model, i, 30:185, 35 + 1 * (i + 1) * n_model] = 4.5 nwp_precip[n_model, i, 30:185, 36 + 1 * (i + 1) * n_model] = 4.5 nwp_precip[n_model, i, 30:185, 37 + 1 * (i + 1) * n_model] = 4.0 nwp_precip[n_model, i, 30:185, 38 + 1 * (i + 1) * n_model] = 2.0 nwp_precip[n_model, i, 30:185, 39 + 1 * (i + 1) * n_model] = 1.0 nwp_precip[n_model, i, 30:185, 40 + 1 * (i + 1) * n_model] = 0.5 nwp_precip[n_model, i, 30:185, 41 + 1 * (i + 1) * n_model] = 0.1 # Define dummy nowcast input data radar_precip = np.zeros((3, 200, 200)) if not zero_radar: for i in range(2): radar_precip[i, 
5:150, 30 + 1 * i] = 0.1 radar_precip[i, 5:150, 31 + 1 * i] = 0.5 radar_precip[i, 5:150, 32 + 1 * i] = 0.5 radar_precip[i, 5:150, 33 + 1 * i] = 5.0 radar_precip[i, 5:150, 34 + 1 * i] = 5.0 radar_precip[i, 5:150, 35 + 1 * i] = 4.5 radar_precip[i, 5:150, 36 + 1 * i] = 4.5 radar_precip[i, 5:150, 37 + 1 * i] = 4.0 radar_precip[i, 5:150, 38 + 1 * i] = 1.0 radar_precip[i, 5:150, 39 + 1 * i] = 0.5 radar_precip[i, 5:150, 40 + 1 * i] = 0.5 radar_precip[i, 5:150, 41 + 1 * i] = 0.1 radar_precip[2, 30:155, 30 + 1 * 2] = 0.1 radar_precip[2, 30:155, 31 + 1 * 2] = 0.1 radar_precip[2, 30:155, 32 + 1 * 2] = 1.0 radar_precip[2, 30:155, 33 + 1 * 2] = 5.0 radar_precip[2, 30:155, 34 + 1 * 2] = 5.0 radar_precip[2, 30:155, 35 + 1 * 2] = 4.5 radar_precip[2, 30:155, 36 + 1 * 2] = 4.5 radar_precip[2, 30:155, 37 + 1 * 2] = 4.0 radar_precip[2, 30:155, 38 + 1 * 2] = 2.0 radar_precip[2, 30:155, 39 + 1 * 2] = 1.0 radar_precip[2, 30:155, 40 + 1 * 3] = 0.5 radar_precip[2, 30:155, 41 + 1 * 3] = 0.1 precip_nowcast = np.zeros((n_ens_members, last_timestep + 1, 200, 200)) if nowcasting_method == "external_nowcast_ens": nowcasting_method = "external_nowcast" for n_ens_member in range(n_ens_members): for i in range(precip_nowcast.shape[1]): precip_nowcast[ n_ens_member, i, 30:165, 30 + 1 * (i + 1) * n_ens_member ] = 0.1 precip_nowcast[ n_ens_member, i, 30:165, 31 + 1 * (i + 1) * n_ens_member ] = 0.5 precip_nowcast[ n_ens_member, i, 30:165, 32 + 1 * (i + 1) * n_ens_member ] = 0.5 precip_nowcast[ n_ens_member, i, 30:165, 33 + 1 * (i + 1) * n_ens_member ] = 5.0 precip_nowcast[ n_ens_member, i, 30:165, 34 + 1 * (i + 1) * n_ens_member ] = 5.0 precip_nowcast[ n_ens_member, i, 30:165, 35 + 1 * (i + 1) * n_ens_member ] = 4.5 precip_nowcast[ n_ens_member, i, 30:165, 36 + 1 * (i + 1) * n_ens_member ] = 4.5 precip_nowcast[ n_ens_member, i, 30:165, 37 + 1 * (i + 1) * n_ens_member ] = 4.0 precip_nowcast[ n_ens_member, i, 30:165, 38 + 1 * (i + 1) * n_ens_member ] = 1.0 precip_nowcast[ n_ens_member, i, 30:165, 39 + 1 
* (i + 1) * n_ens_member ] = 0.5 precip_nowcast[ n_ens_member, i, 30:165, 40 + 1 * (i + 1) * n_ens_member ] = 0.5 precip_nowcast[ n_ens_member, i, 30:165, 41 + 1 * (i + 1) * n_ens_member ] = 0.1 if n_ens_members < expected_n_ens_members: n_ens_members = expected_n_ens_members elif nowcasting_method == "external_nowcast_det": nowcasting_method = "external_nowcast" for i in range(precip_nowcast.shape[1]): precip_nowcast[0, i, 30:165, 30 + 1 * i] = 0.1 precip_nowcast[0, i, 30:165, 31 + 1 * i] = 0.5 precip_nowcast[0, i, 30:165, 32 + 1 * i] = 0.5 precip_nowcast[0, i, 30:165, 33 + 1 * i] = 5.0 precip_nowcast[0, i, 30:165, 34 + 1 * i] = 5.0 precip_nowcast[0, i, 30:165, 35 + 1 * i] = 4.5 precip_nowcast[0, i, 30:165, 36 + 1 * i] = 4.5 precip_nowcast[0, i, 30:165, 37 + 1 * i] = 4.0 precip_nowcast[0, i, 30:165, 38 + 1 * i] = 1.0 precip_nowcast[0, i, 30:165, 39 + 1 * i] = 0.5 precip_nowcast[0, i, 30:165, 40 + 1 * i] = 0.5 precip_nowcast[0, i, 30:165, 41 + 1 * i] = 0.1 metadata = dict() metadata["unit"] = "mm" metadata["transformation"] = "dB" metadata["accutime"] = 5.0 metadata["transform"] = "dB" metadata["zerovalue"] = 0.0 metadata["threshold"] = 0.01 metadata["zr_a"] = 200.0 metadata["zr_b"] = 1.6 # Also set the outdir_path, clim_kwargs and mask_kwargs outdir_path_skill = "./tmp/" if n_models == 1: clim_kwargs = None else: clim_kwargs = dict({"n_models": n_models, "window_length": 30}) if max_mask_rim is not None: mask_kwargs = dict({"mask_rim": 10, "max_mask_rim": max_mask_rim}) else: mask_kwargs = None ### # First threshold the data and convert it to dBR ### # threshold the data radar_precip[radar_precip < metadata["threshold"]] = 0.0 nwp_precip[nwp_precip < metadata["threshold"]] = 0.0 # convert the data converter = pysteps.utils.get_method("mm/h") radar_precip, _ = converter(radar_precip, metadata) nwp_precip, metadata = converter(nwp_precip, metadata) # transform the data transformer = pysteps.utils.get_method(metadata["transformation"]) radar_precip, _ = 
transformer(radar_precip, metadata) nwp_precip, metadata = transformer(nwp_precip, metadata) # set NaN equal to zero radar_precip[~np.isfinite(radar_precip)] = metadata["zerovalue"] nwp_precip[~np.isfinite(nwp_precip)] = metadata["zerovalue"] assert ( np.any(~np.isfinite(radar_precip)) == False ), "There are still infinite values in the input radar data" assert ( np.any(~np.isfinite(nwp_precip)) == False ), "There are still infinite values in the NWP data" ### # Decompose the R_NWP data ### # Initial decomposition settings decomp_method, _ = cascade.get_method("fft") bandpass_filter_method = "gaussian" precip_shape = radar_precip.shape[1:] filter_method = cascade.get_method(bandpass_filter_method) bp_filter = filter_method(precip_shape, n_cascade_levels) # If we only use one model: if nwp_precip.ndim == 3: nwp_precip = nwp_precip[None, :] if decomposed_nwp: nwp_precip_decomp = [] # Loop through the n_models for i in range(nwp_precip.shape[0]): R_d_models_ = [] # Loop through the time steps for j in range(nwp_precip.shape[1]): R_ = decomp_method( field=nwp_precip[i, j, :, :], bp_filter=bp_filter, normalize=True, compute_stats=True, compact_output=True, ) R_d_models_.append(R_) nwp_precip_decomp.append(R_d_models_) nwp_precip_decomp = np.array(nwp_precip_decomp) assert nwp_precip_decomp.ndim == 2, "Wrong number of dimensions in R_d_models" else: nwp_precip_decomp = nwp_precip.copy() assert nwp_precip_decomp.ndim == 4, "Wrong number of dimensions in R_d_models" ### # Determine the velocity fields ### oflow_method = pysteps.motion.get_method("lucaskanade") radar_velocity = oflow_method(radar_precip) nwp_velocity = [] # Loop through the models for n_model in range(nwp_precip.shape[0]): # Loop through the timesteps. We need two images to construct a motion # field, so we can start from timestep 1. Timestep 0 will be the same # as timestep 0. 
_V_NWP_ = [] for t in range(1, nwp_precip.shape[1]): V_NWP_ = oflow_method(nwp_precip[n_model, t - 1 : t + 1, :]) _V_NWP_.append(V_NWP_) V_NWP_ = None _V_NWP_ = np.insert(_V_NWP_, 0, _V_NWP_[0], axis=0) nwp_velocity.append(_V_NWP_) nwp_velocity = np.stack(nwp_velocity) assert nwp_velocity.ndim == 5, "nwp_velocity must be a five-dimensional array" ### # The blending ### precip_forecast = blending.steps.forecast( precip=radar_precip, precip_models=nwp_precip_decomp, velocity=radar_velocity, velocity_models=nwp_velocity, timesteps=timesteps, timestep=5.0, issuetime=datetime.datetime.strptime("202112012355", "%Y%m%d%H%M"), n_ens_members=n_ens_members, n_cascade_levels=n_cascade_levels, blend_nwp_members=blend_nwp_members, precip_thr=metadata["threshold"], kmperpixel=1.0, extrap_method="semilagrangian", decomp_method="fft", bandpass_filter_method="gaussian", noise_method="nonparametric", noise_stddev_adj="auto", ar_order=2, vel_pert_method=vel_pert_method, weights_method=weights_method, timestep_start_full_nwp_weight=timestep_start_full_nwp_weight, conditional=False, probmatching_method=probmatching_method, mask_method=mask_method, resample_distribution=resample_distribution, smooth_radar_mask_range=smooth_radar_mask_range, callback=None, return_output=True, seed=None, num_workers=1, fft_method="numpy", domain="spatial", outdir_path_skill=outdir_path_skill, extrap_kwargs=None, filter_kwargs=None, noise_kwargs=None, vel_pert_kwargs=None, clim_kwargs=clim_kwargs, mask_kwargs=mask_kwargs, measure_time=False, ) assert precip_forecast.ndim == 4, "Wrong amount of dimensions in forecast output" assert ( precip_forecast.shape[0] == expected_n_ens_members ), "Wrong amount of output ensemble members in forecast output" assert ( precip_forecast.shape[1] == n_timesteps ), "Wrong amount of output time steps in forecast output" # Transform the data back into mm/h precip_forecast, _ = converter(precip_forecast, metadata) assert ( precip_forecast.ndim == 4 ), "Wrong amount of 
dimensions in converted forecast output" assert ( precip_forecast.shape[0] == expected_n_ens_members ), "Wrong amount of output ensemble members in converted forecast output" assert ( precip_forecast.shape[1] == n_timesteps ), "Wrong amount of output time steps in converted forecast output" ================================================ FILE: pysteps/tests/test_blending_utils.py ================================================ # -*- coding: utf-8 -*- import os import numpy as np import pytest from numpy.testing import assert_array_almost_equal import pysteps from pysteps.blending.utils import ( blend_cascades, blend_optical_flows, compute_smooth_dilated_mask, compute_store_nwp_motion, decompose_NWP, load_NWP, recompose_cascade, stack_cascades, ) from pysteps.utils.check_norain import check_norain pytest.importorskip("netCDF4") precip_nwp = np.zeros((24, 564, 564)) for t in range(precip_nwp.shape[0]): precip_nwp[t, 30 + t : 185 + t, 30 + 2 * t] = 0.1 precip_nwp[t, 30 + t : 185 + t, 31 + 2 * t] = 0.1 precip_nwp[t, 30 + t : 185 + t, 32 + 2 * t] = 1.0 precip_nwp[t, 30 + t : 185 + t, 33 + 2 * t] = 5.0 precip_nwp[t, 30 + t : 185 + t, 34 + 2 * t] = 5.0 precip_nwp[t, 30 + t : 185 + t, 35 + 2 * t] = 4.5 precip_nwp[t, 30 + t : 185 + t, 36 + 2 * t] = 4.5 precip_nwp[t, 30 + t : 185 + t, 37 + 2 * t] = 4.0 precip_nwp[t, 30 + t : 185 + t, 38 + 2 * t] = 2.0 precip_nwp[t, 30 + t : 185 + t, 39 + 2 * t] = 1.0 precip_nwp[t, 30 + t : 185 + t, 40 + 2 * t] = 0.5 precip_nwp[t, 30 + t : 185 + t, 41 + 2 * t] = 0.1 nwp_proj = ( "+proj=lcc +lon_0=4.55 +lat_1=50.8 +lat_2=50.8 " "+a=6371229 +es=0 +lat_0=50.8 +x_0=365950 +y_0=-365950.000000001" ) nwp_metadata = dict( projection=nwp_proj, institution="Royal Meteorological Institute of Belgium", transform=None, zerovalue=0.0, threshold=0, unit="mm", accutime=5, xpixelsize=1300.0, ypixelsize=1300.0, yorigin="upper", cartesian_unit="m", x1=0.0, x2=731900.0, y1=-731900.0, y2=0.0, ) # Get the analysis time and valid time times_nwp = np.array( [ 
"2021-07-04T16:05:00.000000000", "2021-07-04T16:10:00.000000000", "2021-07-04T16:15:00.000000000", "2021-07-04T16:20:00.000000000", "2021-07-04T16:25:00.000000000", "2021-07-04T16:30:00.000000000", "2021-07-04T16:35:00.000000000", "2021-07-04T16:40:00.000000000", "2021-07-04T16:45:00.000000000", "2021-07-04T16:50:00.000000000", "2021-07-04T16:55:00.000000000", "2021-07-04T17:00:00.000000000", "2021-07-04T17:05:00.000000000", "2021-07-04T17:10:00.000000000", "2021-07-04T17:15:00.000000000", "2021-07-04T17:20:00.000000000", "2021-07-04T17:25:00.000000000", "2021-07-04T17:30:00.000000000", "2021-07-04T17:35:00.000000000", "2021-07-04T17:40:00.000000000", "2021-07-04T17:45:00.000000000", "2021-07-04T17:50:00.000000000", "2021-07-04T17:55:00.000000000", "2021-07-04T18:00:00.000000000", ], dtype="datetime64[ns]", ) # Prepare input NWP files # Convert to rain rates [mm/h] converter = pysteps.utils.get_method("mm/h") precip_nwp, nwp_metadata = converter(precip_nwp, nwp_metadata) # Threshold the data nwp_metadata["threshold"] = 0.1 precip_nwp[precip_nwp < nwp_metadata["threshold"]] = 0.0 # Transform the data transformer = pysteps.utils.get_method("dB") precip_nwp, nwp_metadata = transformer( precip_nwp, nwp_metadata, threshold=nwp_metadata["threshold"] ) # Set two issue times for testing issue_time_first = times_nwp[0] issue_time_second = times_nwp[3] # Set the blending weights (we'll blend with a 50-50 weight) weights = np.full((2, 8), fill_value=0.5) # Set the testing arguments # Test function arguments utils_arg_names = ( "precip_nwp", "nwp_model", "issue_times", "timestep", "n_timesteps", "valid_times", "shape", "weights", ) # Test function values utils_arg_values = [ ( precip_nwp, "test", [issue_time_first, issue_time_second], 5.0, 3, times_nwp, precip_nwp.shape[1:], weights, ) ] smoothing_arg_names = ( "precip_nwp", "max_padding_size_in_px", "gaussian_kernel_size", "inverted", "non_linear_growth_kernel_sizes", ) smoothing_arg_values = [ (precip_nwp, 80, 9, False, 
False), (precip_nwp, 10, 9, False, False), (precip_nwp, 80, 5, False, False), (precip_nwp, 80, 9, True, False), (precip_nwp, 80, 9, False, True), ] ### # The test ### @pytest.mark.parametrize(utils_arg_names, utils_arg_values) # The test function to be used def test_blending_utils( precip_nwp, nwp_model, issue_times, timestep, n_timesteps, valid_times, shape, weights, ): """Tests if all blending utils functions behave correctly.""" # First, make the output path if it does not exist yet tmpdir = "./tmp/" os.makedirs(tmpdir, exist_ok=True) # Get the optical flow method oflow_method = pysteps.motion.get_method("lucaskanade") ### # Compute and store the motion ### compute_store_nwp_motion( precip_nwp=precip_nwp, oflow_method=oflow_method, analysis_time=valid_times[0], nwp_model=nwp_model, output_path=tmpdir, ) # Check if file exists date_string = np.datetime_as_string(valid_times[0], "s") motion_file = os.path.join( tmpdir, "motion_" + nwp_model + "_" + date_string[:4] + date_string[5:7] + date_string[8:10] + date_string[11:13] + date_string[14:16] + date_string[17:19] + ".npy", ) assert os.path.exists(motion_file) ### # Decompose and store NWP forecast ### decompose_NWP( R_NWP=precip_nwp, NWP_model=nwp_model, analysis_time=valid_times[0], timestep=timestep, valid_times=valid_times, num_cascade_levels=8, num_workers=1, output_path=tmpdir, decomp_method="fft", fft_method="numpy", domain="spatial", normalize=True, compute_stats=True, compact_output=False, ) # Check if file exists decomp_file = os.path.join( tmpdir, "cascade_" + nwp_model + "_" + date_string[:4] + date_string[5:7] + date_string[8:10] + date_string[11:13] + date_string[14:16] + date_string[17:19] + ".nc", ) assert os.path.exists(decomp_file) ### # Now check if files load correctly for two different issue times ### precip_decomposed_nwp_first, v_nwp_first = load_NWP( input_nc_path_decomp=os.path.join(decomp_file), input_path_velocities=os.path.join(motion_file), start_time=issue_times[0], 
n_timesteps=n_timesteps, ) precip_decomposed_nwp_second, v_nwp_second = load_NWP( input_nc_path_decomp=os.path.join(decomp_file), input_path_velocities=os.path.join(motion_file), start_time=issue_times[1], n_timesteps=n_timesteps, ) # Check if the output type and shapes are correct assert isinstance(precip_decomposed_nwp_first, list) assert isinstance(precip_decomposed_nwp_second, list) assert isinstance(precip_decomposed_nwp_first[0], dict) assert isinstance(precip_decomposed_nwp_second[0], dict) assert "domain" in precip_decomposed_nwp_first[0] assert "normalized" in precip_decomposed_nwp_first[0] assert "compact_output" in precip_decomposed_nwp_first[0] assert "valid_times" in precip_decomposed_nwp_first[0] assert "cascade_levels" in precip_decomposed_nwp_first[0] assert "means" in precip_decomposed_nwp_first[0] assert "stds" in precip_decomposed_nwp_first[0] assert precip_decomposed_nwp_first[0]["cascade_levels"].shape == ( 8, shape[0], shape[1], ) assert precip_decomposed_nwp_first[0]["domain"] == "spatial" assert precip_decomposed_nwp_first[0]["normalized"] == True assert precip_decomposed_nwp_first[0]["compact_output"] == False assert len(precip_decomposed_nwp_first) == n_timesteps + 1 assert len(precip_decomposed_nwp_second) == n_timesteps + 1 assert precip_decomposed_nwp_first[0]["means"].shape[0] == 8 assert precip_decomposed_nwp_first[0]["stds"].shape[0] == 8 assert np.array(v_nwp_first).shape == (n_timesteps + 1, 2, shape[0], shape[1]) assert np.array(v_nwp_second).shape == (n_timesteps + 1, 2, shape[0], shape[1]) # Check if the right times are loaded assert ( precip_decomposed_nwp_first[0]["valid_times"][0] == valid_times[0] ), "Not the right valid times were loaded for the first forecast" assert ( precip_decomposed_nwp_second[0]["valid_times"][0] == valid_times[3] ), "Not the right valid times were loaded for the second forecast" # Check, for a sample, if the stored motion fields are as expected assert_array_almost_equal( v_nwp_first[1], 
oflow_method(precip_nwp[0:2, :, :]), decimal=3, err_msg="Stored motion field of first forecast not equal to expected motion field", ) assert_array_almost_equal( v_nwp_second[1], oflow_method(precip_nwp[3:5, :, :]), decimal=3, err_msg="Stored motion field of second forecast not equal to expected motion field", ) ### # Stack the cascades ### precip_decomposed_first_stack, mu_first_stack, sigma_first_stack = stack_cascades( R_d=precip_decomposed_nwp_first, donorm=False ) print(precip_decomposed_nwp_first) print(precip_decomposed_first_stack) print(mu_first_stack) ( precip_decomposed_second_stack, mu_second_stack, sigma_second_stack, ) = stack_cascades(R_d=precip_decomposed_nwp_second, donorm=False) # Check if the array shapes are still correct assert precip_decomposed_first_stack.shape == ( n_timesteps + 1, 8, shape[0], shape[1], ) assert mu_first_stack.shape == (n_timesteps + 1, 8) assert sigma_first_stack.shape == (n_timesteps + 1, 8) ### # Blend the cascades ### precip_decomposed_blended = blend_cascades( cascades_norm=np.stack( (precip_decomposed_first_stack[0], precip_decomposed_second_stack[0]) ), weights=weights, ) assert precip_decomposed_blended.shape == precip_decomposed_first_stack[0].shape ### # Blend the optical flow fields ### v_nwp_blended = blend_optical_flows( flows=np.stack((v_nwp_first[1], v_nwp_second[1])), weights=weights[:, 1] ) assert v_nwp_blended.shape == v_nwp_first[1].shape assert_array_almost_equal( v_nwp_blended, (oflow_method(precip_nwp[0:2, :, :]) + oflow_method(precip_nwp[3:5, :, :])) / 2, decimal=3, err_msg="Blended motion field does not equal average of the two motion fields", ) ### # Recompose the fields (the non-blended fields are used for this here) ### precip_recomposed_first = recompose_cascade( combined_cascade=precip_decomposed_first_stack[0], combined_mean=mu_first_stack[0], combined_sigma=sigma_first_stack[0], ) precip_recomposed_second = recompose_cascade( combined_cascade=precip_decomposed_second_stack[0], 
combined_mean=mu_second_stack[0], combined_sigma=sigma_second_stack[0], ) assert_array_almost_equal( precip_recomposed_first, precip_nwp[0, :, :], decimal=3, err_msg="Recomposed field of first forecast does not equal original field", ) assert_array_almost_equal( precip_recomposed_second, precip_nwp[3, :, :], decimal=3, err_msg="Recomposed field of second forecast does not equal original field", ) precip_arr = precip_nwp # rainy fraction is 0.005847 assert not check_norain(precip_arr, win_fun=None) assert not check_norain( precip_arr, precip_thr=nwp_metadata["threshold"], win_fun=None ) assert not check_norain( precip_arr, precip_thr=nwp_metadata["threshold"], norain_thr=0.005, win_fun=None ) assert not check_norain(precip_arr, norain_thr=0.005, win_fun=None) # so with norain_thr beyond this number it should report that there's no rain assert check_norain(precip_arr, norain_thr=0.006, win_fun=None) assert check_norain( precip_arr, precip_thr=nwp_metadata["threshold"], norain_thr=0.006, win_fun=None ) # also if we set the precipitation threshold sufficiently high, it should report there's no rain # rainy fraction > 4mm/h is 0.004385 assert not check_norain(precip_arr, precip_thr=4.0, norain_thr=0.004, win_fun=None) assert check_norain(precip_arr, precip_thr=4.0, norain_thr=0.005, win_fun=None) # no rain above 100mm/h so it should give norain assert check_norain(precip_arr, precip_thr=100, win_fun=None) # should always give norain if the threshold is set to 100% assert check_norain(precip_arr, norain_thr=1.0, win_fun=None) # Finally, also test the compute_smooth_dilated mask functionality @pytest.mark.parametrize(smoothing_arg_names, smoothing_arg_values) def test_blending_smoothing_utils( precip_nwp, max_padding_size_in_px, gaussian_kernel_size, inverted, non_linear_growth_kernel_sizes, ): # First add some nans to indicate a mask precip_nwp[:, 0:100, 0:100] = np.nan nan_indices = np.isnan(precip_nwp[0]) new_mask = compute_smooth_dilated_mask( nan_indices, 
max_padding_size_in_px=max_padding_size_in_px, gaussian_kernel_size=gaussian_kernel_size, inverted=inverted, non_linear_growth_kernel_sizes=non_linear_growth_kernel_sizes, ) assert new_mask.shape == nan_indices.shape if max_padding_size_in_px > 0 and inverted == False: assert np.sum((new_mask > 0) & (new_mask < 1)) > 0 ================================================ FILE: pysteps/tests/test_cascade.py ================================================ # -*- coding: utf-8 -*- import os import numpy as np import pytest from numpy.testing import assert_array_almost_equal import pysteps from pysteps import nowcasts from pysteps.cascade.bandpass_filters import filter_gaussian from pysteps.cascade.bandpass_filters import filter_uniform from pysteps.cascade.decomposition import decomposition_fft, recompose_fft from pysteps.tests.helpers import smart_assert def test_decompose_recompose(): """Tests cascade decomposition.""" pytest.importorskip("netCDF4") root_path = pysteps.rcparams.data_sources["bom"]["root_path"] rel_path = os.path.join("prcp-cscn", "2", "2018", "06", "16") filename = os.path.join(root_path, rel_path, "2_20180616_120000.prcp-cscn.nc") precip, _, metadata = pysteps.io.import_bom_rf3(filename) # Convert to rain rate from mm precip, metadata = pysteps.utils.to_rainrate(precip, metadata) # Log-transform the data precip, metadata = pysteps.utils.dB_transform( precip, metadata, threshold=0.1, zerovalue=-15.0 ) # Set Nans as the fill value precip[~np.isfinite(precip)] = metadata["zerovalue"] # Set number of cascade levels num_cascade_levels = 9 # Construct the Gaussian bandpass filters _filter = filter_gaussian(precip.shape, num_cascade_levels) # Decompose precip decomp = decomposition_fft(precip, _filter) # Recomposed precip from decomp recomposed = recompose_fft(decomp) # Assert assert_array_almost_equal(recomposed.squeeze(), precip) test_metadata_filter = [ ("central_freqs", None, None), ("central_wavenumbers", None, None), ] 
@pytest.mark.parametrize("variable, expected, tolerance", test_metadata_filter) def test_filter_uniform(variable, expected, tolerance): _filter = filter_uniform((8, 8), 1) smart_assert(_filter[variable], expected, tolerance) def test_filter_uniform_weights_1d(): _filter = filter_uniform((8, 8), 1) assert_array_almost_equal(_filter["weights_1d"], np.ones((1, 5))) def test_filter_uniform_weights_2d(): _filter = filter_uniform((8, 8), 1) assert_array_almost_equal(_filter["weights_2d"], np.ones((1, 8, 5))) ================================================ FILE: pysteps/tests/test_datasets.py ================================================ # -*- coding: utf-8 -*- import os from tempfile import TemporaryDirectory import pytest from _pytest.outcomes import Skipped import pysteps from pysteps.datasets import ( download_pysteps_data, create_default_pystepsrc, load_dataset, ) from pysteps.exceptions import DirectoryNotEmpty _datasets_opt_deps = dict( fmi=["pyproj"], mch=["PIL"], bom=["netCDF4"], knmi=["h5py"], opera=["h5py"], mrms=["pygrib"], ) @pytest.mark.parametrize("case_name", _datasets_opt_deps.keys()) def test_load_dataset(case_name): """Test the load dataset function.""" with pytest.raises(ValueError): load_dataset(frames=100) for mod_name in _datasets_opt_deps[case_name]: pytest.importorskip(mod_name) try: load_dataset(case=case_name, frames=1) except Skipped: pass def _test_download_data(): """Test the example data installers.""" temp_dir = TemporaryDirectory() try: download_pysteps_data(temp_dir.name, force=True) with pytest.raises(DirectoryNotEmpty): download_pysteps_data(temp_dir.name, force=False) params_file = create_default_pystepsrc(temp_dir.name, config_dir=temp_dir.name) pysteps.load_config_file(params_file) finally: temp_dir.cleanup() pysteps.load_config_file() def _default_path(): """ Default pystepsrc path.""" home_dir = os.path.expanduser("~") if os.name == "nt": subdir = "pysteps" else: subdir = ".pysteps" return os.path.join(home_dir, subdir, 
"pystepsrc") test_params_paths = [ (None, "pystepsrc", _default_path()), ("/root/path", "pystepsrc", "/root/path/pystepsrc"), ("/root/path", "pystepsrc2", "/root/path/pystepsrc2"), ("relative/path", "pystepsrc2", "relative/path/pystepsrc2"), ("relative/path", "pystepsrc", "relative/path/pystepsrc"), ] @pytest.mark.parametrize("config_dir, file_name, expected_path", test_params_paths) def test_params_file_creation_path(config_dir, file_name, expected_path): """Test that the default pysteps parameters file is created in the right place.""" # For windows compatibility if config_dir is not None: config_dir = os.path.normpath(config_dir) expected_path = os.path.normpath(expected_path) pysteps_data_dir = "dummy/path/to/data" params_file_path = create_default_pystepsrc( pysteps_data_dir, config_dir=config_dir, file_name=file_name, dryrun=True ) assert expected_path == params_file_path ================================================ FILE: pysteps/tests/test_decorators.py ================================================ # -*- coding: utf-8 -*- import time from pysteps.decorators import memoize def test_memoize(): @memoize(maxsize=1) def _slow_function(x, **kwargs): time.sleep(1) return x for i in range(2): out = _slow_function(i, hkey=i) assert out == i # cached result t0 = time.monotonic() out = _slow_function(1, hkey=1) assert time.monotonic() - t0 < 1 assert out == 1 # maxsize exceeded t0 = time.monotonic() out = _slow_function(0, hkey=0) assert time.monotonic() - t0 >= 1 assert out == 0 # no hash t0 = time.monotonic() out = _slow_function(1) assert time.monotonic() - t0 >= 1 assert out == 1 ================================================ FILE: pysteps/tests/test_downscaling_rainfarm.py ================================================ # -*- coding: utf-8 -*- import pytest import numpy as np from pysteps import downscaling from pysteps.tests.helpers import get_precipitation_fields from pysteps.utils import aggregate_fields_space, square_domain, aggregate_fields 
@pytest.fixture(scope="module") def data(): precip, metadata = get_precipitation_fields( num_prev_files=0, num_next_files=0, return_raw=False, metadata=True ) precip = precip.filled() precip, metadata = square_domain(precip, metadata, "crop") return precip, metadata rainfarm_arg_names = ( "alpha", "ds_factor", "threshold", "return_alpha", "spectral_fusion", "kernel_type", ) rainfarm_arg_values = [ (1.0, 1, 0, False, False, None), (1, 2, 0, False, False, "gaussian"), (1, 4, 0, False, False, "tophat"), (1, 4, 0, False, True, "uniform"), ] @pytest.mark.parametrize(rainfarm_arg_names, rainfarm_arg_values) def test_rainfarm_shape( data, alpha, ds_factor, threshold, return_alpha, spectral_fusion, kernel_type, ): """Test that the output of rainfarm is consistent with the downscaling factor.""" precip, metadata = data window = metadata["xpixelsize"] * ds_factor precip_lr, __ = aggregate_fields_space(precip, metadata, window) rainfarm = downscaling.get_method("rainfarm") precip_hr = rainfarm( precip_lr, alpha=alpha, ds_factor=ds_factor, threshold=threshold, return_alpha=return_alpha, spectral_fusion=spectral_fusion, kernel_type=kernel_type, ) assert precip_hr.ndim == precip.ndim assert precip_hr.shape[0] == precip.shape[0] assert precip_hr.shape[1] == precip.shape[1] rainfarm_arg_values = [ (1.0, 1, 0, False, False, None), (1, 2, 0, False, False, None), (1, 4, 0, False, False, None), (1, 4, 0, False, True, None), ] @pytest.mark.parametrize(rainfarm_arg_names, rainfarm_arg_values) def test_rainfarm_aggregate( data, alpha, ds_factor, threshold, return_alpha, spectral_fusion, kernel_type, ): """Test that the output of rainfarm is equal to original when aggregated.""" precip, metadata = data window = metadata["xpixelsize"] * ds_factor precip_lr, __ = aggregate_fields_space(precip, metadata, window) rainfarm = downscaling.get_method("rainfarm") precip_hr = rainfarm( precip_lr, alpha=alpha, ds_factor=ds_factor, threshold=threshold, return_alpha=return_alpha, 
spectral_fusion=spectral_fusion, kernel_type=kernel_type, ) precip_low = aggregate_fields(precip_hr, ds_factor, axis=(0, 1)) precip_lr[precip_lr < threshold] = 0.0 np.testing.assert_array_almost_equal(precip_lr, precip_low) rainfarm_arg_values = [(1.0, 2, 0, True, False, None), (None, 2, 0, True, True, None)] @pytest.mark.parametrize(rainfarm_arg_names, rainfarm_arg_values) def test_rainfarm_alpha( data, alpha, ds_factor, threshold, return_alpha, spectral_fusion, kernel_type, ): """Test that rainfarm computes and returns alpha.""" precip, metadata = data window = metadata["xpixelsize"] * ds_factor precip_lr, __ = aggregate_fields_space(precip, metadata, window) rainfarm = downscaling.get_method("rainfarm") precip_hr = rainfarm( precip_lr, alpha=alpha, ds_factor=ds_factor, threshold=threshold, return_alpha=return_alpha, spectral_fusion=spectral_fusion, kernel_type=kernel_type, ) assert len(precip_hr) == 2 if alpha is None: assert not precip_hr[1] == alpha else: assert precip_hr[1] == alpha ================================================ FILE: pysteps/tests/test_ensscores.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from numpy.testing import assert_array_almost_equal from pysteps.tests.helpers import get_precipitation_fields from pysteps.verification import ensscores precip = get_precipitation_fields(num_next_files=10, return_raw=True) np.random.seed(42) # rankhist test_data = [ (precip[:10], precip[-1], None, True, 11), (precip[:10], precip[-1], None, False, 11), ] @pytest.mark.parametrize("X_f, X_o, X_min, normalize, expected", test_data) def test_rankhist_size(X_f, X_o, X_min, normalize, expected): """Test the rankhist.""" assert_array_almost_equal( ensscores.rankhist(X_f, X_o, X_min, normalize).size, expected ) # ensemble_skill test_data = [ ( precip[:10], precip[-1], "RMSE", {"axis": None, "conditioning": "single"}, 0.26054151, ), (precip[:10], precip[-1], "CSI", {"thr": 1.0, "axis": None}, 
0.22017924), (precip[:10], precip[-1], "FSS", {"thr": 1.0, "scale": 10}, 0.63239752), ] @pytest.mark.parametrize("X_f, X_o, metric, kwargs, expected", test_data) def test_ensemble_skill(X_f, X_o, metric, kwargs, expected): """Test the ensemble_skill.""" assert_array_almost_equal( ensscores.ensemble_skill(X_f, X_o, metric, **kwargs), expected ) # ensemble_spread test_data = [ (precip, "RMSE", {"axis": None, "conditioning": "single"}, 0.22635757), (precip, "CSI", {"thr": 1.0, "axis": None}, 0.25218158), (precip, "FSS", {"thr": 1.0, "scale": 10}, 0.70235667), ] @pytest.mark.parametrize("X_f, metric, kwargs, expected", test_data) def test_ensemble_spread(X_f, metric, kwargs, expected): """Test the ensemble_spread.""" assert_array_almost_equal( ensscores.ensemble_spread(X_f, metric, **kwargs), expected ) ================================================ FILE: pysteps/tests/test_exporters.py ================================================ # -*- coding: utf-8 -*- import os import tempfile from datetime import datetime import numpy as np import pytest from numpy.testing import assert_array_almost_equal from pysteps.io import import_netcdf_pysteps from pysteps.io.exporters import _get_geotiff_filename from pysteps.io.exporters import close_forecast_files from pysteps.io.exporters import export_forecast_dataset from pysteps.io.exporters import initialize_forecast_exporter_netcdf from pysteps.io.exporters import _convert_proj4_to_grid_mapping from pysteps.tests.helpers import get_precipitation_fields, get_invalid_mask # Test arguments exporter_arg_names = ( "n_ens_members", "incremental", "datatype", "fill_value", "scale_factor", "offset", "n_timesteps", ) exporter_arg_values = [ (1, None, np.float32, None, None, None, 3), (1, "timestep", np.float32, 65535, None, None, 3), (2, None, np.float32, 65535, None, None, 3), (2, None, np.float32, 65535, None, None, [1, 2, 4]), (2, "timestep", np.float32, None, None, None, 3), (2, "timestep", np.float32, None, None, None, [1, 2, 4]), 
(2, "member", np.float64, None, 0.01, 1.0, 3), ] def test_get_geotiff_filename(): """Test the geotif name generator.""" start_date = datetime.strptime("201909082022", "%Y%m%d%H%M") n_timesteps = 50 timestep = 5 for timestep_index in range(n_timesteps): file_name = _get_geotiff_filename( "test/path", start_date, n_timesteps, timestep, timestep_index ) expected = ( f"test/path_201909082022_" f"{(timestep_index + 1) * timestep:03d}.tif" ) assert expected == file_name @pytest.mark.parametrize(exporter_arg_names, exporter_arg_values) def test_io_export_netcdf_one_member_one_time_step( n_ens_members, incremental, datatype, fill_value, scale_factor, offset, n_timesteps ): """ Test the export netcdf. Also, test that the exported file can be read by the importer. """ pytest.importorskip("pyproj") precip, metadata = get_precipitation_fields( num_prev_files=2, return_raw=True, metadata=True, source="fmi" ) invalid_mask = get_invalid_mask(precip) with tempfile.TemporaryDirectory() as outpath: # save it back to disk outfnprefix = "test_netcdf_out" file_path = os.path.join(outpath, outfnprefix + ".nc") startdate = metadata["timestamps"][0] timestep = metadata["accutime"] shape = precip.shape[1:] exporter = initialize_forecast_exporter_netcdf( outpath, outfnprefix, startdate, timestep, n_timesteps, shape, metadata, n_ens_members=n_ens_members, datatype=datatype, incremental=incremental, fill_value=fill_value, scale_factor=scale_factor, offset=offset, ) if n_ens_members > 1: precip = np.repeat(precip[np.newaxis, :, :, :], n_ens_members, axis=0) if incremental == None: export_forecast_dataset(precip, exporter) if incremental == "timestep": if isinstance(n_timesteps, list): timesteps = len(n_timesteps) else: timesteps = n_timesteps for t in range(timesteps): if n_ens_members > 1: export_forecast_dataset(precip[:, t, :, :], exporter) else: export_forecast_dataset(precip[t, :, :], exporter) if incremental == "member": for ens_mem in range(n_ens_members): 
export_forecast_dataset(precip[ens_mem, :, :, :], exporter) close_forecast_files(exporter) # assert if netcdf file was saved and file size is not zero assert os.path.exists(file_path) and os.path.getsize(file_path) > 0 # Test that the file can be read by the nowcast_importer output_file_path = os.path.join(outpath, f"{outfnprefix}.nc") precip_new, _ = import_netcdf_pysteps(output_file_path) assert_array_almost_equal(precip.squeeze(), precip_new.data) assert precip_new.dtype == "single" precip_new, _ = import_netcdf_pysteps(output_file_path, dtype="double") assert_array_almost_equal(precip.squeeze(), precip_new.data) assert precip_new.dtype == "double" precip_new, _ = import_netcdf_pysteps(output_file_path, fillna=-1000) new_invalid_mask = precip_new == -1000 assert (new_invalid_mask == invalid_mask).all() @pytest.mark.parametrize( ["proj4str", "expected_value"], [ ( "+proj=lcc +lat_1=49.83333333333334 +lat_2=51.16666666666666 +lat_0=50.797815 +lon_0=4.359215833333333 +x_0=649328 +y_0=665262 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs ", ( "lcc", "lambert_conformal_conic", { "false_easting": 649328.0, "false_northing": 665262.0, "longitude_of_central_meridian": 4.359215833333333, "latitude_of_projection_origin": 50.797815, "standard_parallel": (49.83333333333334, 51.16666666666666), "reference_ellipsoid_name": "GRS80", "towgs84": "0,0,0,0,0,0,0", }, ), ), ( "+proj=aea +lat_0=-37.852 +lon_0=144.752 +lat_1=-18.0 +lat_2=-36.0 +a=6378.137 +b=6356.752 +x_0=0 +y_0=0", ( "proj", "albers_conical_equal_area", { "false_easting": 0.0, "false_northing": 0.0, "longitude_of_central_meridian": 144.752, "latitude_of_projection_origin": -37.852, "standard_parallel": (-18.0, -36.0), }, ), ), ( "+proj=stere +lat_0=90 +lon_0=0.0 +lat_ts=60.0 +a=6378.137 +b=6356.752 +x_0=0 +y_0=0", ( "polar_stereographic", "polar_stereographic", { "straight_vertical_longitude_from_pole": 0.0, "latitude_of_projection_origin": 90.0, "standard_parallel": 60.0, "false_easting": 0.0, 
"false_northing": 0.0, }, ), ), ], ) def test_convert_proj4_to_grid_mapping(proj4str, expected_value): """ test the grid mapping in function _convert_proj4_to_grid_mapping() """ output = _convert_proj4_to_grid_mapping(proj4str) assert output == expected_value ================================================ FILE: pysteps/tests/test_extrapolation_semilagrangian.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from numpy.testing import assert_array_almost_equal from pysteps.extrapolation.semilagrangian import extrapolate def test_semilagrangian(): """Test semilagrangian extrapolation with number of timesteps.""" # inputs precip = np.zeros((8, 8)) precip[0, 0] = 1 v = np.ones((8, 8)) velocity = np.stack([v, v]) num_timesteps = 1 # expected expected = np.zeros((1, 8, 8)) expected[:, :, 0] = np.nan expected[:, 0, :] = np.nan expected[:, 1, 1] = 1 # result result = extrapolate(precip, velocity, num_timesteps) assert_array_almost_equal(result, expected) def test_wrong_input_dimensions(): p_1d = np.ones(8) p_2d = np.ones((8, 8)) p_3d = np.ones((8, 8, 2)) v_2d = np.ones((8, 8)) v_3d = np.stack([v_2d, v_2d]) num_timesteps = 1 invalid_inputs = [ (p_1d, v_3d), (p_2d, v_2d), (p_3d, v_2d), (p_3d, v_3d), ] for precip, velocity in invalid_inputs: with pytest.raises(ValueError): extrapolate(precip, velocity, num_timesteps) def test_ascending_time_step(): precip = np.ones((8, 8)) v = np.ones((8, 8)) velocity = np.stack([v, v]) not_ascending_timesteps = [1, 2, 3, 5, 4, 6, 7] with pytest.raises(ValueError): extrapolate(precip, velocity, not_ascending_timesteps) def test_semilagrangian_timesteps(): """Test semilagrangian extrapolation with list of timesteps.""" # inputs precip = np.zeros((8, 8)) precip[0, 0] = 1 v = np.ones((8, 8)) * 10 velocity = np.stack([v, v]) timesteps = [0.1] # expected expected = np.zeros((1, 8, 8)) expected[:, :, 0] = np.nan expected[:, 0, :] = np.nan expected[:, 1, 1] = 1 # result result = 
extrapolate(precip, velocity, timesteps) assert_array_almost_equal(result, expected) ================================================ FILE: pysteps/tests/test_feature.py ================================================ import pytest import numpy as np from pysteps import feature from pysteps.tests.helpers import get_precipitation_fields arg_names = ["method", "max_num_features"] arg_values = [("blob", None), ("blob", 5), ("shitomasi", None), ("shitomasi", 5)] @pytest.mark.parametrize(arg_names, arg_values) def test_feature(method, max_num_features): if method == "blob": pytest.importorskip("skimage") if method == "shitomasi": pytest.importorskip("cv2") input_field = get_precipitation_fields( num_prev_files=0, num_next_files=0, return_raw=True, metadata=False, upscale=None, source="mch", ) detector = feature.get_method(method) kwargs = {"max_num_features": max_num_features} output = detector(input_field.squeeze(), **kwargs) assert isinstance(output, np.ndarray) assert output.ndim == 2 assert output.shape[0] > 0 if max_num_features is not None: assert output.shape[0] <= max_num_features assert output.shape[1] == 2 ================================================ FILE: pysteps/tests/test_feature_tstorm.py ================================================ import numpy as np import pytest from pysteps.feature.tstorm import detection from pysteps.utils import to_reflectivity from pysteps.tests.helpers import get_precipitation_fields try: from pandas import DataFrame except ModuleNotFoundError: pass arg_names = ( "source", "output_feat", "dry_input", "max_num_features", "output_split_merge", ) arg_values = [ ("mch", False, False, None, False), ("mch", False, False, 5, False), ("mch", True, False, None, False), ("mch", True, False, 5, False), ("mch", False, True, None, False), ("mch", False, True, 5, False), ("mch", False, False, None, True), ] @pytest.mark.parametrize(arg_names, arg_values) def test_feature_tstorm_detection( source, output_feat, dry_input, 
max_num_features, output_split_merge ): pytest.importorskip("skimage") pytest.importorskip("pandas") if not dry_input: input, metadata = get_precipitation_fields(0, 0, True, True, None, source) input = input.squeeze() input, __ = to_reflectivity(input, metadata) else: input = np.zeros((50, 50)) time = "000" output = detection( input, time=time, output_feat=output_feat, max_num_features=max_num_features, output_splits_merges=output_split_merge, ) if output_feat: assert isinstance(output, np.ndarray) assert output.ndim == 2 assert output.shape[1] == 2 if max_num_features is not None: assert output.shape[0] <= max_num_features elif output_split_merge: assert isinstance(output, tuple) assert len(output) == 2 assert isinstance(output[0], DataFrame) assert isinstance(output[1], np.ndarray) if max_num_features is not None: assert output[0].shape[0] <= max_num_features assert output[0].shape[1] == 15 assert list(output[0].columns) == [ "ID", "time", "x", "y", "cen_x", "cen_y", "max_ref", "cont", "area", "splitted", "split_IDs", "merged", "merged_IDs", "results_from_split", "will_merge", ] assert (output[0].time == time).all() assert output[1].ndim == 2 assert output[1].shape == input.shape if not dry_input: assert output[0].shape[0] > 0 assert sorted(list(output[0].ID)) == sorted(list(np.unique(output[1]))[1:]) else: assert output[0].shape[0] == 0 assert output[1].sum() == 0 else: assert isinstance(output, tuple) assert len(output) == 2 assert isinstance(output[0], DataFrame) assert isinstance(output[1], np.ndarray) if max_num_features is not None: assert output[0].shape[0] <= max_num_features assert output[0].shape[1] == 9 assert list(output[0].columns) == [ "ID", "time", "x", "y", "cen_x", "cen_y", "max_ref", "cont", "area", ] assert (output[0].time == time).all() assert output[1].ndim == 2 assert output[1].shape == input.shape if not dry_input: assert output[0].shape[0] > 0 assert sorted(list(output[0].ID)) == sorted(list(np.unique(output[1]))[1:]) else: assert 
output[0].shape[0] == 0 assert output[1].sum() == 0 ================================================ FILE: pysteps/tests/test_importer_decorator.py ================================================ # -*- coding: utf-8 -*- from functools import partial import numpy as np import pytest from pysteps.tests.helpers import get_precipitation_fields default_dtypes = dict( fmi="double", knmi="double", mch="double", opera="double", saf="double", mrms="single", ) @pytest.mark.parametrize("source, default_dtype", default_dtypes.items()) def test_postprocess_import_decorator(source, default_dtype): """Test the postprocessing decorator for the importers.""" import_data = partial(get_precipitation_fields, return_raw=True, source=source) precip = import_data() invalid_mask = ~np.isfinite(precip) assert precip.dtype == default_dtype if default_dtype == "single": dtype = "double" else: dtype = "single" precip = import_data(dtype=dtype) assert precip.dtype == dtype # Test that invalid types are handled correctly for dtype in ["int", "int64"]: with pytest.raises(ValueError): _ = import_data(dtype=dtype) precip = import_data(fillna=-1000) new_invalid_mask = precip == -1000 assert (new_invalid_mask == invalid_mask).all() ================================================ FILE: pysteps/tests/test_interfaces.py ================================================ # -*- coding: utf-8 -*- import numpy import pytest import pysteps def _generic_interface_test(method_getter, valid_names_func_pair, invalid_names): for name, expected_function in valid_names_func_pair: error_message = "Error getting '{}' function.".format(name) assert method_getter(name) == expected_function, error_message if isinstance(name, str): assert method_getter(name.upper()) == expected_function, error_message # test invalid names for invalid_name in invalid_names: with pytest.raises(ValueError): method_getter(invalid_name) def test_nowcasts_interface(): """Test the nowcasts module interface.""" from pysteps.blending import ( 
linear_blending, steps, ) method_getter = pysteps.nowcasts.interface.get_method valid_names_func_pair = [ ("linear_blending", linear_blending.forecast), ("steps", steps.forecast), ] invalid_names = ["linear", "step", "blending", "pysteps"] _generic_interface_test(method_getter, valid_names_func_pair, invalid_names) def test_cascade_interface(): """Test the cascade module interface.""" from pysteps.cascade import decomposition, bandpass_filters method_getter = pysteps.cascade.interface.get_method valid_names_func_pair = [ ("fft", (decomposition.decomposition_fft, decomposition.recompose_fft)), ("gaussian", bandpass_filters.filter_gaussian), ("uniform", bandpass_filters.filter_uniform), ] invalid_names = ["gauss", "fourier"] _generic_interface_test(method_getter, valid_names_func_pair, invalid_names) def test_extrapolation_interface(): """Test the extrapolation module interface.""" from pysteps import extrapolation from pysteps.extrapolation import semilagrangian from pysteps.extrapolation.interface import eulerian_persistence as eulerian from pysteps.extrapolation.interface import _do_nothing as do_nothing method_getter = extrapolation.interface.get_method valid_returned_objs = dict() valid_returned_objs["semilagrangian"] = semilagrangian.extrapolate valid_returned_objs["eulerian"] = eulerian valid_returned_objs[None] = do_nothing valid_returned_objs["None"] = do_nothing valid_names_func_pair = list(valid_returned_objs.items()) invalid_names = ["euler", "LAGRANGIAN"] _generic_interface_test(method_getter, valid_names_func_pair, invalid_names) # Test eulerian persistence method precip = numpy.random.rand(100, 100) velocity = numpy.random.rand(100, 100) num_timesteps = 10 for name in ["eulerian", "EULERIAN"]: forecaster = method_getter(name) forecast = forecaster(precip, velocity, num_timesteps) for i in range(num_timesteps): assert numpy.all(forecast[i] == precip) forecaster = method_getter(None) assert forecaster(precip, velocity, num_timesteps) is None def 
test_io_interface(): """Test the io module interface.""" from pysteps.io import import_bom_rf3 from pysteps.io import import_fmi_geotiff from pysteps.io import import_fmi_pgm from pysteps.io import import_knmi_hdf5 from pysteps.io import import_mch_gif from pysteps.io import import_mch_hdf5 from pysteps.io import import_mch_metranet from pysteps.io import import_mrms_grib from pysteps.io import import_opera_hdf5 from pysteps.io import import_saf_crri from pysteps.io import initialize_forecast_exporter_geotiff from pysteps.io import initialize_forecast_exporter_kineros from pysteps.io import initialize_forecast_exporter_netcdf # Test importers valid_names_func_pair = [ ("bom_rf3", import_bom_rf3), ("fmi_geotiff", import_fmi_geotiff), ("fmi_pgm", import_fmi_pgm), ("knmi_hdf5", import_knmi_hdf5), ("mch_gif", import_mch_gif), ("mch_hdf5", import_mch_hdf5), ("mch_metranet", import_mch_metranet), ("mrms_grib", import_mrms_grib), ("opera_hdf5", import_opera_hdf5), ("saf_crri", import_saf_crri), ] def method_getter(name): return pysteps.io.interface.get_method(name, "importer") invalid_names = ["bom", "fmi", "knmi", "mch", "mrms", "opera", "saf"] _generic_interface_test(method_getter, valid_names_func_pair, invalid_names) # Test exporters def method_getter(name): return pysteps.io.interface.get_method(name, "exporter") valid_names_func_pair = [ ("geotiff", initialize_forecast_exporter_geotiff), ("kineros", initialize_forecast_exporter_kineros), ("netcdf", initialize_forecast_exporter_netcdf), ] invalid_names = ["hdf"] _generic_interface_test(method_getter, valid_names_func_pair, invalid_names) # Test for invalid argument type with pytest.raises(TypeError): pysteps.io.interface.get_method("mch_gif", None) pysteps.io.interface.get_method(None, "importer") # Test for invalid method types with pytest.raises(ValueError): pysteps.io.interface.get_method("mch_gif", "io") def test_postprocessing_interface(): """Test the postprocessing module interface.""" # ensemblestats 
pre-installed methods from pysteps.postprocessing import mean, excprob, banddepth # Test ensemblestats valid_names_func_pair = [ ("mean", mean), ("excprob", excprob), ("banddepth", banddepth), ] # Test for exisiting functions with pytest.warns(RuntimeWarning): pysteps.postprocessing.interface.add_postprocessor( "excprob", "ensemblestat_excprob", "ensemblestats", [tup[0] for tup in valid_names_func_pair], ) # Test get method for valid and invalid names def method_getter(name): return pysteps.postprocessing.interface.get_method(name, "ensemblestats") invalid_names = [ "ensemblestat_mean", "ensemblestat_excprob", "ensemblestat_banddepth", ] _generic_interface_test(method_getter, valid_names_func_pair, invalid_names) # Test diagnostics def method_getter(name): return pysteps.postprocessing.interface.get_method(name, "diagnostics") valid_names_func_pair = [] invalid_names = ["unknown"] _generic_interface_test(method_getter, valid_names_func_pair, invalid_names) # Test for invalid argument type with pytest.raises(TypeError): pysteps.postprocessing.interface.get_method("mean", None) with pytest.raises(TypeError): pysteps.postprocessing.interface.get_method(None, "ensemblestats") # Test for invalid method types with pytest.raises(ValueError): pysteps.postprocessing.interface.get_method("mean", "forecast") # Test print pysteps.postprocessing.postprocessors_info() def test_motion_interface(): """Test the motion module interface.""" from pysteps.motion.constant import constant from pysteps.motion.darts import DARTS from pysteps.motion.lucaskanade import dense_lucaskanade from pysteps.motion.proesmans import proesmans from pysteps.motion.vet import vet method_getter = pysteps.motion.interface.get_method valid_names_func_pair = [ ("constant", constant), ("darts", DARTS), ("lk", dense_lucaskanade), ("lucaskanade", dense_lucaskanade), ("proesmans", proesmans), ("vet", vet), ] invalid_names = ["dart", "pyvet", "lukascanade", "lucas-kanade", "no_method"] 
_generic_interface_test(method_getter, valid_names_func_pair, invalid_names) # Test default dummy method precip_field = method_getter(None)(numpy.random.random([2, 300, 500])) assert numpy.max(numpy.abs(precip_field)) == pytest.approx(0) # test not implemented names for name in ["brox", "clg"]: with pytest.raises(NotImplementedError): method_getter(name) # last s missing def test_noise_interface(): """Test the noise module interface.""" from pysteps.noise.fftgenerators import ( initialize_param_2d_fft_filter, generate_noise_2d_fft_filter, initialize_nonparam_2d_fft_filter, initialize_nonparam_2d_ssft_filter, generate_noise_2d_ssft_filter, initialize_nonparam_2d_nested_filter, ) from pysteps.noise.motion import initialize_bps, generate_bps method_getter = pysteps.noise.interface.get_method valid_names_func_pair = [ ("parametric", (initialize_param_2d_fft_filter, generate_noise_2d_fft_filter)), ( "nonparametric", (initialize_nonparam_2d_fft_filter, generate_noise_2d_fft_filter), ), ("ssft", (initialize_nonparam_2d_ssft_filter, generate_noise_2d_ssft_filter)), ( "nested", (initialize_nonparam_2d_nested_filter, generate_noise_2d_ssft_filter), ), ("bps", (initialize_bps, generate_bps)), ] invalid_names = ["nest", "sft", "ssfft"] _generic_interface_test(method_getter, valid_names_func_pair, invalid_names) def test_nowcasts_interface(): """Test the nowcasts module interface.""" from pysteps.nowcasts import ( anvil, extrapolation, lagrangian_probability, linda, sprog, steps, sseps, ) method_getter = pysteps.nowcasts.interface.get_method valid_names_func_pair = [ ("anvil", anvil.forecast), ("extrapolation", extrapolation.forecast), ("lagrangian", extrapolation.forecast), ("linda", linda.forecast), ("probability", lagrangian_probability.forecast), ("lagrangian_probability", lagrangian_probability.forecast), ("sprog", sprog.forecast), ("sseps", sseps.forecast), ("steps", steps.forecast), ] invalid_names = ["extrap", "step", "s-prog", "pysteps"] 
_generic_interface_test(method_getter, valid_names_func_pair, invalid_names) # Test eulerian persistence method precip = numpy.random.rand(100, 100) velocity = numpy.random.rand(100, 100) num_timesteps = 10 for name in ["eulerian", "EULERIAN"]: forecast = method_getter(name)(precip, velocity, num_timesteps) for i in range(num_timesteps): assert numpy.all(forecast[i] == precip) def test_utils_interface(): """Test utils module interface.""" from pysteps.utils import arrays from pysteps.utils import cleansing from pysteps.utils import conversion from pysteps.utils import dimension from pysteps.utils import images from pysteps.utils import interpolate from pysteps.utils import reprojection from pysteps.utils import spectral from pysteps.utils import tapering from pysteps.utils import transformation method_getter = pysteps.utils.interface.get_method valid_names_func_pair = [ ("centred_coord", arrays.compute_centred_coord_array), ("decluster", cleansing.decluster), ("detect_outliers", cleansing.detect_outliers), ("mm/h", conversion.to_rainrate), ("rainrate", conversion.to_rainrate), ("mm", conversion.to_raindepth), ("raindepth", conversion.to_raindepth), ("dbz", conversion.to_reflectivity), ("reflectivity", conversion.to_reflectivity), ("accumulate", dimension.aggregate_fields_time), ("clip", dimension.clip_domain), ("square", dimension.square_domain), ("upscale", dimension.aggregate_fields_space), ("morph_opening", images.morph_opening), ("rbfinterp2d", interpolate.rbfinterp2d), ("reproject_grids", reprojection.reproject_grids), ("rapsd", spectral.rapsd), ("rm_rdisc", spectral.remove_rain_norain_discontinuity), ("compute_mask_window_function", tapering.compute_mask_window_function), ("compute_window_function", tapering.compute_window_function), ("boxcox", transformation.boxcox_transform), ("box-cox", transformation.boxcox_transform), ("db", transformation.dB_transform), ("decibel", transformation.dB_transform), ("log", transformation.boxcox_transform), ("nqt", 
transformation.NQ_transform), ("sqrt", transformation.sqrt_transform), ] invalid_names = ["random", "invalid"] _generic_interface_test(method_getter, valid_names_func_pair, invalid_names) def test_downscaling_interface(): """Test the downscaling module interface.""" from pysteps.downscaling import rainfarm method_getter = pysteps.downscaling.interface.get_method valid_names_func_pair = [ ("rainfarm", rainfarm.downscale), ] invalid_names = ["rain-farm", "rainfarms"] _generic_interface_test(method_getter, valid_names_func_pair, invalid_names) def test_feature_interface(): """Test the feature detection module interface.""" from pysteps.feature import blob from pysteps.feature import tstorm from pysteps.feature import shitomasi method_getter = pysteps.feature.interface.get_method valid_names_func_pair = [ ("blob", blob.detection), ("tstorm", tstorm.detection), ("shitomasi", shitomasi.detection), ] invalid_names = ["blobs", "storm", "shi-tomasi"] _generic_interface_test(method_getter, valid_names_func_pair, invalid_names) def test_tracking_interface(): """Test the feature tracking module interface.""" from pysteps.tracking import lucaskanade from pysteps.tracking import tdating method_getter = pysteps.tracking.interface.get_method valid_names_func_pair = [ ("lucaskanade", lucaskanade.track_features), ("tdating", tdating.dating), ] invalid_names = ["lucas-kanade", "dating"] _generic_interface_test(method_getter, valid_names_func_pair, invalid_names) ================================================ FILE: pysteps/tests/test_io_archive.py ================================================ from datetime import datetime import pytest import pysteps def test_find_by_date_mch(): pytest.importorskip("PIL") date = datetime.strptime("201505151630", "%Y%m%d%H%M") data_source = pysteps.rcparams.data_sources["mch"] root_path = data_source["root_path"] path_fmt = data_source["path_fmt"] fn_pattern = data_source["fn_pattern"] fn_ext = data_source["fn_ext"] timestep = 
data_source["timestep"] fns = pysteps.io.archive.find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep=timestep, num_prev_files=1, num_next_files=1, ) assert len(fns) == 2 assert len(fns[0]) == 3 assert len(fns[1]) == 3 assert isinstance(fns[0][0], str) assert isinstance(fns[1][0], datetime) ================================================ FILE: pysteps/tests/test_io_bom_rf3.py ================================================ # -*- coding: utf-8 -*- import os import pytest import pysteps from pysteps.tests.helpers import smart_assert netCDF4 = pytest.importorskip("netCDF4") # Test import_bom_rf3 function expected_proj1 = ( "+proj=aea +lon_0=144.752 +lat_0=-37.852 " "+lat_1=-18.000 +lat_2=-36.000" ) test_metadata_bom = [ ("transform", None, None), ("zerovalue", 0.0, 0.1), ("projection", expected_proj1, None), ("unit", "mm", None), ("accutime", 6, 0.1), ("x1", -128000.0, 0.1), ("x2", 127500.0, 0.1), ("y1", -127500.0, 0.1), ("y2", 128000.0, 0.1), ("xpixelsize", 500.0, 0.1), ("ypixelsize", 500.0, 0.1), ("cartesian_unit", "m", None), ("yorigin", "upper", None), ("institution", "Commonwealth of Australia, Bureau of Meteorology", None), ] @pytest.mark.parametrize("variable, expected, tolerance", test_metadata_bom) def test_io_import_bom_rf3_metadata(variable, expected, tolerance): """Test the importer Bom RF3.""" root_path = pysteps.rcparams.data_sources["bom"]["root_path"] rel_path = os.path.join("prcp-cscn", "2", "2018", "06", "16") filename = os.path.join(root_path, rel_path, "2_20180616_100000.prcp-cscn.nc") precip, _, metadata = pysteps.io.import_bom_rf3(filename) smart_assert(metadata[variable], expected, tolerance) assert precip.shape == (512, 512) # Test _import_bom_rf3_data function def test_io_import_bom_rf3_shape(): """Test the importer Bom RF3.""" root_path = pysteps.rcparams.data_sources["bom"]["root_path"] rel_path = os.path.join("prcp-cscn", "2", "2018", "06", "16") filename = os.path.join(root_path, rel_path, 
"2_20180616_100000.prcp-cscn.nc") precip, _ = pysteps.io.importers._import_bom_rf3_data(filename) assert precip.shape == (512, 512) # Test _import_bom_rf3_geodata function expected_proj2 = ( "+proj=aea +lon_0=144.752 +lat_0=-37.852 " "+lat_1=-18.000 +lat_2=-36.000" ) # test_geodata: list of (variable,expected,tolerance) tuples test_geodata_bom = [ ("projection", expected_proj2, None), ("unit", "mm", None), ("accutime", 6, 0.1), ("x1", -128000.0, 0.1), ("x2", 127500.0, 0.1), ("y1", -127500.0, 0.1), ("y2", 128000.0, 0.1), ("xpixelsize", 500.0, 0.1), ("ypixelsize", 500.0, 0.1), ("cartesian_unit", "m", None), ("yorigin", "upper", None), ("institution", "Commonwealth of Australia, Bureau of Meteorology", None), ] @pytest.mark.parametrize("variable, expected, tolerance", test_geodata_bom) def test_io_import_bom_rf3_geodata(variable, expected, tolerance): """Test the importer Bom RF3.""" root_path = pysteps.rcparams.data_sources["bom"]["root_path"] rel_path = os.path.join("prcp-cscn", "2", "2018", "06", "16") filename = os.path.join(root_path, rel_path, "2_20180616_100000.prcp-cscn.nc") ds_rainfall = netCDF4.Dataset(filename) geodata = pysteps.io.importers._import_bom_rf3_geodata(ds_rainfall) smart_assert(geodata[variable], expected, tolerance) ds_rainfall.close() ================================================ FILE: pysteps/tests/test_io_dwd_hdf5.py ================================================ # -*- coding: utf-8 -*- import pytest import pysteps from pysteps.tests.helpers import smart_assert, get_precipitation_fields pytest.importorskip("h5py") # Test for RADOLAN RY product precip_ry, metadata_ry = get_precipitation_fields( num_prev_files=0, num_next_files=0, return_raw=False, metadata=True, source="dwd", log_transform=False, importer_kwargs=dict(qty="RATE"), ) def test_io_import_dwd_hdf5_ry_shape(): """Test the importer DWD HDF5.""" assert precip_ry.shape == (1200, 1100) # Test_metadata # Expected projection definition expected_proj = ( "+proj=stere +lat_0=90 
+lat_ts=60 " "+lon_0=10 +a=6378137 +b=6356752.3142451802 " "+no_defs +x_0=543196.83521776402 " "+y_0=3622588.8619310018 +units=m" ) # List of (variable,expected,tolerance) tuples test_ry_attrs = [ ("projection", expected_proj, None), ("ll_lon", 3.566994635, 1e-10), ("ll_lat", 45.69642538, 1e-10), ("ur_lon", 18.73161645, 1e-10), ("ur_lat", 55.84543856, 1e-10), ("x1", -500.0, 1e-6), ("y1", -1199500.0, 1e-6), ("x2", 1099500.0, 1e-6), ("y2", 500.0, 1e-6), ("xpixelsize", 1000.0, 1e-10), ("xpixelsize", 1000.0, 1e-10), ("cartesian_unit", "m", None), ("yorigin", "upper", None), ("institution", "ORG:78,CTY:616,CMT:Deutscher Wetterdienst radolan@dwd.de", None), ("accutime", 5.0, 1e-10), ("unit", "mm/h", None), ("transform", None, None), ("zerovalue", 0.0, 1e-6), ("threshold", 0.12, 1e-6), ] @pytest.mark.parametrize("variable, expected, tolerance", test_ry_attrs) def test_io_import_dwd_hdf5_ry_metadata(variable, expected, tolerance): """Test the importer OPERA HDF5.""" smart_assert(metadata_ry[variable], expected, tolerance) ================================================ FILE: pysteps/tests/test_io_fmi_geotiff.py ================================================ import os import pytest import pysteps from pysteps.tests.helpers import smart_assert pytest.importorskip("pyproj") pytest.importorskip("osgeo") root_path = pysteps.rcparams.data_sources["fmi_geotiff"]["root_path"] filename = os.path.join( root_path, "20160928", "201609281600_FINUTM.tif", ) precip, _, metadata = pysteps.io.import_fmi_geotiff(filename) def test_io_import_fmi_geotiff_shape(): """Test the shape of the read file.""" assert precip.shape == (7316, 4963) expected_proj = ( "+proj=utm +zone=35 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs" ) # test_geodata: list of (variable,expected,tolerance) tuples test_geodata = [ ("projection", expected_proj, None), ("x1", -196593.0043142295908183, 1e-10), ("x2", 1044176.9413554778, 1e-10), ("y1", 6255329.6988206729292870, 1e-10), ("y2", 8084432.005259146, 
1e-10), ("xpixelsize", 250.0040188736061566, 1e-6), ("ypixelsize", 250.0139839309011904, 1e-6), ("cartesian_unit", "m", None), ("yorigin", "upper", None), ] @pytest.mark.parametrize("variable, expected, tolerance", test_geodata) def test_io_import_fmi_pgm_geodata(variable, expected, tolerance): """Test the GeoTIFF and metadata reading.""" smart_assert(metadata[variable], expected, tolerance) ================================================ FILE: pysteps/tests/test_io_fmi_pgm.py ================================================ import os import pytest import pysteps from pysteps.tests.helpers import smart_assert pytest.importorskip("pyproj") root_path = pysteps.rcparams.data_sources["fmi"]["root_path"] filename = os.path.join( root_path, "20160928", "201609281600_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz", ) precip, _, metadata = pysteps.io.import_fmi_pgm(filename, gzipped=True) def test_io_import_fmi_pgm_shape(): """Test the importer FMI PGM.""" assert precip.shape == (1226, 760) expected_proj = ( "+proj=stere +lon_0=25E +lat_0=90N " "+lat_ts=60 +a=6371288 +x_0=380886.310 " "+y_0=3395677.920 +no_defs" ) test_attrs = [ ("projection", expected_proj, None), ("institution", "Finnish Meteorological Institute", None), # ("composite_area", ["FIN"]), # ("projection_name", ["SUOMI1"]), # ("radar", ["LUO", "1", "26.9008", "67.1386"]), # ("obstime", ["201609281600"]), # ("producttype", ["CAPPI"]), # ("productname", ["LOWEST"]), # ("param", ["CorrectedReflectivity"]), # ("metersperpixel_x", ["999.674053"]), # ("metersperpixel_y", ["999.62859"]), # ("projection", ["radar", "{"]), # ("type", ["stereographic"]), # ("centrallongitude", ["25"]), # ("centrallatitude", ["90"]), # ("truelatitude", ["60"]), # ("bottomleft", ["18.600000", "57.930000"]), # ("topright", ["34.903000", "69.005000"]), # ("missingval", 255), ("accutime", 5.0, 0.1), ("unit", "dBZ", None), ("transform", "dB", None), ("zerovalue", -32.0, 0.1), ("threshold", -31.5, 0.1), ("zr_a", 223.0, 0.1), ("zr_b", 1.53, 
0.1), ] @pytest.mark.parametrize("variable, expected, tolerance", test_attrs) def test_io_import_mch_gif_dataset_attrs(variable, expected, tolerance): """Test the importer FMI PMG.""" smart_assert(metadata[variable], expected, tolerance) # test_geodata: list of (variable,expected,tolerance) tuples test_geodata = [ ("projection", expected_proj, None), ("x1", 0.0049823258887045085, 1e-20), ("x2", 759752.2852757066, 1e-10), ("y1", 0.009731985162943602, 1e-18), ("y2", 1225544.6588913496, 1e-10), ("xpixelsize", 999.674053, 1e-6), ("ypixelsize", 999.62859, 1e-5), ("cartesian_unit", "m", None), ("yorigin", "upper", None), ] @pytest.mark.parametrize("variable, expected, tolerance", test_geodata) def test_io_import_fmi_pgm_geodata(variable, expected, tolerance): """Test the importer FMI pgm.""" root_path = pysteps.rcparams.data_sources["fmi"]["root_path"] filename = os.path.join( root_path, "20160928", "201609281600_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz", ) metadata = pysteps.io.importers._import_fmi_pgm_metadata(filename, gzipped=True) geodata = pysteps.io.importers._import_fmi_pgm_geodata(metadata) smart_assert(geodata[variable], expected, tolerance) ================================================ FILE: pysteps/tests/test_io_knmi_hdf5.py ================================================ # -*- coding: utf-8 -*- import os import pytest import pysteps from pysteps.tests.helpers import smart_assert pytest.importorskip("h5py") root_path = pysteps.rcparams.data_sources["knmi"]["root_path"] filename = os.path.join(root_path, "2010/08", "RAD_NL25_RAP_5min_201008260000.h5") precip, _, metadata = pysteps.io.import_knmi_hdf5(filename) def test_io_import_knmi_hdf5_shape(): """Test the importer KNMI HDF5.""" assert precip.shape == (765, 700) # test_metadata: list of (variable,expected, tolerance) tuples expected_proj = ( "+proj=stere +lat_0=90 +lon_0=0.0 +lat_ts=60.0 +a=6378137 +b=6356752 +x_0=0 +y_0=0" ) # list of (variable,expected,tolerance) tuples test_attrs = [ 
("projection", expected_proj, None), ("x1", 0.0, 1e-10), ("y1", -4415038.179210632, 1e-10), ("x2", 699984.2646331593, 1e-10), ("y2", -3649950.360247753, 1e-10), ("xpixelsize", 1000.0, 1e-10), ("xpixelsize", 1000.0, 1e-10), ("cartesian_unit", "m", None), ("accutime", 5.0, 1e-10), ("yorigin", "upper", None), ("unit", "mm", None), ("institution", "KNMI - Royal Netherlands Meteorological Institute", None), ("transform", None, None), ("zerovalue", 0.0, 1e-10), ("threshold", 0.01, 1e-10), ("zr_a", 200.0, None), ("zr_b", 1.6, None), ] @pytest.mark.parametrize("variable,expected,tolerance", test_attrs) def test_io_import_knmi_hdf5_metadata(variable, expected, tolerance): """Test the importer KNMI HDF5.""" smart_assert(metadata[variable], expected, tolerance) ================================================ FILE: pysteps/tests/test_io_mch_gif.py ================================================ # -*- coding: utf-8 -*- import os import pytest import pysteps from pysteps.tests.helpers import smart_assert pytest.importorskip("PIL") root_path = pysteps.rcparams.data_sources["mch"]["root_path"] filename = os.path.join(root_path, "20170131", "AQC170310945F_00005.801.gif") precip, _, metadata = pysteps.io.import_mch_gif(filename, "AQC", "mm", 5.0) def test_io_import_mch_gif_shape(): """Test the importer MCH GIF.""" assert precip.shape == (640, 710) expected_proj = ( "+proj=somerc +lon_0=7.43958333333333 " "+lat_0=46.9524055555556 +k_0=1 " "+x_0=600000 +y_0=200000 +ellps=bessel " "+towgs84=674.374,15.056,405.346,0,0,0,0 " "+units=m +no_defs" ) # list of (variable,expected,tolerance) tuples test_attrs = [ ("projection", expected_proj, None), ("institution", "MeteoSwiss", None), ("accutime", 5.0, 0.1), ("unit", "mm", None), ("transform", None, None), ("zerovalue", 0.0, 0.1), ("threshold", 0.0009628129986471908, 1e-19), ("zr_a", 316.0, 0.1), ("zr_b", 1.5, 0.1), ("x1", 255000.0, 0.1), ("y1", -160000.0, 0.1), ("x2", 965000.0, 0.1), ("y2", 480000.0, 0.1), ("xpixelsize", 1000.0, 0.1), 
("ypixelsize", 1000.0, 0.1), ("cartesian_unit", "m", None), ("yorigin", "upper", None), ] @pytest.mark.parametrize("variable, expected, tolerance", test_attrs) def test_io_import_mch_gif_dataset_attrs(variable, expected, tolerance): """Test the importer MCH GIF.""" smart_assert(metadata[variable], expected, tolerance) # test_geodata: list of (variable,expected,tolerance) tuples test_geodata = [ ("projection", expected_proj, None), ("x1", 255000.0, 0.1), ("y1", -160000.0, 0.1), ("x2", 965000.0, 0.1), ("y2", 480000.0, 0.1), ("xpixelsize", 1000.0, 0.1), ("ypixelsize", 1000.0, 0.1), ("cartesian_unit", "m", None), ("yorigin", "upper", None), ] @pytest.mark.parametrize("variable, expected, tolerance", test_geodata) def test_io_import_mch_geodata(variable, expected, tolerance): """Test the importer MCH geodata.""" geodata = pysteps.io.importers._import_mch_geodata() smart_assert(geodata[variable], expected, tolerance) ================================================ FILE: pysteps/tests/test_io_mrms_grib.py ================================================ # -*- coding: utf-8 -*- import os import numpy as np import pytest from numpy.testing import assert_array_almost_equal import pysteps pytest.importorskip("pygrib") def test_io_import_mrms_grib(): """Test the importer for NSSL data.""" root_path = pysteps.rcparams.data_sources["mrms"]["root_path"] filename = os.path.join( root_path, "2019/06/10/", "PrecipRate_00.00_20190610-000000.grib2" ) precip, _, metadata = pysteps.io.import_mrms_grib(filename, fillna=0, window_size=1) assert precip.shape == (3500, 7000) assert precip.dtype == "single" expected_metadata = { "institution": "NOAA National Severe Storms Laboratory", "xpixelsize": 0.01, "ypixelsize": 0.01, "unit": "mm/h", "transform": None, "zerovalue": 0, "projection": "+proj=longlat +ellps=IAU76", "yorigin": "upper", "threshold": 0.1, "x1": -129.99999999999997, "x2": -60.00000199999991, "y1": 20.000001, "y2": 55.00000000000001, "cartesian_unit": "degrees", } for key, 
value in expected_metadata.items(): if isinstance(value, float): assert_array_almost_equal(metadata[key], expected_metadata[key]) else: assert metadata[key] == expected_metadata[key] x = np.arange(metadata["x1"], metadata["x2"], metadata["xpixelsize"]) y = np.arange(metadata["y1"], metadata["y2"], metadata["ypixelsize"]) assert y.size == precip.shape[0] assert x.size == precip.shape[1] # The full latitude range is (20.005, 54.995) # The full longitude range is (230.005, 299.995) # Test that if the bounding box is larger than the domain, all the points are returned. precip2, _, _ = pysteps.io.import_mrms_grib( filename, fillna=0, extent=(220, 300, 20, 55), window_size=1 ) assert precip2.shape == (3500, 7000) assert_array_almost_equal(precip, precip2) del precip2 # Test that a portion of the domain is returned correctly precip3, _, _ = pysteps.io.import_mrms_grib( filename, fillna=0, extent=(250, 260, 30, 35), window_size=1 ) assert precip3.shape == (500, 1000) assert_array_almost_equal(precip3, precip[2000:2500, 2000:3000]) del precip3 precip4, _, _ = pysteps.io.import_mrms_grib(filename, dtype="double", fillna=0) assert precip4.dtype == "double" del precip4 precip5, _, _ = pysteps.io.import_mrms_grib(filename, dtype="single", fillna=0) assert precip5.dtype == "single" del precip5 ================================================ FILE: pysteps/tests/test_io_nowcast_importers.py ================================================ import numpy as np import pytest from pysteps import io from pysteps.tests.helpers import get_precipitation_fields precip, metadata = get_precipitation_fields( num_prev_files=1, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) @pytest.mark.parametrize( "precip, metadata", [(precip, metadata), (np.zeros_like(precip), metadata)], ) def test_import_netcdf(precip, metadata, tmp_path): pytest.importorskip("pyproj") field_shape = (precip.shape[1], precip.shape[2]) startdate = metadata["timestamps"][-1] timestep = metadata["accutime"] 
exporter = io.exporters.initialize_forecast_exporter_netcdf( outpath=tmp_path.as_posix(), outfnprefix="test", startdate=startdate, timestep=timestep, n_timesteps=precip.shape[0], shape=field_shape, metadata=metadata, ) io.exporters.export_forecast_dataset(precip, exporter) io.exporters.close_forecast_files(exporter) tmp_file = tmp_path / "test.nc" precip_netcdf, metadata_netcdf = io.import_netcdf_pysteps(tmp_file, dtype="float64") assert isinstance(precip_netcdf, np.ndarray) assert isinstance(metadata_netcdf, dict) assert precip_netcdf.ndim == precip.ndim, "Wrong number of dimensions" assert precip_netcdf.shape[0] == precip.shape[0], "Wrong number of lead times" assert precip_netcdf.shape[1:] == field_shape, "Wrong field shape" assert np.allclose(precip_netcdf, precip) ================================================ FILE: pysteps/tests/test_io_opera_hdf5.py ================================================ # -*- coding: utf-8 -*- import os import pytest import pysteps from pysteps.tests.helpers import smart_assert pytest.importorskip("h5py") # tests for three OPERA products: # Odyssey rain rate composite (production discontinued on October 30th 2024) # CIRRUS max. 
# list of (variable, expected, tolerance) tuples
test_odyssey_attrs = [
    ("projection", expected_proj, None),
    ("ll_lon", -10.434576838640398, 1e-10),
    ("ll_lat", 31.746215319325056, 1e-10),
    ("ur_lon", 57.81196475014995, 1e-10),
    ("ur_lat", 67.62103710275053, 1e-10),
    ("x1", -0.0004161088727414608, 1e-6),
    ("y1", -4400000.001057557, 1e-10),
    ("x2", 3800000.0004256153, 1e-10),
    ("y2", -0.0004262728616595268, 1e-6),
    ("xpixelsize", 2000.0, 1e-10),
    # BUG FIX: this entry was a duplicate of "xpixelsize"; the y-resolution
    # of the Odyssey composite grid was never checked.
    ("ypixelsize", 2000.0, 1e-10),
    ("cartesian_unit", "m", None),
    ("accutime", 15.0, 1e-10),
    ("yorigin", "upper", None),
    ("unit", "mm/h", None),
    ("institution", "Odyssey datacentre", None),
    ("transform", None, None),
    ("zerovalue", 0.0, 1e-10),
    ("threshold", 0.01, 1e-10),
]


@pytest.mark.parametrize("variable, expected, tolerance", test_odyssey_attrs)
def test_io_import_opera_hdf5_odyssey_dataset_attrs(variable, expected, tolerance):
    """Test OPERA HDF5 importer: rain rate composites from Odyssey."""
    smart_assert(metadata_odyssey[variable], expected, tolerance)
reflectivity composites from CIRRUS.""" smart_assert(metadata_cirrus[variable], expected, tolerance) # list of (variable,expected,tolerance) tuples test_nimbus_rain_rate_attrs = [ ("projection", expected_proj, None), ("ll_lon", -10.434599999137568, 1e-10), ("ll_lat", 31.74619995126678, 1e-10), ("ur_lon", 57.8119032106317, 1e-10), ("ur_lat", 67.62104536996274, 1e-10), ("x1", -2.5302714337594807, 1e-6), ("y1", -4400001.031169886, 1e-10), ("x2", 3799997.4700817037, 1e-10), ("y2", -1.0300876162946224, 1e-6), ("xpixelsize", 2000.0, 1e-10), ("ypixelsize", 2000.0, 1e-10), ("cartesian_unit", "m", None), ("accutime", 15.0, 1e-10), ("yorigin", "upper", None), ("unit", "mm/h", None), ("institution", "Odyssey datacentre", None), ("transform", None, None), ("zerovalue", 0.0, 1e-10), ("threshold", 0.01, 1e-10), ] @pytest.mark.parametrize("variable, expected, tolerance", test_nimbus_rain_rate_attrs) def test_io_import_opera_hdf5_nimbus_rain_rate_dataset_attrs( variable, expected, tolerance ): """Test OPERA HDF5 importer: rain rate composites from NIMBUS.""" smart_assert(metadata_nimbus_rain_rate[variable], expected, tolerance) # list of (variable,expected,tolerance) tuples test_nimbus_rain_accum_attrs = [ ("projection", expected_proj, None), ("ll_lon", -10.434599999137568, 1e-10), ("ll_lat", 31.74619995126678, 1e-10), ("ur_lon", 57.8119032106317, 1e-10), ("ur_lat", 67.62104536996274, 1e-10), ("x1", -2.5302714337594807, 1e-6), ("y1", -4400001.031169886, 1e-10), ("x2", 3799997.4700817037, 1e-10), ("y2", -1.0300876162946224, 1e-6), ("xpixelsize", 2000.0, 1e-10), ("ypixelsize", 2000.0, 1e-10), ("cartesian_unit", "m", None), ("accutime", 15.0, 1e-10), ("yorigin", "upper", None), ("unit", "mm", None), ("institution", "Odyssey datacentre", None), ("transform", None, None), ("zerovalue", 0.0, 1e-10), ("threshold", 0.01, 1e-10), ] @pytest.mark.parametrize("variable, expected, tolerance", test_nimbus_rain_accum_attrs) def test_io_import_opera_hdf5_nimbus_rain_accum_dataset_attrs( variable, 
expected, tolerance ): """Test OPERA HDF5 importer: rain accumulation composites from NIMBUS.""" smart_assert(metadata_nimbus_rain_accum[variable], expected, tolerance) ================================================ FILE: pysteps/tests/test_io_readers.py ================================================ from datetime import datetime import numpy as np import pytest import pysteps def test_read_timeseries_mch(): pytest.importorskip("PIL") date = datetime.strptime("201505151630", "%Y%m%d%H%M") data_source = pysteps.rcparams.data_sources["mch"] root_path = data_source["root_path"] path_fmt = data_source["path_fmt"] fn_pattern = data_source["fn_pattern"] fn_ext = data_source["fn_ext"] importer_name = data_source["importer"] importer_kwargs = data_source["importer_kwargs"] timestep = data_source["timestep"] fns = pysteps.io.archive.find_by_date( date, root_path, path_fmt, fn_pattern, fn_ext, timestep=timestep, num_prev_files=1, num_next_files=1, ) importer = pysteps.io.get_method(importer_name, "importer") precip, _, metadata = pysteps.io.read_timeseries(fns, importer, **importer_kwargs) assert isinstance(precip, np.ndarray) assert isinstance(metadata, dict) assert precip.shape[0] == 3 ================================================ FILE: pysteps/tests/test_io_saf_crri.py ================================================ # -*- coding: utf-8 -*- import os import pytest import pysteps from pysteps.tests.helpers import smart_assert pytest.importorskip("netCDF4") expected_proj = ( "+proj=geos +a=6378137.000000 +b=6356752.300000 " "+lon_0=0.000000 +h=35785863.000000" ) test_geodata_crri = [ ("projection", expected_proj, None), ("x1", -3301500.0, 0.1), ("x2", 3298500.0, 0.1), ("y1", 2512500.0, 0.1), ("y2", 5569500.0, 0.1), ("xpixelsize", 3000.0, 0.1), ("ypixelsize", 3000.0, 0.1), ("cartesian_unit", "m", None), ("yorigin", "upper", None), ] @pytest.mark.parametrize("variable, expected, tolerance", test_geodata_crri) def test_io_import_saf_crri_geodata(variable, expected, 
tolerance): """Test the importer SAF CRRI.""" root_path = pysteps.rcparams.data_sources["saf"]["root_path"] rel_path = "20180601/CRR" filename = os.path.join( root_path, rel_path, "S_NWC_CRR_MSG4_Europe-VISIR_20180601T070000Z.nc" ) geodata = pysteps.io.importers._import_saf_crri_geodata(filename) smart_assert(geodata[variable], expected, tolerance) root_path = pysteps.rcparams.data_sources["saf"]["root_path"] rel_path = "20180601/CRR" filename = os.path.join( root_path, rel_path, "S_NWC_CRR_MSG4_Europe-VISIR_20180601T070000Z.nc" ) _, _, metadata = pysteps.io.import_saf_crri(filename) # list of (variable,expected,tolerance) tuples test_attrs = [ ("projection", expected_proj, None), ("institution", "Agencia Estatal de Meteorología (AEMET)", None), ("transform", None, None), ("zerovalue", 0.0, 0.1), ("unit", "mm/h", None), ("accutime", None, None), ] @pytest.mark.parametrize("variable, expected, tolerance", test_attrs) def test_io_import_saf_crri_attrs(variable, expected, tolerance): """Test the importer SAF CRRI.""" smart_assert(metadata[variable], expected, tolerance) test_extent_crri = [ (None, (-3301500.0, 3298500.0, 2512500.0, 5569500.0), (1019, 2200), None), ( (-1980000.0, 1977000.0, 2514000.0, 4818000.0), (-1978500.0, 1975500.0, 2515500.0, 4816500.0), (767, 1318), None, ), ] @pytest.mark.parametrize( "extent, expected_extent, expected_shape, tolerance", test_extent_crri ) def test_io_import_saf_crri_extent(extent, expected_extent, expected_shape, tolerance): """Test the importer SAF CRRI.""" root_path = pysteps.rcparams.data_sources["saf"]["root_path"] rel_path = "20180601/CRR" filename = os.path.join( root_path, rel_path, "S_NWC_CRR_MSG4_Europe-VISIR_20180601T070000Z.nc" ) precip, _, metadata = pysteps.io.import_saf_crri(filename, extent=extent) extent_out = (metadata["x1"], metadata["x2"], metadata["y1"], metadata["y2"]) smart_assert(extent_out, expected_extent, tolerance) smart_assert(precip.shape, expected_shape, tolerance) 
================================================ FILE: pysteps/tests/test_motion.py ================================================ # coding: utf-8 """ Test the convergence of the optical flow methods available in pySTEPS using idealized motion fields. To test the convergence, using an example precipitation field we will: - Read precipitation field from a file - Morph the precipitation field using a given motion field (linear or rotor) to generate a sequence of moving precipitation patterns. - Using the available optical flow methods, retrieve the motion field from the precipitation time sequence (synthetic precipitation observations). This tests check that the retrieved motion fields are within reasonable values. Also, they will fail if any modification on the code decrease the quality of the retrieval. """ from contextlib import contextmanager import numpy as np import pytest from functools import partial from scipy.ndimage import uniform_filter import pysteps as stp from pysteps import motion from pysteps.motion.vet import morph from pysteps.tests.helpers import get_precipitation_fields @contextmanager def not_raises(_exception): try: yield except _exception: raise pytest.fail("DID RAISE {0}".format(_exception)) reference_field = get_precipitation_fields(num_prev_files=0) def _create_motion_field(input_precip, motion_type): """ Create idealized motion fields to be applied to the reference image. Parameters ---------- input_precip: numpy array (lat, lon) motion_type : str The supported motion fields are: - linear_x: (u=2, v=0) - linear_y: (u=0, v=2) Returns ------- ideal_motion : numpy array (u, v) """ # Create an imaginary grid on the image and create a motion field to be # applied to the image. 
ny, nx = input_precip.shape ideal_motion = np.zeros((2, nx, ny)) if motion_type == "linear_x": ideal_motion[0, :] = 2 # Motion along x elif motion_type == "linear_y": ideal_motion[1, :] = 2 # Motion along y else: raise ValueError("motion_type not supported.") # We need to swap the axes because the optical flow methods expect # (lat, lon) or (y,x) indexing convention. ideal_motion = ideal_motion.swapaxes(1, 2) return ideal_motion def _create_observations(input_precip, motion_type, num_times=9): """ Create synthetic precipitation observations by displacing the input field using an ideal motion field. Parameters ---------- input_precip: numpy array (lat, lon) Input precipitation field. motion_type : str The supported motion fields are: - linear_x: (u=2, v=0) - linear_y: (u=0, v=2) num_times: int, optional Length of the observations sequence. Returns ------- synthetic_observations : numpy array Sequence of observations """ ideal_motion = _create_motion_field(input_precip, motion_type) # The morph function expects (lon, lat) or (x, y) dimensions. # Hence, we need to swap the lat,lon axes. # NOTE: The motion field passed to the morph function can't have any NaNs. # Otherwise, it can produce a segmentation fault. 
morphed_field, mask = morph( input_precip.swapaxes(0, 1), ideal_motion.swapaxes(1, 2) ) mask = np.array(mask, dtype=bool) synthetic_observations = np.ma.MaskedArray(morphed_field, mask=mask) synthetic_observations = synthetic_observations[np.newaxis, :] for t in range(1, num_times): morphed_field, mask = morph( synthetic_observations[t - 1], ideal_motion.swapaxes(1, 2) ) mask = np.array(mask, dtype=bool) morphed_field = np.ma.MaskedArray( morphed_field[np.newaxis, :], mask=mask[np.newaxis, :] ) synthetic_observations = np.ma.concatenate( [synthetic_observations, morphed_field], axis=0 ) # Swap back to (lat, lon) synthetic_observations = synthetic_observations.swapaxes(1, 2) synthetic_observations = np.ma.masked_invalid(synthetic_observations) synthetic_observations.data[np.ma.getmaskarray(synthetic_observations)] = 0 return ideal_motion, synthetic_observations convergence_arg_names = ( "input_precip, optflow_method_name, motion_type, " "num_times, max_rel_rmse" ) convergence_arg_values = [ (reference_field, "lk", "linear_x", 2, 0.1), (reference_field, "lk", "linear_y", 2, 0.1), (reference_field, "lk", "linear_x", 3, 0.1), (reference_field, "lk", "linear_y", 3, 0.1), (reference_field, "vet", "linear_x", 2, 0.1), # (reference_field, 'vet', 'linear_x', 3, 9), # (reference_field, 'vet', 'linear_y', 2, 9), (reference_field, "vet", "linear_y", 3, 0.1), (reference_field, "proesmans", "linear_x", 2, 0.45), (reference_field, "proesmans", "linear_y", 2, 0.45), (reference_field, "darts", "linear_x", 9, 20), (reference_field, "darts", "linear_y", 9, 20), (reference_field, "farneback", "linear_x", 2, 28), (reference_field, "farneback", "linear_y", 2, 28), ] @pytest.mark.parametrize(convergence_arg_names, convergence_arg_values) def test_optflow_method_convergence( input_precip, optflow_method_name, motion_type, num_times, max_rel_rmse ): """ Test the convergence to the actual solution of the optical flow method used. 
We measure the error in the retrieved field by using the Relative RMSE = Rel_RMSE = sqrt(Relative MSE) - Rel_RMSE = 0%: no error - Rel_RMSE = 100%: The retrieved motion field has an average error equal in magnitude to the motion field. Relative RMSE is computed only un a region surrounding the precipitation field, were we have enough information to retrieve the motion field. The precipitation region includes the precipitation pattern plus a margin of approximately 20 grid points. Parameters ---------- input_precip: numpy array (lat, lon) Input precipitation field. optflow_method_name: str Optical flow method name motion_type : str The supported motion fields are: - linear_x: (u=2, v=0) - linear_y: (u=0, v=2) """ if optflow_method_name == "lk": pytest.importorskip("cv2") ideal_motion, precip_obs = _create_observations( input_precip.copy(), motion_type, num_times=num_times ) oflow_method = motion.get_method(optflow_method_name) if optflow_method_name == "vet": # By default, the maximum number of iteration in the VET minimization # is maxiter=100. # To increase the stability of the tests to we increase this value to # maxiter=150. retrieved_motion = oflow_method( precip_obs, verbose=False, options=dict(maxiter=150) ) elif optflow_method_name == "proesmans": retrieved_motion = oflow_method(precip_obs) else: retrieved_motion = oflow_method(precip_obs, verbose=False) precip_data, _ = stp.utils.dB_transform(precip_obs.max(axis=0), inverse=True) precip_data.data[precip_data.mask] = 0 precip_mask = (uniform_filter(precip_data, size=20) > 0.1) & ~precip_obs.mask.any( axis=0 ) # To evaluate the accuracy of the computed_motion vectors, we will use # a relative RMSE measure. 
# Relative MSE = < (expected_motion - computed_motion)^2 > / # Relative RMSE = sqrt(Relative MSE) mse = ((ideal_motion - retrieved_motion)[:, precip_mask] ** 2).mean() rel_mse = mse / (ideal_motion[:, precip_mask] ** 2).mean() rel_rmse = np.sqrt(rel_mse) * 100 print( f"method:{optflow_method_name} ; " f"motion:{motion_type} ; times: {num_times} ; " f"rel_rmse:{rel_rmse:.2f}%" ) assert rel_rmse < max_rel_rmse no_precip_args_names = "optflow_method_name, num_times" no_precip_args_values = [ ("lk", 2), ("lk", 3), ("vet", 2), ("vet", 3), ("darts", 9), ("proesmans", 2), ("farneback", 2), ] @pytest.mark.parametrize(no_precip_args_names, no_precip_args_values) def test_no_precipitation(optflow_method_name, num_times): """ Test that the motion methods work well with a zero precipitation in the domain. The expected result is a zero motion vector. Parameters ---------- optflow_method_name: str Optical flow method name num_times : int Number of precipitation frames (times) used as input for the optical flow methods. 
@pytest.mark.parametrize(input_tests_args_names, input_tests_args_values)
def test_input_shape_checks(
    optflow_method_name, minimum_input_frames, maximum_input_frames
):
    """
    Check that each optical flow method accepts the supported number of
    input frames and raises ValueError for every unsupported input shape.
    """
    if optflow_method_name in ("lk", "farneback"):
        pytest.importorskip("cv2")

    image_size = 100
    motion_method = motion.get_method(optflow_method_name)

    if maximum_input_frames == np.inf:
        maximum_input_frames = minimum_input_frames + 10

    # Valid frame counts must not raise anything.
    with not_raises(Exception):
        for frames in range(minimum_input_frames, maximum_input_frames + 1):
            motion_method(np.zeros((frames, image_size, image_size)), verbose=False)

    # BUG FIX: previously all invalid calls shared a single
    # ``pytest.raises(ValueError)`` block. A ``pytest.raises`` context exits
    # as soon as the first exception is raised, so only the very first call
    # was ever executed and the remaining invalid shapes were never tested.
    # Each invalid input now gets its own context.
    with pytest.raises(ValueError):
        motion_method(np.zeros((2,)))
    with pytest.raises(ValueError):
        motion_method(np.zeros((2, 2)))
    for frames in range(minimum_input_frames):
        with pytest.raises(ValueError):
            motion_method(np.zeros((frames, image_size, image_size)), verbose=False)
    for frames in range(maximum_input_frames + 1, maximum_input_frames + 4):
        with pytest.raises(ValueError):
            motion_method(np.zeros((frames, image_size, image_size)), verbose=False)
def test_vet_cost_function():
    """
    Test that the vet cost_function computation always gives the same result
    with the same input.

    Useful to test if the parallelization in VET produces undesired results.
    """
    from pysteps.motion import vet

    ideal_motion, precip_obs = _create_observations(
        reference_field.copy(), "linear_y", num_times=2
    )

    mask_2d = np.ma.getmaskarray(precip_obs).any(axis=0).astype("int8")

    returned_values = np.zeros(20)
    for i in range(20):
        returned_values[i] = vet.vet_cost_function(
            ideal_motion.ravel(),  # sector_displacement_1d
            precip_obs.data,  # input_images
            ideal_motion.shape[1:],  # blocks_shape (same as 2D grid)
            mask_2d,  # Mask
            1e6,  # smooth_gain
            debug=False,
        )

    tolerance = 1e-12
    errors = np.abs(returned_values - returned_values[0])
    # BUG FIX: the intent is "errors should contain all zeros", but the
    # original assertion used ``.any()``, which passes as soon as a single
    # element is below tolerance (trivially true, since errors[0] == 0).
    # ``.all()`` enforces the stated determinism invariant.
    assert (errors < tolerance).all()
    # BUG FIX: without ``abs`` the comparison also passed when the returned
    # value was arbitrarily *smaller* than the reference value.
    assert abs(returned_values[0] - 1548250.87627097) < 0.001
""" pytest.importorskip("cv2") __, precip_obs = _create_observations( reference_field.copy(), "linear_y", num_times=2 ) motion_method = motion.get_method(method) # ndarray with nans np.ma.set_fill_value(precip_obs, -15) ndarray = precip_obs.filled() ndarray[ndarray == -15] = np.nan uv_ndarray = motion_method(ndarray, **kwargs) # masked array mdarray = np.ma.masked_invalid(ndarray) mdarray.data[mdarray.mask] = -15 uv_mdarray = motion_method(mdarray, **kwargs) assert np.abs(uv_mdarray - uv_ndarray).max() < 0.01 ================================================ FILE: pysteps/tests/test_motion_farneback.py ================================================ import pytest import numpy as np from pysteps.motion import farneback from pysteps.exceptions import MissingOptionalDependency from pysteps.tests.helpers import get_precipitation_fields fb_arg_names = ( "pyr_scale", "levels", "winsize", "iterations", "poly_n", "poly_sigma", "flags", "size_opening", "sigma", "verbose", ) fb_arg_values = [ (0.5, 3, 15, 3, 5, 1.1, 0, 3, 60.0, False), # default (0.5, 1, 5, 1, 7, 1.5, 0, 0, 0.0, True), # minimal settings, sigma=0, verbose ( 0.3, 5, 30, 10, 7, 1.5, 1, 5, 10.0, False, ), # maximal settings, flags=1, big opening (0.5, 3, 15, 3, 5, 1.1, 0, 0, 60.0, True), # no opening, verbose ] @pytest.mark.parametrize(fb_arg_names, fb_arg_values) def test_farneback_params( pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags, size_opening, sigma, verbose, ): """Test Farneback with various parameters and input types.""" pytest.importorskip("cv2") # Input: realistic precipitation fields precip, metadata = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) precip = precip.filled() output = farneback.farneback( precip, pyr_scale=pyr_scale, levels=levels, winsize=winsize, iterations=iterations, poly_n=poly_n, poly_sigma=poly_sigma, flags=flags, size_opening=size_opening, sigma=sigma, verbose=verbose, ) assert 
isinstance(output, np.ndarray) assert output.shape[0] == 2 assert output.shape[1:] == precip[0].shape assert np.isfinite(output).all() or np.isnan(output).any() def test_farneback_invalid_shape(): """Test error when input is wrong shape.""" pytest.importorskip("cv2") arr = np.random.rand(64, 64) with pytest.raises(ValueError): farneback.farneback(arr) def test_farneback_nan_input(): """Test NaN handling in input.""" pytest.importorskip("cv2") arr = np.random.rand(2, 64, 64) arr[0, 0, 0] = np.nan arr[1, 10, 10] = np.inf result = farneback.farneback(arr) assert result.shape == (2, 64, 64) def test_farneback_cv2_missing(monkeypatch): """Test MissingOptionalDependency when cv2 is not injected.""" monkeypatch.setattr(farneback, "CV2_IMPORTED", False) arr = np.random.rand(2, 64, 64) with pytest.raises(MissingOptionalDependency): farneback.farneback(arr) monkeypatch.setattr(farneback, "CV2_IMPORTED", True) # restore def test_farneback_sigma_zero(): """Test sigma=0 disables smoothing logic.""" pytest.importorskip("cv2") arr = np.random.rand(2, 32, 32) result = farneback.farneback(arr, sigma=0.0) assert isinstance(result, np.ndarray) assert result.shape == (2, 32, 32) def test_farneback_small_window(): """Test winsize edge case behavior.""" pytest.importorskip("cv2") arr = np.random.rand(2, 16, 16) result = farneback.farneback(arr, winsize=3) assert result.shape == (2, 16, 16) def test_farneback_verbose(capsys): """Test that verbose produces output (side-effect only).""" pytest.importorskip("cv2") arr = np.random.rand(2, 16, 16) farneback.farneback(arr, verbose=True) out = capsys.readouterr().out assert "Farneback method" in out or "mult factor" in out or "---" in out ================================================ FILE: pysteps/tests/test_motion_lk.py ================================================ # coding: utf-8 """ """ import pytest import numpy as np from pysteps import motion, verification from pysteps.tests.helpers import get_precipitation_fields lk_arg_names = ( 
"lk_kwargs", "fd_method", "dense", "nr_std_outlier", "k_outlier", "size_opening", "decl_scale", "verbose", ) lk_arg_values = [ ({}, "shitomasi", True, 3, 30, 3, 20, False), # defaults ({}, "shitomasi", False, 3, 30, 3, 20, True), # sparse ouput, verbose ({}, "shitomasi", False, 0, 30, 3, 20, False), # sparse ouput, all outliers ( {}, "shitomasi", True, 3, None, 0, 0, False, ), # global outlier detection, no filtering, no declutering ({}, "shitomasi", True, 0, 30, 3, 20, False), # all outliers ({}, "blob", True, 3, 30, 3, 20, False), # blob detection ({}, "tstorm", True, 3, 30, 3, 20, False), # tstorm detection ] @pytest.mark.parametrize(lk_arg_names, lk_arg_values) def test_lk( lk_kwargs, fd_method, dense, nr_std_outlier, k_outlier, size_opening, decl_scale, verbose, ): """Tests Lucas-Kanade optical flow.""" pytest.importorskip("cv2") if fd_method == "blob": pytest.importorskip("skimage") if fd_method == "tstorm": pytest.importorskip("skimage") pytest.importorskip("pandas") # inputs precip, metadata = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) precip = precip.filled() # Retrieve motion field oflow_method = motion.get_method("LK") output = oflow_method( precip, lk_kwargs=lk_kwargs, fd_method=fd_method, dense=dense, nr_std_outlier=nr_std_outlier, k_outlier=k_outlier, size_opening=size_opening, decl_scale=decl_scale, verbose=verbose, ) # Check format of ouput if dense: assert isinstance(output, np.ndarray) assert output.ndim == 3 assert output.shape[0] == 2 assert output.shape[1:] == precip[0].shape if nr_std_outlier == 0: assert output.sum() == 0 else: assert isinstance(output, tuple) assert len(output) == 2 assert isinstance(output[0], np.ndarray) assert isinstance(output[1], np.ndarray) assert output[0].ndim == 2 assert output[1].ndim == 2 assert output[0].shape[1] == 2 assert output[1].shape[1] == 2 assert output[0].shape[0] == output[1].shape[0] if nr_std_outlier == 0: assert output[0].shape[0] 
== 0 assert output[1].shape[0] == 0 ================================================ FILE: pysteps/tests/test_noise_fftgenerators.py ================================================ import numpy as np from pysteps.noise import fftgenerators from pysteps.tests.helpers import get_precipitation_fields PRECIP = get_precipitation_fields( num_prev_files=0, num_next_files=0, return_raw=False, metadata=False, upscale=2000, ) PRECIP = PRECIP.filled() def test_noise_param_2d_fft_filter(): fft_filter = fftgenerators.initialize_param_2d_fft_filter(PRECIP) assert isinstance(fft_filter, dict) assert all([key in fft_filter for key in ["field", "input_shape", "model", "pars"]]) out = fftgenerators.generate_noise_2d_fft_filter(fft_filter) assert isinstance(out, np.ndarray) assert out.shape == PRECIP.shape def test_noise_nonparam_2d_fft_filter(): fft_filter = fftgenerators.initialize_nonparam_2d_fft_filter(PRECIP) assert isinstance(fft_filter, dict) assert all([key in fft_filter for key in ["field", "input_shape"]]) out = fftgenerators.generate_noise_2d_fft_filter(fft_filter) assert isinstance(out, np.ndarray) assert out.shape == PRECIP.shape def test_noise_nonparam_2d_ssft_filter(): fft_filter = fftgenerators.initialize_nonparam_2d_ssft_filter(PRECIP) assert isinstance(fft_filter, dict) assert all([key in fft_filter for key in ["field", "input_shape"]]) out = fftgenerators.generate_noise_2d_ssft_filter(fft_filter) assert isinstance(out, np.ndarray) assert out.shape == PRECIP.shape def test_noise_nonparam_2d_nested_filter(): fft_filter = fftgenerators.initialize_nonparam_2d_nested_filter(PRECIP) assert isinstance(fft_filter, dict) assert all([key in fft_filter for key in ["field", "input_shape"]]) out = fftgenerators.generate_noise_2d_ssft_filter(fft_filter) assert isinstance(out, np.ndarray) assert out.shape == PRECIP.shape ================================================ FILE: pysteps/tests/test_noise_motion.py ================================================ # -*- coding: utf-8 
-*- import numpy as np import pytest from numpy.testing import assert_array_almost_equal from pysteps.noise.motion import generate_bps from pysteps.noise.motion import get_default_params_bps_par from pysteps.noise.motion import get_default_params_bps_perp from pysteps.noise.motion import initialize_bps def test_noise_motion_get_default_params_bps_par(): """Tests default BPS velocity parameters.""" expected = (10.88, 0.23, -7.68) result = get_default_params_bps_par() assert_array_almost_equal(result, expected) def test_noise_motion_get_default_params_bps_perp(): """Tests default BPS velocity perturbation.""" expected = (5.76, 0.31, -2.72) result = get_default_params_bps_perp() assert_array_almost_equal(result, expected) vv = np.ones((8, 8)) * np.sqrt(2) * 0.5 test_init_bps_vars = [ ("vsf", 60), ("eps_par", -0.2042896366299448), ("eps_perp", 1.6383482042624593), ("p_par", (10.88, 0.23, -7.68)), ("p_perp", (5.76, 0.31, -2.72)), ("V_par", np.stack([vv, vv])), ("V_perp", np.stack([-vv, vv])), ] @pytest.mark.parametrize("variable, expected", test_init_bps_vars) def test_initialize_bps(variable, expected): """Tests initialation BPS velocity perturbation method.""" seed = 42 timestep = 1 pixelsperkm = 1 v = np.ones((8, 8)) velocity = np.stack([v, v]) perturbator = initialize_bps(velocity, pixelsperkm, timestep, seed=seed) assert_array_almost_equal(perturbator[variable], expected) def test_generate_bps(): """Tests generation BPS velocity perturbation method.""" seed = 42 timestep = 1 pixelsperkm = 1 v = np.ones((8, 8)) velocity = np.stack([v, v]) perturbator = initialize_bps(velocity, pixelsperkm, timestep, seed=seed) new_vv = generate_bps(perturbator, timestep) expected = np.stack([v * -0.066401, v * 0.050992]) assert_array_almost_equal(new_vv, expected) ================================================ FILE: pysteps/tests/test_nowcasts_anvil.py ================================================ import numpy as np import pytest from pysteps import motion, nowcasts, 
verification from pysteps.tests.helpers import get_precipitation_fields anvil_arg_names = ( "n_cascade_levels", "ar_order", "ar_window_radius", "timesteps", "min_csi", "apply_rainrate_mask", "measure_time", ) anvil_arg_values = [ (8, 1, 50, 3, 0.6, True, False), (8, 1, 50, [3], 0.6, False, True), ] def test_default_anvil_norain(): """Tests anvil nowcast with default params and all-zero inputs.""" # Define dummy nowcast input data precip_input = np.zeros((4, 100, 100)) pytest.importorskip("cv2") oflow_method = motion.get_method("LK") retrieved_motion = oflow_method(precip_input) nowcast_method = nowcasts.get_method("anvil") precip_forecast = nowcast_method( precip_input, retrieved_motion, timesteps=3, ) assert precip_forecast.ndim == 3 assert precip_forecast.shape[0] == 3 assert precip_forecast.sum() == 0.0 @pytest.mark.parametrize(anvil_arg_names, anvil_arg_values) def test_anvil_rainrate( n_cascade_levels, ar_order, ar_window_radius, timesteps, min_csi, apply_rainrate_mask, measure_time, ): """Tests ANVIL nowcast using rain rate precipitation fields.""" # inputs precip_input = get_precipitation_fields( num_prev_files=4, num_next_files=0, return_raw=False, metadata=False, upscale=2000, ) precip_input = precip_input.filled() precip_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 )[1:, :, :] precip_obs = precip_obs.filled() pytest.importorskip("cv2") oflow_method = motion.get_method("LK") retrieved_motion = oflow_method(precip_input) nowcast_method = nowcasts.get_method("anvil") output = nowcast_method( precip_input[-(ar_order + 2) :], retrieved_motion, timesteps=timesteps, rainrate=None, # no R(VIL) conversion is done n_cascade_levels=n_cascade_levels, ar_order=ar_order, ar_window_radius=ar_window_radius, apply_rainrate_mask=apply_rainrate_mask, measure_time=measure_time, ) if measure_time: precip_forecast, __, __ = output else: precip_forecast = output assert precip_forecast.ndim == 3 assert 
precip_forecast.shape[0] == ( timesteps if isinstance(timesteps, int) else len(timesteps) ) result = verification.det_cat_fct( precip_forecast[-1], precip_obs[-1], thr=0.1, scores="CSI" )["CSI"] assert result > min_csi, f"CSI={result:.2f}, required > {min_csi:.2f}" if __name__ == "__main__": for n in range(len(anvil_arg_values)): test_args = zip(anvil_arg_names, anvil_arg_values[n]) test_anvil_rainrate(**dict((x, y) for x, y in test_args)) ================================================ FILE: pysteps/tests/test_nowcasts_lagrangian_probability.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from pysteps.nowcasts.lagrangian_probability import forecast from pysteps.tests.helpers import get_precipitation_fields from pysteps.motion.lucaskanade import dense_lucaskanade def test_numerical_example(): """""" precip = np.zeros((20, 20)) precip[5:10, 5:10] = 1 velocity = np.zeros((2, *precip.shape)) timesteps = 4 thr = 0.5 slope = 1 # pixels / timestep # compute probability forecast fct = forecast(precip, velocity, timesteps, thr, slope=slope) assert fct.ndim == 3 assert fct.shape[0] == timesteps assert fct.shape[1:] == precip.shape assert fct.max() <= 1.0 assert fct.min() >= 0.0 # slope = 0 should return a binary field fct = forecast(precip, velocity, timesteps, thr, slope=0) ref = (np.repeat(precip[None, ...], timesteps, axis=0) >= thr).astype(float) assert np.allclose(fct, fct.astype(bool)) assert np.allclose(fct, ref) def test_numerical_example_with_float_slope_and_float_list_timesteps(): """""" precip = np.zeros((20, 20)) precip[5:10, 5:10] = 1 velocity = np.zeros((2, *precip.shape)) timesteps = [1.0, 2.0, 5.0, 12.0] thr = 0.5 slope = 1.0 # pixels / timestep # compute probability forecast fct = forecast(precip, velocity, timesteps, thr, slope=slope) assert fct.ndim == 3 assert fct.shape[0] == len(timesteps) assert fct.shape[1:] == precip.shape assert fct.max() <= 1.0 assert fct.min() >= 0.0 def 
test_real_case(): """""" pytest.importorskip("cv2") # inputs precip, metadata = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) # motion motion = dense_lucaskanade(precip) # parameters timesteps = [1, 2, 3] thr = 1 # mm / h slope = 1 * metadata["accutime"] # min-1 # compute probability forecast extrap_kwargs = dict(allow_nonfinite_values=True) fct = forecast( precip[-1], motion, timesteps, thr, slope=slope, extrap_kwargs=extrap_kwargs ) assert fct.ndim == 3 assert fct.shape[0] == len(timesteps) assert fct.shape[1:] == precip.shape[1:] assert np.nanmax(fct) <= 1.0 assert np.nanmin(fct) >= 0.0 def test_wrong_inputs(): # dummy inputs precip = np.zeros((3, 3)) velocity = np.zeros((2, *precip.shape)) # timesteps must be > 0 with pytest.raises(ValueError): forecast(precip, velocity, 0, 1) # timesteps must be a sorted list with pytest.raises(ValueError): forecast(precip, velocity, [2, 1], 1) ================================================ FILE: pysteps/tests/test_nowcasts_linda.py ================================================ from datetime import timedelta import os import numpy as np import pytest from pysteps import io, motion, nowcasts, verification from pysteps.nowcasts.linda import forecast from pysteps.tests.helpers import get_precipitation_fields linda_arg_names = ( "timesteps", "add_perturbations", "kernel_type", "vel_pert_method", "num_workers", "measure_time", "min_csi", "max_crps", ) linda_arg_values = [ (3, False, "anisotropic", None, 1, False, 0.5, None), ([3], False, "anisotropic", None, 1, False, 0.5, None), (3, False, "isotropic", None, 5, True, 0.5, None), (3, True, "anisotropic", None, 1, True, None, 0.3), (3, True, "isotropic", "bps", 5, True, None, 0.3), ] def test_default_linda_norain(): """Tests linda nowcast with default params and all-zero inputs.""" # Define dummy nowcast input data precip_input = np.zeros((3, 100, 100)) pytest.importorskip("cv2") oflow_method = 
motion.get_method("LK") retrieved_motion = oflow_method(precip_input) nowcast_method = nowcasts.get_method("linda") precip_forecast = nowcast_method( precip_input, retrieved_motion, n_ens_members=3, timesteps=3, kmperpixel=1, timestep=5, ) assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == 3 assert precip_forecast.shape[1] == 3 assert precip_forecast.sum() == 0.0 @pytest.mark.parametrize(linda_arg_names, linda_arg_values) def test_linda( timesteps, add_perturbations, kernel_type, vel_pert_method, num_workers, measure_time, min_csi, max_crps, ): """Tests LINDA nowcast.""" pytest.importorskip("cv2") pytest.importorskip("skimage") # inputs precip_input, metadata = get_precipitation_fields( num_prev_files=2, num_next_files=0, metadata=True, clip=(354000, 866000, -96000, 416000), upscale=4000, log_transform=False, ) precip_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, clip=(354000, 866000, -96000, 416000), upscale=4000, log_transform=False, )[1:, :, :] oflow_method = motion.get_method("LK") retrieved_motion = oflow_method(precip_input) precip_forecast = forecast( precip_input, retrieved_motion, timesteps, kernel_type=kernel_type, vel_pert_method=vel_pert_method, feature_kwargs={"threshold": 1.5, "min_sigma": 2, "max_sigma": 10}, add_perturbations=add_perturbations, kmperpixel=4.0, timestep=metadata["accutime"], measure_time=measure_time, n_ens_members=5, num_workers=num_workers, seed=42, ) num_nowcast_timesteps = timesteps if isinstance(timesteps, int) else len(timesteps) if measure_time: assert len(precip_forecast) == num_nowcast_timesteps assert isinstance(precip_forecast[1], float) precip_forecast = precip_forecast[0] if not add_perturbations: assert precip_forecast.ndim == 3 assert precip_forecast.shape[0] == num_nowcast_timesteps assert precip_forecast.shape[1:] == precip_input.shape[1:] csi = verification.det_cat_fct( precip_forecast[-1], precip_obs[-1], thr=1.0, scores="CSI" )["CSI"] assert csi > min_csi, f"CSI={csi:.1f}, 
required > {min_csi:.1f}" else: assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == 5 assert precip_forecast.shape[1] == num_nowcast_timesteps assert precip_forecast.shape[2:] == precip_input.shape[1:] crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1]) assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}" def test_linda_wrong_inputs(): # dummy inputs precip = np.zeros((3, 3, 3)) velocity = np.zeros((2, 3, 3)) # vel_pert_method is set but kmperpixel is None with pytest.raises(ValueError): forecast(precip, velocity, 1, vel_pert_method="bps", kmperpixel=None) # vel_pert_method is set but timestep is None with pytest.raises(ValueError): forecast( precip, velocity, 1, vel_pert_method="bps", kmperpixel=1, timestep=None ) # ari_order 1 or 2 required with pytest.raises(ValueError): forecast(precip, velocity, 1, ari_order=3) # precip_fields must be a three-dimensional array with pytest.raises(ValueError): forecast(np.zeros((3, 3, 3, 3)), velocity, 1) # precip_fields.shape[0] < ari_order+2 with pytest.raises(ValueError): forecast(np.zeros((2, 3, 3)), velocity, 1, ari_order=1) # advection_field must be a three-dimensional array with pytest.raises(ValueError): forecast(precip, velocity[0], 1) # dimension mismatch between precip_fields and advection_field with pytest.raises(ValueError): forecast(np.zeros((3, 2, 3)), velocity, 1) def test_linda_callback(tmp_path): """Test LINDA callback functionality to export the output as a netcdf.""" pytest.importorskip("skimage") n_ens_members = 2 n_timesteps = 3 precip_input, metadata = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) precip_input = precip_input.filled() field_shape = (precip_input.shape[1], precip_input.shape[2]) startdate = metadata["timestamps"][-1] timestep = metadata["accutime"] motion_field = np.zeros((2, *field_shape)) exporter = io.initialize_forecast_exporter_netcdf( outpath=tmp_path.as_posix(), 
outfnprefix="test_linda", startdate=startdate, timestep=timestep, n_timesteps=n_timesteps, shape=field_shape, n_ens_members=n_ens_members, metadata=metadata, incremental="timestep", ) def callback(array): return io.export_forecast_dataset(array, exporter) precip_output = nowcasts.get_method("linda")( precip_input, motion_field, timesteps=n_timesteps, add_perturbations=False, n_ens_members=n_ens_members, kmperpixel=4.0, timestep=metadata["accutime"], callback=callback, return_output=True, ) io.close_forecast_files(exporter) # assert that netcdf exists and its size is not zero tmp_file = os.path.join(tmp_path, "test_linda.nc") assert os.path.exists(tmp_file) and os.path.getsize(tmp_file) > 0 # assert that the file can be read by the nowcast importer precip_netcdf, metadata_netcdf = io.import_netcdf_pysteps(tmp_file, dtype="float64") # assert that the dimensionality of the array is as expected assert precip_netcdf.ndim == 4, "Wrong number of dimensions" assert precip_netcdf.shape[0] == n_ens_members, "Wrong ensemble size" assert precip_netcdf.shape[1] == n_timesteps, "Wrong number of lead times" assert precip_netcdf.shape[2:] == field_shape, "Wrong field shape" # assert that the saved output is the same as the original output assert np.allclose( precip_netcdf, precip_output, equal_nan=True ), "Wrong output values" # assert that leadtimes and timestamps are as expected td = timedelta(minutes=timestep) leadtimes = [(i + 1) * timestep for i in range(n_timesteps)] timestamps = [startdate + (i + 1) * td for i in range(n_timesteps)] assert (metadata_netcdf["leadtimes"] == leadtimes).all(), "Wrong leadtimes" assert (metadata_netcdf["timestamps"] == timestamps).all(), "Wrong timestamps" ================================================ FILE: pysteps/tests/test_nowcasts_sprog.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from pysteps import motion, nowcasts, verification from pysteps.tests.helpers import 
get_precipitation_fields sprog_arg_names = ( "n_cascade_levels", "ar_order", "probmatching_method", "domain", "timesteps", "min_csi", ) sprog_arg_values = [ (6, 1, None, "spatial", 3, 0.5), (6, 1, None, "spatial", [3], 0.5), (6, 2, None, "spatial", 3, 0.5), (6, 2, "cdf", "spatial", 3, 0.5), (6, 2, "mean", "spatial", 3, 0.5), (6, 2, "cdf", "spectral", 3, 0.5), ] def test_default_sprog_norain(): """Tests SPROG nowcast with default params and all-zero inputs.""" # Define dummy nowcast input data precip_input = np.zeros((3, 100, 100)) pytest.importorskip("cv2") oflow_method = motion.get_method("LK") retrieved_motion = oflow_method(precip_input) nowcast_method = nowcasts.get_method("sprog") precip_forecast = nowcast_method( precip_input, retrieved_motion, timesteps=3, precip_thr=0.1, ) assert precip_forecast.ndim == 3 assert precip_forecast.shape[0] == 3 assert precip_forecast.sum() == 0.0 @pytest.mark.parametrize(sprog_arg_names, sprog_arg_values) def test_sprog( n_cascade_levels, ar_order, probmatching_method, domain, timesteps, min_csi ): """Tests SPROG nowcast.""" # inputs precip_input, metadata = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) precip_input = precip_input.filled() precip_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 )[1:, :, :] precip_obs = precip_obs.filled() pytest.importorskip("cv2") oflow_method = motion.get_method("LK") retrieved_motion = oflow_method(precip_input) nowcast_method = nowcasts.get_method("sprog") precip_forecast = nowcast_method( precip_input, retrieved_motion, timesteps=timesteps, precip_thr=metadata["threshold"], n_cascade_levels=n_cascade_levels, ar_order=ar_order, probmatching_method=probmatching_method, domain=domain, ) assert precip_forecast.ndim == 3 assert precip_forecast.shape[0] == ( timesteps if isinstance(timesteps, int) else len(timesteps) ) result = verification.det_cat_fct( precip_forecast[-1], 
precip_obs[-1], thr=0.1, scores="CSI" )["CSI"] assert result > min_csi, f"CSI={result:.1f}, required > {min_csi:.1f}" if __name__ == "__main__": for n in range(len(sprog_arg_values)): test_args = zip(sprog_arg_names, sprog_arg_values[n]) test_sprog(**dict((x, y) for x, y in test_args)) ================================================ FILE: pysteps/tests/test_nowcasts_sseps.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from pysteps import motion, nowcasts, verification from pysteps.tests.helpers import get_precipitation_fields sseps_arg_names = ( "n_ens_members", "n_cascade_levels", "ar_order", "mask_method", "probmatching_method", "win_size", "timesteps", "max_crps", ) sseps_arg_values = [ (5, 6, 2, "incremental", "cdf", 200, 3, 0.60), (5, 6, 2, "incremental", "cdf", 200, [3], 0.60), ] def test_default_sseps_norain(): """Tests SSEPS nowcast with default params and all-zero inputs.""" # Define dummy nowcast input data precip_input = np.zeros((3, 100, 100)) metadata = { "accutime": 5, "xpixelsize": 1000, "threshold": 0.1, "zerovalue": 0, } pytest.importorskip("cv2") oflow_method = motion.get_method("LK") retrieved_motion = oflow_method(precip_input) nowcast_method = nowcasts.get_method("sseps") precip_forecast = nowcast_method( precip_input, metadata, retrieved_motion, n_ens_members=3, timesteps=3, ) assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == 3 assert precip_forecast.shape[1] == 3 assert precip_forecast.sum() == 0.0 @pytest.mark.parametrize(sseps_arg_names, sseps_arg_values) def test_sseps( n_ens_members, n_cascade_levels, ar_order, mask_method, probmatching_method, win_size, timesteps, max_crps, ): """Tests SSEPS nowcast.""" # inputs precip_input, metadata = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) precip_input = precip_input.filled() precip_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, 
return_raw=False, upscale=2000 )[1:, :, :] precip_obs = precip_obs.filled() pytest.importorskip("cv2") oflow_method = motion.get_method("LK") retrieved_motion = oflow_method(precip_input) nowcast_method = nowcasts.get_method("sseps") precip_forecast = nowcast_method( precip_input, metadata, retrieved_motion, win_size=win_size, timesteps=timesteps, n_ens_members=n_ens_members, n_cascade_levels=n_cascade_levels, ar_order=ar_order, seed=42, mask_method=mask_method, probmatching_method=probmatching_method, ) assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == n_ens_members assert precip_forecast.shape[1] == ( timesteps if isinstance(timesteps, int) else len(timesteps) ) crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1]) assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}" if __name__ == "__main__": for n in range(len(sseps_arg_values)): test_args = zip(sseps_arg_names, sseps_arg_values[n]) test_sseps(**dict((x, y) for x, y in test_args)) ================================================ FILE: pysteps/tests/test_nowcasts_steps.py ================================================ import os from datetime import timedelta import numpy as np import pytest from pysteps import io, motion, nowcasts, verification from pysteps.tests.helpers import get_precipitation_fields steps_arg_names = ( "n_ens_members", "n_cascade_levels", "ar_order", "mask_method", "probmatching_method", "domain", "timesteps", "max_crps", ) steps_arg_values = [ (5, 6, 2, None, None, "spatial", 3, 1.30), (5, 6, 2, None, None, "spatial", [3], 1.30), (5, 6, 2, "incremental", None, "spatial", 3, 7.32), (5, 6, 2, "sprog", None, "spatial", 3, 8.4), (5, 6, 2, "obs", None, "spatial", 3, 8.37), (5, 6, 2, None, "cdf", "spatial", 3, 0.60), (5, 6, 2, None, "mean", "spatial", 3, 1.35), (5, 6, 2, "incremental", "cdf", "spectral", 3, 0.60), ] def test_default_steps_norain(): """Tests STEPS nowcast with default params and all-zero inputs.""" # Define dummy nowcast 
input data precip_input = np.zeros((3, 100, 100)) pytest.importorskip("cv2") oflow_method = motion.get_method("LK") retrieved_motion = oflow_method(precip_input) nowcast_method = nowcasts.get_method("steps") precip_forecast = nowcast_method( precip_input, retrieved_motion, n_ens_members=3, timesteps=3, precip_thr=0.1, kmperpixel=1, timestep=5, ) assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == 3 assert precip_forecast.shape[1] == 3 assert precip_forecast.sum() == 0.0 @pytest.mark.parametrize(steps_arg_names, steps_arg_values) def test_steps_skill( n_ens_members, n_cascade_levels, ar_order, mask_method, probmatching_method, domain, timesteps, max_crps, ): """Tests STEPS nowcast skill.""" # inputs precip_input, metadata = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) precip_input = precip_input.filled() precip_obs = get_precipitation_fields( num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000 )[1:, :, :] precip_obs = precip_obs.filled() pytest.importorskip("cv2") oflow_method = motion.get_method("LK") retrieved_motion = oflow_method(precip_input) nowcast_method = nowcasts.get_method("steps") precip_forecast = nowcast_method( precip_input, retrieved_motion, timesteps=timesteps, precip_thr=metadata["threshold"], kmperpixel=2.0, timestep=metadata["accutime"], seed=42, n_ens_members=n_ens_members, n_cascade_levels=n_cascade_levels, ar_order=ar_order, mask_method=mask_method, probmatching_method=probmatching_method, domain=domain, ) assert precip_forecast.ndim == 4 assert precip_forecast.shape[0] == n_ens_members assert precip_forecast.shape[1] == ( timesteps if isinstance(timesteps, int) else len(timesteps) ) crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1]) assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}" def test_steps_callback(tmp_path): """Test STEPS callback functionality to export the output as a netcdf.""" 
pytest.importorskip("netCDF4") n_ens_members = 2 n_timesteps = 3 precip_input, metadata = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=True, upscale=2000, ) precip_input = precip_input.filled() field_shape = (precip_input.shape[1], precip_input.shape[2]) startdate = metadata["timestamps"][-1] timestep = metadata["accutime"] motion_field = np.zeros((2, *field_shape)) exporter = io.initialize_forecast_exporter_netcdf( outpath=tmp_path.as_posix(), outfnprefix="test_steps", startdate=startdate, timestep=timestep, n_timesteps=n_timesteps, shape=field_shape, n_ens_members=n_ens_members, metadata=metadata, incremental="timestep", ) def callback(array): return io.export_forecast_dataset(array, exporter) precip_output = nowcasts.get_method("steps")( precip_input, motion_field, timesteps=n_timesteps, precip_thr=metadata["threshold"], kmperpixel=2.0, timestep=timestep, seed=42, n_ens_members=n_ens_members, vel_pert_method=None, callback=callback, return_output=True, ) io.close_forecast_files(exporter) # assert that netcdf exists and its size is not zero tmp_file = os.path.join(tmp_path, "test_steps.nc") assert os.path.exists(tmp_file) and os.path.getsize(tmp_file) > 0 # assert that the file can be read by the nowcast importer precip_netcdf, metadata_netcdf = io.import_netcdf_pysteps(tmp_file, dtype="float64") # assert that the dimensionality of the array is as expected assert precip_netcdf.ndim == 4, "Wrong number of dimensions" assert precip_netcdf.shape[0] == n_ens_members, "Wrong ensemble size" assert precip_netcdf.shape[1] == n_timesteps, "Wrong number of lead times" assert precip_netcdf.shape[2:] == field_shape, "Wrong field shape" # assert that the saved output is the same as the original output assert np.allclose( precip_netcdf, precip_output, equal_nan=True ), "Wrong output values" # assert that leadtimes and timestamps are as expected td = timedelta(minutes=timestep) leadtimes = [(i + 1) * timestep for i in 
range(n_timesteps)] timestamps = [startdate + (i + 1) * td for i in range(n_timesteps)] assert (metadata_netcdf["leadtimes"] == leadtimes).all(), "Wrong leadtimes" assert (metadata_netcdf["timestamps"] == timestamps).all(), "Wrong timestamps" ================================================ FILE: pysteps/tests/test_nowcasts_utils.py ================================================ import numpy as np import pytest from pysteps import motion from pysteps.nowcasts import utils as nowcast_utils from pysteps.tests.helpers import get_precipitation_fields main_loop_arg_names = ( "timesteps", "ensemble", "num_ensemble_members", "velocity_perturbations", ) # TODO: add tests for callback and other untested options main_loop_arg_values = [ (6, False, 0, False), ([0.5, 1.5], False, 0, False), (6, True, 2, False), (6, True, 2, True), ] @pytest.mark.parametrize(main_loop_arg_names, main_loop_arg_values) def test_nowcast_main_loop( timesteps, ensemble, num_ensemble_members, velocity_perturbations ): """Test the nowcast_main_loop function.""" precip = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=False, metadata=False, upscale=2000, ) precip = precip.filled() oflow_method = motion.get_method("LK") velocity = oflow_method(precip) precip = precip[-1] state = {"input": precip} extrap_method = "semilagrangian" def func(state, params): if not ensemble: precip_out = state["input"] else: precip_out = state["input"][np.newaxis, :] return precip_out, state nowcast_utils.nowcast_main_loop( precip, velocity, state, timesteps, extrap_method, func, ensemble=ensemble, num_ensemble_members=num_ensemble_members, ) ================================================ FILE: pysteps/tests/test_paramsrc.py ================================================ # -*- coding: utf-8 -*- import os from tempfile import NamedTemporaryFile import pysteps from pysteps import load_config_file minimal_pystepsrc_file = """ // pysteps configuration { "silent_import": false, "outputs": { 
"path_outputs": "./" }, "plot": { "motion_plot": "quiver", "colorscale": "pysteps" }, "data_sources": { "bom": { "root_path": "./radar/bom", "path_fmt": "prcp-cscn/2/%Y/%m/%d", "fn_pattern": "2_%Y%m%d_%H%M00.prcp-cscn", "fn_ext": "nc", "importer": "bom_rf3", "timestep": 6, "importer_kwargs": { "gzipped": true } } } } """ def test_read_paramsrc(): """ Test that the parameter file is read correctly and the resulting pysteps.paramsrc dict can be accessed by attributes too. """ with NamedTemporaryFile(mode="w", delete=False) as tmp_paramsrc: tmp_paramsrc.write(minimal_pystepsrc_file) tmp_paramsrc.flush() # Perform a dry run that does not update # the internal pysteps.rcparams values. rcparams = load_config_file(tmp_paramsrc.name, dryrun=True, verbose=False) os.unlink(tmp_paramsrc.name) # Test item and attribute getters assert rcparams["data_sources"]["bom"]["fn_ext"] == "nc" assert rcparams.data_sources.bom.fn_ext == "nc" bom_datasource_as_dict = rcparams["data_sources"]["bom"] bom_datasource_as_attr = rcparams.data_sources.bom assert bom_datasource_as_dict is bom_datasource_as_attr bom_datasource = bom_datasource_as_attr timestep_as_dict = bom_datasource["timestep"] timestep_as_attr = bom_datasource.timestep assert timestep_as_dict == 6 assert timestep_as_attr == 6 assert timestep_as_dict is timestep_as_attr importer_kwargs_dict = bom_datasource["importer_kwargs"] importer_kwargs_attr = bom_datasource.importer_kwargs assert importer_kwargs_attr is importer_kwargs_dict assert importer_kwargs_attr["gzipped"] is importer_kwargs_attr.gzipped assert importer_kwargs_attr["gzipped"] is True # Test item and attribute setters rcparams.test = 4 assert rcparams.test == 4 assert rcparams.test is rcparams["test"] rcparams["test2"] = 4 assert rcparams.test2 == 4 assert rcparams.test2 is rcparams["test2"] rcparams.test = dict(a=1, b="test") assert rcparams.test == dict(a=1, b="test") assert rcparams.test["a"] == 1 assert rcparams.test["b"] == "test" assert rcparams.test["a"] is 
rcparams["test"].a assert rcparams.test["b"] is rcparams["test"].b ================================================ FILE: pysteps/tests/test_plt_animate.py ================================================ # -*- coding: utf-8 -*- import os import numpy as np import pytest from unittest.mock import patch from pysteps.tests.helpers import get_precipitation_fields from pysteps.visualization.animations import animate PRECIP, METADATA = get_precipitation_fields( num_prev_files=2, num_next_files=0, return_raw=True, metadata=True, upscale=2000, ) VALID_ARGS = ( ([PRECIP], {}), ([PRECIP], {"title": "title"}), ([PRECIP], {"timestamps_obs": METADATA["timestamps"]}), ([PRECIP], {"geodata": METADATA, "map_kwargs": {"plot_map": None}}), ([PRECIP], {"motion_field": np.ones((2, *PRECIP.shape[1:]))}), ( [PRECIP], {"precip_kwargs": {"units": "mm/h", "colorbar": True, "colorscale": "pysteps"}}, ), ([PRECIP, PRECIP], {}), ([PRECIP, PRECIP], {"title": "title"}), ([PRECIP, PRECIP], {"timestamps_obs": METADATA["timestamps"]}), ([PRECIP, PRECIP], {"timestamps_obs": METADATA["timestamps"], "timestep_min": 5}), ([PRECIP, PRECIP], {"ptype": "prob", "prob_thr": 1}), ([PRECIP, PRECIP], {"ptype": "mean"}), ([PRECIP, np.stack((PRECIP, PRECIP))], {"ptype": "ensemble"}), ) @pytest.mark.parametrize(["anim_args", "anim_kwargs"], VALID_ARGS) def test_animate(anim_args, anim_kwargs): with patch("matplotlib.pyplot.show"): animate(*anim_args, **anim_kwargs) VALUEERROR_ARGS = ( ([PRECIP], {"timestamps_obs": METADATA["timestamps"][:2]}), ([PRECIP], {"motion_plot": "test"}), ([PRECIP, PRECIP], {"ptype": "prob"}), ) @pytest.mark.parametrize(["anim_args", "anim_kwargs"], VALUEERROR_ARGS) def test_animate_valueerrors(anim_args, anim_kwargs): with pytest.raises(ValueError): animate(*anim_args, **anim_kwargs) TYPEERROR_ARGS = ( ([PRECIP], {"timestamps": METADATA["timestamps"]}), ([PRECIP], {"plotanimation": True}), ([PRECIP], {"units": "mm/h"}), ([PRECIP], {"colorbar": True}), ([PRECIP], {"colorscale": 
"pysteps"}), ([PRECIP, PRECIP], {"type": "ensemble"}), ) @pytest.mark.parametrize(["anim_args", "anim_kwargs"], TYPEERROR_ARGS) def test_animate_typeerrors(anim_args, anim_kwargs): with pytest.raises(TypeError): animate(*anim_args, **anim_kwargs) def test_animate_save(tmp_path): animate( PRECIP, np.stack((PRECIP, PRECIP)), display_animation=False, savefig=True, path_outputs=tmp_path, fig_dpi=10, ) assert len(os.listdir(tmp_path)) == 9 ================================================ FILE: pysteps/tests/test_plt_cartopy.py ================================================ # -*- coding: utf-8 -*- import pytest from pysteps.visualization import plot_precip_field from pysteps.utils import to_rainrate from pysteps.tests.helpers import get_precipitation_fields import matplotlib.pyplot as plt plt_arg_names = ("source", "map_kwargs", "pass_geodata") plt_arg_values = [ ("mch", {"drawlonlatlines": False, "lw": 0.5, "plot_map": None}, False), ("mch", {"drawlonlatlines": False, "lw": 0.5, "plot_map": "cartopy"}, False), ("mch", {"drawlonlatlines": False, "lw": 0.5}, True), ("mch", {"drawlonlatlines": True, "lw": 1.0}, True), ("bom", {"drawlonlatlines": True, "lw": 0.5}, True), ("fmi", {"drawlonlatlines": True, "lw": 0.5}, True), ("knmi", {"drawlonlatlines": True, "lw": 0.5}, True), ("opera", {"drawlonlatlines": True, "lw": 0.5}, True), ("mrms", {"drawlonlatlines": True, "lw": 0.5}, True), ("saf", {"drawlonlatlines": True, "lw": 0.5}, True), ] @pytest.mark.parametrize(plt_arg_names, plt_arg_values) def test_visualization_plot_precip_field(source, map_kwargs, pass_geodata): field, metadata = get_precipitation_fields(0, 0, True, True, None, source) field = field.squeeze() field, __ = to_rainrate(field, metadata) if not pass_geodata: metadata = None plot_precip_field(field, ptype="intensity", geodata=metadata, map_kwargs=map_kwargs) if __name__ == "__main__": for i, args in enumerate(plt_arg_values): test_visualization_plot_precip_field(*args) plt.show() 
================================================ FILE: pysteps/tests/test_plt_motionfields.py ================================================ # -*- coding: utf-8 -*- import matplotlib.pyplot as plt import numpy as np import pytest from pysteps import motion from pysteps.visualization import plot_precip_field, quiver, streamplot from pysteps.tests.helpers import get_precipitation_fields arg_names_quiver = ( "source", "axis", "step", "quiver_kwargs", "map_kwargs", "upscale", "pass_geodata", ) arg_values_quiver = [ (None, "off", 10, {}, {"drawlonlatlines": False, "lw": 0.5}, None, False), ("bom", "on", 10, {}, {"drawlonlatlines": False, "lw": 0.5}, 4000, False), ("bom", "on", 10, {}, {"drawlonlatlines": True, "lw": 0.5}, 4000, True), ("mch", "on", 20, {}, {"drawlonlatlines": False, "lw": 0.5}, 2000, True), ] @pytest.mark.parametrize(arg_names_quiver, arg_values_quiver) def test_visualization_motionfields_quiver( source, axis, step, quiver_kwargs, map_kwargs, upscale, pass_geodata ): pytest.importorskip("cv2") if source is not None: fields, geodata = get_precipitation_fields(0, 2, False, True, upscale, source) if not pass_geodata: geodata = None ax = plot_precip_field(fields[-1], geodata=geodata) oflow_method = motion.get_method("LK") UV = oflow_method(fields) else: shape = (100, 100) geodata = None ax = None u = np.ones(shape[1]) * shape[0] v = np.arange(0, shape[0]) U, V = np.meshgrid(u, v) UV = np.concatenate([U[None, :], V[None, :]]) UV_orig = UV.copy() __ = quiver(UV, ax, geodata, axis, step, quiver_kwargs, map_kwargs=map_kwargs) # Check that quiver does not modify the input data assert np.array_equal(UV, UV_orig) arg_names_streamplot = ( "source", "axis", "streamplot_kwargs", "map_kwargs", "upscale", "pass_geodata", ) arg_values_streamplot = [ (None, "off", {}, {"drawlonlatlines": False, "lw": 0.5}, None, False), ("bom", "on", {}, {"drawlonlatlines": False, "lw": 0.5}, 4000, False), ("bom", "on", {"density": 0.5}, {"drawlonlatlines": True, "lw": 0.5}, 4000, 
True), ] @pytest.mark.parametrize(arg_names_streamplot, arg_values_streamplot) def test_visualization_motionfields_streamplot( source, axis, streamplot_kwargs, map_kwargs, upscale, pass_geodata ): pytest.importorskip("cv2") if source is not None: fields, geodata = get_precipitation_fields(0, 2, False, True, upscale, source) if not pass_geodata: pass_geodata = None ax = plot_precip_field(fields[-1], geodata=geodata) oflow_method = motion.get_method("LK") UV = oflow_method(fields) else: shape = (100, 100) geodata = None ax = None u = np.ones(shape[1]) * shape[0] v = np.arange(0, shape[0]) U, V = np.meshgrid(u, v) UV = np.concatenate([U[None, :], V[None, :]]) UV_orig = UV.copy() __ = streamplot(UV, ax, geodata, axis, streamplot_kwargs, map_kwargs=map_kwargs) # Check that streamplot does not modify the input data assert np.array_equal(UV, UV_orig) if __name__ == "__main__": for i, args in enumerate(arg_values_quiver): test_visualization_motionfields_quiver(*args) plt.show() for i, args in enumerate(arg_values_streamplot): test_visualization_motionfields_streamplot(*args) plt.show() ================================================ FILE: pysteps/tests/test_plt_precipfields.py ================================================ # -*- coding: utf-8 -*- import pytest from pysteps.visualization import plot_precip_field from pysteps.utils import conversion from pysteps.postprocessing import ensemblestats from pysteps.tests.helpers import get_precipitation_fields import matplotlib.pyplot as plt import numpy as np plt_arg_names = ( "source", "plot_type", "bbox", "colorscale", "probthr", "title", "colorbar", "axis", ) plt_arg_values = [ ("mch", "intensity", None, "pysteps", None, None, False, "off"), ("mch", "depth", None, "pysteps", None, "Title", True, "on"), ("mch", "prob", None, "pysteps", 0.1, None, True, "on"), ("mch", "intensity", None, "STEPS-BE", None, None, True, "on"), ("mch", "intensity", None, "BOM-RF3", None, None, True, "on"), ("bom", "intensity", None, "pysteps", 
None, None, True, "on"), ("fmi", "intensity", None, "pysteps", None, None, True, "on"), ("knmi", "intensity", None, "pysteps", None, None, True, "on"), ("knmi", "intensity", None, "STEPS-NL", None, None, True, "on"), ("knmi", "intensity", [300, 300, 500, 500], "pysteps", None, None, True, "on"), ("opera", "intensity", None, "pysteps", None, None, True, "on"), ("saf", "intensity", None, "pysteps", None, None, True, "on"), ] @pytest.mark.parametrize(plt_arg_names, plt_arg_values) def test_visualization_plot_precip_field( source, plot_type, bbox, colorscale, probthr, title, colorbar, axis ): if plot_type == "intensity": field, metadata = get_precipitation_fields(0, 0, True, True, None, source) field = field.squeeze() field, metadata = conversion.to_rainrate(field, metadata) elif plot_type == "depth": field, metadata = get_precipitation_fields(0, 0, True, True, None, source) field = field.squeeze() field, metadata = conversion.to_raindepth(field, metadata) elif plot_type == "prob": field, metadata = get_precipitation_fields(0, 10, True, True, None, source) field, metadata = conversion.to_rainrate(field, metadata) field = ensemblestats.excprob(field, probthr) field_orig = field.copy() ax = plot_precip_field( field.copy(), ptype=plot_type, bbox=bbox, geodata=None, colorscale=colorscale, probthr=probthr, units=metadata["unit"], title=title, colorbar=colorbar, axis=axis, ) # Check that plot_precip_field does not modify the input data field_orig = np.ma.masked_invalid(field_orig) field_orig.data[field_orig.mask] = -100 field = np.ma.masked_invalid(field) field.data[field.mask] = -100 assert np.array_equal(field_orig.data, field.data) if __name__ == "__main__": for i, args in enumerate(plt_arg_values): test_visualization_plot_precip_field(*args) plt.show() ================================================ FILE: pysteps/tests/test_plugins_support.py ================================================ # -*- coding: utf-8 -*- """ Script to test the plugin support. 
https://github.com/pySTEPS/cookiecutter-pysteps-plugin """ import os import pytest import subprocess import sys import tempfile __ = pytest.importorskip("cookiecutter") from cookiecutter.main import cookiecutter PLUGIN_TEMPLATE_URL = "https://github.com/pysteps/cookiecutter-pysteps-plugin" from contextlib import contextmanager from pysteps import io, postprocessing def _check_installed_importer_plugin(import_func_name): # reload the pysteps module to detect the installed plugin io.discover_importers() print(io.importers_info()) import_func_name = import_func_name.replace("importer_", "import_") assert hasattr(io.importers, import_func_name) func_name = import_func_name.replace("import_", "") assert func_name in io.interface._importer_methods importer = getattr(io.importers, import_func_name) importer("filename") def _check_installed_diagnostic_plugin(diagnostic_func_name): # reload the pysteps module to detect the installed plugin postprocessing.discover_postprocessors() assert hasattr(postprocessing.diagnostics, diagnostic_func_name) assert diagnostic_func_name in postprocessing.interface._diagnostics_methods diagnostic = getattr(postprocessing.diagnostics, diagnostic_func_name) diagnostic("filename") @contextmanager def _create_and_install_plugin(project_name, plugin_type): with tempfile.TemporaryDirectory() as tmpdirname: print(f"Installing plugin {project_name} providing a {plugin_type} module") cookiecutter( PLUGIN_TEMPLATE_URL, no_input=True, overwrite_if_exists=True, extra_context={ "project_name": project_name, "plugin_type": plugin_type, }, output_dir=tmpdirname, ) # Install the plugin subprocess.check_call( [ sys.executable, "-m", "pip", "install", "--force-reinstall", os.path.join(tmpdirname, project_name), ] ) # The block below, together with the decorator used in this function are used # to create a context manager that uninstall the plugin packages after the # tests finish (even if they fail). 
# https://docs.pytest.org/en/stable/fixture.html?highlight=context#fixture-finalization-executing-teardown-code try: yield project_name finally: _uninstall_plugin(project_name) def _uninstall_plugin(project_name): # Install the plugin subprocess.check_call( [sys.executable, "-m", "pip", "uninstall", "-y", project_name] ) def test_importers_plugins(): with _create_and_install_plugin("pysteps-importer-institution-fun", "importer"): _check_installed_importer_plugin("importer_institution_fun") def test_diagnostic_plugins(): with _create_and_install_plugin("pysteps-diagnostic-fun", "diagnostic"): _check_installed_diagnostic_plugin("diagnostic_fun") ================================================ FILE: pysteps/tests/test_postprocessing_ensemblestats.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from numpy.testing import assert_array_almost_equal from pysteps.postprocessing.ensemblestats import excprob, mean, banddepth # CREATE DATASETS TO TEST a = np.arange(9, dtype=float).reshape(3, 3) b = np.tile(a, (4, 1, 1)) b1 = b.copy() b1[3] = np.nan a1 = a.copy() a1[:] = np.nan a2 = a.copy() a2[0, :] = np.nan # test data test_data = [ (a, False, None, a), (b, False, None, a), (b1, True, None, a), (b1, False, None, a1), (b, False, 0.0, a), (b, False, 3.0, a2), (b, True, 3.0, a2), (b1, True, 3.0, a2), ] @pytest.mark.parametrize("X, ignore_nan, X_thr, expected", test_data) def test_ensemblestats_mean(X, ignore_nan, X_thr, expected): """ Test ensemblestats mean.""" assert_array_almost_equal(mean(X, ignore_nan, X_thr), expected) # test exceptions test_exceptions = [(0), (None), (a[0, :]), (np.tile(a, (4, 1, 1, 1)))] @pytest.mark.parametrize("X", test_exceptions) def test_exceptions_mean(X): with pytest.raises(Exception): mean(X) # test data b2 = b.copy() b2[2, 2, 2] = np.nan test_data = [ (b, 2.0, False, np.array([[0.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]])), (b2, 2.0, False, np.array([[0.0, 0.0, 1.0], [1.0, 1.0, 
1.0], [1.0, 1.0, np.nan]])), (b2, 2.0, True, np.array([[0.0, 0.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]])), ] @pytest.mark.parametrize("X, X_thr, ignore_nan, expected", test_data) def test_ensemblestats_excprob(X, X_thr, ignore_nan, expected): """Test ensemblestats excprob.""" assert_array_almost_equal(excprob(X, X_thr, ignore_nan), expected) # test exceptions test_exceptions = [(0), (None), (a[0, :]), (a)] @pytest.mark.parametrize("X", test_exceptions) def test_exceptions_excprob(X): with pytest.raises(Exception): excprob(X, 2.0) # test data b3 = np.tile(a, (5, 1, 1)) + 1 b3 *= np.arange(1, 6)[:, None, None] b3[2, 2, 2] = np.nan test_data = [ (b3, 1, True, np.array([0.0, 0.75, 1.0, 0.75, 0.0])), (b3, None, False, np.array([0.4, 0.7, 0.8, 0.7, 0.4])), ] @pytest.mark.parametrize("X, thr, norm, expected", test_data) def test_ensemblestats_banddepth(X, thr, norm, expected): """Test ensemblestats banddepth.""" assert_array_almost_equal(banddepth(X, thr, norm), expected) ================================================ FILE: pysteps/tests/test_postprocessing_probmatching.py ================================================ import numpy as np import pytest from pysteps.postprocessing.probmatching import ( nonparam_match_empirical_cdf, resample_distributions, ) class TestResampleDistributions: @pytest.fixture(autouse=True) def setup(self): # Set the seed for reproducibility np.random.seed(42) def test_valid_inputs(self): first_array = np.array([1, 3, 5, 7, 9]) second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.6 result = resample_distributions( first_array, second_array, probability_first_array ) expected_result = np.array([9, 8, 6, 3, 1]) # Expected result based on the seed assert result.shape == first_array.shape assert np.array_equal(result, expected_result) def test_probability_zero(self): first_array = np.array([1, 3, 5, 7, 9]) second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.0 result = resample_distributions( first_array, 
second_array, probability_first_array ) assert np.array_equal(result, np.sort(second_array)[::-1]) def test_probability_one(self): first_array = np.array([1, 3, 5, 7, 9]) second_array = np.array([2, 4, 6, 8, 10]) probability_first_array = 1.0 result = resample_distributions( first_array, second_array, probability_first_array ) assert np.array_equal(result, np.sort(first_array)[::-1]) def test_nan_in_arr1_prob_1(self): array_with_nan = np.array([1, 3, np.nan, 7, 9]) array_without_nan = np.array([2.0, 4, 6, 8, 10]) probability_first_array = 1.0 result = resample_distributions( array_with_nan, array_without_nan, probability_first_array ) expected_result = np.array([np.nan, 9, 7, 3, 1], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) def test_nan_in_arr1_prob_0(self): array_with_nan = np.array([1, 3, np.nan, 7, 9]) array_without_nan = np.array([2, 4, 6, 8, 10]) probability_first_array = 0.0 result = resample_distributions( array_with_nan, array_without_nan, probability_first_array ) expected_result = np.array([np.nan, 10, 8, 4, 2], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) def test_nan_in_arr2_prob_1(self): array_without_nan = np.array([1, 3, 5, 7, 9]) array_with_nan = np.array([2.0, 4, 6, np.nan, 10]) probability_first_array = 1.0 result = resample_distributions( array_without_nan, array_with_nan, probability_first_array ) expected_result = np.array([np.nan, 9, 5, 3, 1], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) def test_nan_in_arr2_prob_0(self): array_without_nan = np.array([1, 3, 5, 7, 9]) array_with_nan = np.array([2, 4, 6, np.nan, 10]) probability_first_array = 0.0 result = resample_distributions( array_without_nan, array_with_nan, probability_first_array ) expected_result = np.array([np.nan, 10, 6, 4, 2], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) def test_nan_in_both_prob_1(self): array1_with_nan = np.array([1, np.nan, np.nan, 7, 9]) 
array2_with_nan = np.array([2.0, 4, np.nan, np.nan, 10]) probability_first_array = 1.0 result = resample_distributions( array1_with_nan, array2_with_nan, probability_first_array ) expected_result = np.array([np.nan, np.nan, np.nan, 9, 1], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) def test_nan_in_both_prob_0(self): array1_with_nan = np.array([1, np.nan, np.nan, 7, 9]) array2_with_nan = np.array([2.0, 4, np.nan, np.nan, 10]) probability_first_array = 0.0 result = resample_distributions( array1_with_nan, array2_with_nan, probability_first_array ) expected_result = np.array([np.nan, np.nan, np.nan, 10, 2], dtype=float) assert np.allclose(result, expected_result, equal_nan=True) class TestNonparamMatchEmpiricalCDF: @pytest.fixture(autouse=True) def setup(self): # Set the seed for reproducibility np.random.seed(42) def test_ignore_indices_with_nans_both(self): initial_array = np.array([np.nan, np.nan, 6, 2, 0, 0, 0, 0, 0, 0]) target_array = np.array([np.nan, np.nan, 9, 5, 4, 0, 0, 0, 0, 0]) result = nonparam_match_empirical_cdf( initial_array, target_array, ignore_indices=np.isnan(initial_array) ) expected_result = np.array([np.nan, np.nan, 9, 5, 0, 0, 0, 0, 0, 0]) assert np.allclose(result, expected_result, equal_nan=True) def test_zeroes_initial(self): initial_array = np.zeros(10) target_array = np.array([0, 2, 3, 4, 5, 6, 7, 8, 9, 10]) result = nonparam_match_empirical_cdf(initial_array, target_array) expected_result = np.zeros(10) assert np.allclose(result, expected_result) def test_nans_initial(self): initial_array = np.array( [0, 1, 2, 3, 4, np.nan, np.nan, np.nan, np.nan, np.nan] ) target_array = np.array([0, 2, 3, 4, 5, 6, 7, 8, 9, 10]) with pytest.raises( ValueError, match="Initial array contains non-finite values outside ignore_indices mask.", ): nonparam_match_empirical_cdf(initial_array, target_array) def test_all_nans_initial(self): initial_array = np.full(10, np.nan) target_array = np.array([0, 2, 3, 4, 5, 6, 7, 8, 9, 10]) 
with pytest.raises(ValueError, match="Initial array contains only nans."): nonparam_match_empirical_cdf(initial_array, target_array) def test_ignore_indices_nans_initial(self): initial_array = np.array( [0, 1, 2, 3, 4, np.nan, np.nan, np.nan, np.nan, np.nan] ) target_array = np.array([0, 2, 3, 4, 5, 6, 7, 8, 9, 10]) result = nonparam_match_empirical_cdf( initial_array, target_array, ignore_indices=np.isnan(initial_array) ) expected_result = np.array( [0, 7, 8, 9, 10, np.nan, np.nan, np.nan, np.nan, np.nan] ) assert np.allclose(result, expected_result, equal_nan=True) def test_ignore_indices_nans_target(self): # We expect the initial_array values for which ignore_indices is true to be conserved as-is. initial_array = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) target_array = np.array( [0, 2, 3, 4, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan] ) result = nonparam_match_empirical_cdf( initial_array, target_array, ignore_indices=np.isnan(target_array) ) expected_result = np.array([0, 2, 3, 4, 4, 5, 6, 7, 8, 9]) assert np.allclose(result, expected_result, equal_nan=True) def test_more_zeroes_in_initial(self): initial_array = np.array([1, 4, 0, 0, 0, 0, 0, 0, 0, 0]) target_array = np.array([10, 8, 6, 4, 2, 0, 0, 0, 0, 0]) result = nonparam_match_empirical_cdf( initial_array, target_array, ignore_indices=np.isnan(initial_array) ) expected_result = np.array([8, 10, 0, 0, 0, 0, 0, 0, 0, 0]) assert np.allclose(result, expected_result, equal_nan=True) def test_more_zeroes_in_initial_unsrt(self): initial_array = np.array([1, 4, 0, 0, 0, 0, 0, 0, 0, 0]) target_array = np.array([6, 4, 2, 0, 0, 0, 0, 0, 10, 8]) result = nonparam_match_empirical_cdf( initial_array, target_array, ignore_indices=np.isnan(initial_array) ) expected_result = np.array([8, 10, 0, 0, 0, 0, 0, 0, 0, 0]) assert np.allclose(result, expected_result, equal_nan=True) def test_more_zeroes_in_target(self): initial_array = np.array([1, 3, 7, 5, 0, 0, 0, 0, 0, 0]) target_array = np.array([10, 8, 0, 0, 0, 0, 0, 0, 0, 
0]) result = nonparam_match_empirical_cdf( initial_array, target_array, ignore_indices=np.isnan(initial_array) ) expected_result = np.array([0, 0, 10, 8, 0, 0, 0, 0, 0, 0]) assert np.allclose(result, expected_result, equal_nan=True) def test_2dim_array(self): initial_array = np.array([[1, 3, 5], [11, 9, 7]]) target_array = np.array([[2, 4, 6], [8, 10, 12]]) result = nonparam_match_empirical_cdf(initial_array, target_array) expected_result = np.array([[2, 4, 6], [12, 10, 8]]) assert np.allclose(result, expected_result, equal_nan=True) ================================================ FILE: pysteps/tests/test_timeseries_autoregression.py ================================================ # -*- coding: utf-8 -*- import os import numpy as np import pytest import pysteps from pysteps.timeseries import autoregression, correlation pytest.importorskip("pyproj") def test_estimate_ar_params_ols(): R = _create_data_univariate() for p in range(1, 4): phi = autoregression.estimate_ar_params_ols(R[-(p + 1) :], p) assert len(phi) == p + 1 for i in range(len(phi)): assert np.isscalar(phi[i]) phi = autoregression.estimate_ar_params_ols( R[-(p + 1) :], p, include_constant_term=True ) assert len(phi) == p + 2 for i in range(len(phi)): assert np.isscalar(phi[i]) phi = autoregression.estimate_ar_params_ols( R[-(p + 2) :], p, include_constant_term=True, d=1 ) assert len(phi) == p + 3 for i in range(len(phi)): assert np.isscalar(phi[i]) def test_estimate_ar_params_yw(): R = _create_data_univariate() for p in range(1, 4): gamma = correlation.temporal_autocorrelation(R[-(p + 1) :]) phi = autoregression.estimate_ar_params_yw(gamma) assert len(phi) == p + 1 for i in range(len(phi)): assert np.isscalar(phi[i]) def test_estimate_ar_params_yw_localized(): R = _create_data_univariate() for p in range(1, 4): gamma = correlation.temporal_autocorrelation( R[-(p + 1) :], window="gaussian", window_radius=25 ) phi = autoregression.estimate_ar_params_yw_localized(gamma) assert len(phi) == p + 1 for i in 
range(len(phi)): assert phi[i].shape == R.shape[1:] def test_estimate_ar_params_ols_localized(): R = _create_data_univariate() for p in range(1, 4): phi = autoregression.estimate_ar_params_ols_localized(R[-(p + 1) :], p, 25) assert len(phi) == p + 1 for i in range(len(phi)): assert phi[i].shape == R.shape[1:] phi = autoregression.estimate_ar_params_ols_localized( R[-(p + 1) :], p, 25, include_constant_term=True ) assert len(phi) == p + 2 for i in range(len(phi)): assert phi[i].shape == R.shape[1:] phi = autoregression.estimate_ar_params_ols_localized( R[-(p + 2) :], p, 25, include_constant_term=True, d=1 ) assert len(phi) == p + 3 for i in range(len(phi)): assert phi[i].shape == R.shape[1:] def test_estimate_var_params_ols(): R = _create_data_multivariate() q = R.shape[1] for p in range(1, 4): phi = autoregression.estimate_var_params_ols(R[-(p + 1) :], p) assert len(phi) == p + 1 for i in range(len(phi)): assert phi[i].shape == (q, q) phi = autoregression.estimate_var_params_ols( R[-(p + 1) :], p, include_constant_term=True ) assert len(phi) == p + 2 assert phi[0].shape == (q,) for i in range(1, len(phi)): assert phi[i].shape == (q, q) phi = autoregression.estimate_var_params_ols( R[-(p + 2) :], p, include_constant_term=True, d=1 ) assert len(phi) == p + 3 assert phi[0].shape == (q,) for i in range(1, len(phi)): assert phi[i].shape == (q, q) def test_estimate_var_params_ols_localized(): R = _create_data_multivariate() q = R.shape[1] for p in range(1, 4): phi = autoregression.estimate_var_params_ols_localized(R[-(p + 1) :], p, 25) assert len(phi) == p + 1 for i in range(len(phi)): assert phi[i].shape == (R.shape[2], R.shape[3], q, q) phi = autoregression.estimate_var_params_ols_localized( R[-(p + 1) :], p, 25, include_constant_term=True ) assert len(phi) == p + 2 assert phi[0].shape == (R.shape[2], R.shape[3], q) for i in range(1, len(phi)): assert phi[i].shape == (R.shape[2], R.shape[3], q, q) phi = autoregression.estimate_var_params_ols_localized( R[-(p + 2) :], 
p, 25, include_constant_term=True, d=1 ) assert len(phi) == p + 3 assert phi[0].shape == (R.shape[2], R.shape[3], q) for i in range(1, len(phi)): assert phi[i].shape == (R.shape[2], R.shape[3], q, q) def test_estimate_var_params_yw(): R = _create_data_multivariate() for p in range(1, 4): gamma = correlation.temporal_autocorrelation_multivariate(R[-(p + 1) :]) phi = autoregression.estimate_var_params_yw(gamma) assert len(phi) == p + 1 for i in range(len(phi)): assert phi[i].shape == (R.shape[1], R.shape[1]) def test_estimate_var_params_yw_localized(): R = _create_data_multivariate() q = R.shape[1] for p in range(1, 4): gamma = correlation.temporal_autocorrelation_multivariate( R[-(p + 1) :], window="gaussian", window_radius=25 ) phi = autoregression.estimate_var_params_yw_localized(gamma) assert len(phi) == p + 1 for i in range(len(phi)): assert phi[i].shape == (R.shape[2], R.shape[3], q, q) def test_iterate_ar(): R = _create_data_univariate() p = 2 phi = autoregression.estimate_ar_params_ols(R[-(p + 1) :], p) autoregression.iterate_ar_model(R, phi) def test_iterate_ar_localized(): R = _create_data_univariate() p = 2 phi = autoregression.estimate_ar_params_ols_localized(R[-(p + 1) :], p, 25) R_ = autoregression.iterate_ar_model(R, phi) assert R_.shape == R.shape def test_iterate_var(): R = _create_data_multivariate() p = 2 phi = autoregression.estimate_var_params_ols(R[-(p + 1) :], p) R_ = autoregression.iterate_var_model(R, phi) assert R_.shape == R.shape def test_iterate_var_localized(): R = _create_data_multivariate() p = 2 phi = autoregression.estimate_var_params_ols_localized(R[-(p + 1) :], p, 25) R_ = autoregression.iterate_var_model(R, phi) assert R_.shape == R.shape def _create_data_multivariate(): root_path = pysteps.rcparams.data_sources["fmi"]["root_path"] filenames = [ "201609281600_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz", "201609281605_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz", "201609281610_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz", 
"201609281615_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz", "201609281620_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz", ] R = [] for fn in filenames: filename = os.path.join(root_path, "20160928", fn) R_, _, _ = pysteps.io.import_fmi_pgm(filename, gzipped=True) R_[~np.isfinite(R_)] = 0.0 R.append(np.stack([R_, np.roll(R_, 5, axis=0)])) R = np.stack(R) R = R[:, :, 575:800, 255:480] return R def _create_data_univariate(): root_path = pysteps.rcparams.data_sources["fmi"]["root_path"] filenames = [ "201609281600_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz", "201609281605_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz", "201609281610_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz", "201609281615_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz", "201609281620_fmi.radar.composite.lowest_FIN_SUOMI1.pgm.gz", ] R = [] for fn in filenames: filename = os.path.join(root_path, "20160928", fn) R_, _, _ = pysteps.io.import_fmi_pgm(filename, gzipped=True) R_[~np.isfinite(R_)] = 0.0 R.append(R_) R = np.stack(R) R = R[:, 575:800, 255:480] return R ================================================ FILE: pysteps/tests/test_tracking_tdating.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from pysteps.tracking.tdating import dating from pysteps.utils import to_reflectivity from pysteps.tests.helpers import get_precipitation_fields arg_names = ("source", "dry_input", "output_splits_merges") arg_values = [ ("mch", False, False), ("mch", False, False), ("mch", True, False), ("mch", False, True), ] arg_names_multistep = ("source", "len_timesteps", "output_splits_merges") arg_values_multistep = [ ("mch", 6, False), ("mch", 6, True), ] @pytest.mark.parametrize(arg_names_multistep, arg_values_multistep) def test_tracking_tdating_dating_multistep(source, len_timesteps, output_splits_merges): pytest.importorskip("skimage") input_fields, metadata = get_precipitation_fields( 0, len_timesteps, True, True, 4000, source ) input_fields, __ = 
to_reflectivity(input_fields, metadata) timelist = metadata["timestamps"] # First half of timesteps tracks_1, cells, labels = dating( input_fields[0 : len_timesteps // 2], timelist[0 : len_timesteps // 2], mintrack=1, output_splits_merges=output_splits_merges, ) # Second half of timesteps tracks_2, cells, _ = dating( input_fields[len_timesteps // 2 - 2 :], timelist[len_timesteps // 2 - 2 :], mintrack=1, start=2, cell_list=cells, label_list=labels, output_splits_merges=output_splits_merges, ) # Since we are adding cells, number of tracks should increase assert len(tracks_1) <= len(tracks_2) # Tracks should be continuous in time so time difference should not exceed timestep max_track_step = max([t.time.diff().max().seconds for t in tracks_2 if len(t) > 1]) timestep = np.diff(timelist).max().seconds assert max_track_step <= timestep # IDs of unmatched cells should increase in every timestep for prev_df, cur_df in zip(cells[:-1], cells[1:]): prev_ids = set(prev_df.ID) cur_ids = set(cur_df.ID) new_ids = list(cur_ids - prev_ids) prev_unmatched = list(prev_ids - cur_ids) if len(prev_unmatched): assert np.all(np.array(new_ids) > max(prev_unmatched)) @pytest.mark.parametrize(arg_names, arg_values) def test_tracking_tdating_dating(source, dry_input, output_splits_merges): pytest.importorskip("skimage") pandas = pytest.importorskip("pandas") if not dry_input: input, metadata = get_precipitation_fields(0, 2, True, True, 4000, source) input, __ = to_reflectivity(input, metadata) else: input = np.zeros((3, 50, 50)) metadata = {"timestamps": ["00", "01", "02"]} timelist = metadata["timestamps"] cell_column_length = 9 if output_splits_merges: cell_column_length = 15 output = dating( input, timelist, mintrack=1, output_splits_merges=output_splits_merges ) # Check output format assert isinstance(output, tuple) assert len(output) == 3 assert isinstance(output[0], list) assert isinstance(output[1], list) assert isinstance(output[2], list) assert len(output[1]) == input.shape[0] assert 
len(output[2]) == input.shape[0] assert isinstance(output[1][0], pandas.DataFrame) assert isinstance(output[2][0], np.ndarray) assert output[1][0].shape[1] == cell_column_length assert output[2][0].shape == input.shape[1:] if not dry_input: assert len(output[0]) > 0 assert isinstance(output[0][0], pandas.DataFrame) assert output[0][0].shape[1] == cell_column_length else: assert len(output[0]) == 0 assert output[1][0].shape[0] == 0 assert output[2][0].sum() == 0 ================================================ FILE: pysteps/tests/test_utils_arrays.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from numpy.testing import assert_array_equal from pysteps.utils import arrays # compute_centred_coord_array test_data = [ (2, 2, [np.array([[-1, 0]]).T, np.array([[-1, 0]])]), (3, 3, [np.array([[-1, 0, 1]]).T, np.array([[-1, 0, 1]])]), (3, 2, [np.array([[-1, 0, 1]]).T, np.array([[-1, 0]])]), (2, 3, [np.array([[-1, 0]]).T, np.array([[-1, 0, 1]])]), ] @pytest.mark.parametrize("M, N, expected", test_data) def test_compute_centred_coord_array(M, N, expected): """Test the compute_centred_coord_array.""" assert_array_equal(arrays.compute_centred_coord_array(M, N)[0], expected[0]) assert_array_equal(arrays.compute_centred_coord_array(M, N)[1], expected[1]) ================================================ FILE: pysteps/tests/test_utils_cleansing.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from pysteps.utils import cleansing def test_decluster_empty(): """ Decluster an empty input""" X = np.empty((0, 2)) V = np.empty((0, 2)) X_dec, V_dec = cleansing.decluster(X, V, 20, 1) assert X_dec.ndim == 2 assert V_dec.ndim == 2 assert X_dec.shape[0] == 0 assert V_dec.shape[0] == 0 def test_decluster_single(): """decluster a single vector""" X = np.array([[0.0, 0.0]]) V = np.array([[1.0, 1.0]]) X_dec, V_dec = cleansing.decluster(X, V, 20, 1) assert X_dec.ndim == 2 assert 
V_dec.ndim == 2 assert np.all(X_dec == X) assert np.all(X_dec == X) X_dec, V_dec = cleansing.decluster(X, V, 20, 2) assert X_dec.ndim == 2 assert V_dec.ndim == 2 assert X_dec.shape[0] == 0 assert V_dec.shape[0] == 0 def test_decluster(): """decluster an input with duplicated vectors""" X = np.tile(np.random.randint(100, size=(10, 2)), (3, 1)) V = np.tile(np.random.randint(100, size=(10, 2)), (3, 1)) X_dec, V_dec = cleansing.decluster(X, V, 20, 1) assert X_dec.ndim == 2 assert V_dec.ndim == 2 assert X_dec.shape[0] <= V_dec.shape[0] assert X_dec.shape[0] <= 10 assert V_dec.shape[0] <= 10 X_dec, V_dec = cleansing.decluster(X, V, 100, 1) assert X_dec.ndim == 2 assert V_dec.ndim == 2 assert X_dec.shape[0] == 1 assert V_dec.shape[0] == 1 assert np.all(X_dec == np.median(X, axis=0)) assert np.all(V_dec == np.median(V, axis=0)) def test_decluster_value_error_is_raise_when_input_has_nan(): coords = np.ones((3, 1)) input_array = np.ones((3, 1)) input_array[1, 0] = np.nan with pytest.raises(ValueError): cleansing.decluster(coords, input_array, scale=20) def test_detect_outlier_constant(): """Test that a constant input produces no outliers and that warnings are raised""" V = np.zeros(20) # this will trigger a runtime warning with pytest.warns(RuntimeWarning): outliers = cleansing.detect_outliers(V, 1) assert outliers.size == V.shape[0] assert outliers.sum() == 0 V = np.zeros((20, 3)) # this will trigger a singular matrix warning with pytest.warns(UserWarning): outliers = cleansing.detect_outliers(V, 1) assert outliers.size == V.shape[0] assert outliers.sum() == 0 V = np.zeros((20, 3)) # this will trigger a singular matrix warning X = np.random.randint(100, size=(20, 3)) with pytest.warns(UserWarning): outliers = cleansing.detect_outliers(V, 1, coord=X, k=10) assert outliers.size == V.shape[0] assert outliers.sum() == 0 def test_detect_outlier_univariate_global(): """Test that""" # test input with no outliers at all V = np.random.randn(200) V = V[np.abs(V) < 1.5] outliers = 
cleansing.detect_outliers(V, 4) assert outliers.sum() == 0 # test a postive outlier V[-1] = 10 outliers = cleansing.detect_outliers(V, 4) assert outliers.sum() == 1 # test a negative outlier V[-1] = -10 outliers = cleansing.detect_outliers(V, 4) assert outliers.sum() == 1 def test_detect_outlier_multivariate_global(): """Test that""" # test input with no outliers at all V = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 1]], size=200) V = V[np.all(np.abs(V) < 1.5, axis=1), :] V = V[np.abs(V[:, 1] - V[:, 0]) < 0.5, :] outliers = cleansing.detect_outliers(V, 4) assert outliers.sum() == 0 # test postive outliers V[-2, :] = (10, 0) V[-1, :] = (3, -3) outliers = cleansing.detect_outliers(V, 4) assert outliers.sum() == 2 # test negative outliers V[-2] = (-10, 0) V[-1] = (-3, 3) outliers = cleansing.detect_outliers(V, 4) assert outliers.sum() == 2 def test_detect_outlier_univariate_local(): """Test that""" # test input with no outliers at all V = np.random.randn(200) X = np.random.randint(100, size=200) X = X[np.abs(V) < 1.5] V = V[np.abs(V) < 1.5] outliers = cleansing.detect_outliers(V, 4, coord=X, k=50) assert outliers.sum() == 0 # test a postive outlier V[-1] = 10 outliers = cleansing.detect_outliers(V, 4, coord=X, k=50) assert outliers.sum() == 1 # test a negative outlier V[-1] = -10 outliers = cleansing.detect_outliers(V, 4, coord=X, k=50) assert outliers.sum() == 1 def test_detect_outlier_multivariate_local(): """Test that""" # test input with no outliers at all V = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 1]], size=200) X = np.random.randint(100, size=(200, 3)) idx = np.abs(V[:, 1] - V[:, 0]) < 1 idx = idx & np.all(np.abs(V) < 1.5, axis=1) X = X[idx, :] V = V[idx, :] outliers = cleansing.detect_outliers(V, 4, coord=X, k=50) assert outliers.sum() == 0 # test postive outliers V[-2, :] = (10, 0) V[-1, :] = (3, -3) outliers = cleansing.detect_outliers(V, 4, coord=X, k=50) assert outliers.sum() == 2 # test negative outliers V[-2] = (-10, 0) V[-1] = 
(-3, 3) outliers = cleansing.detect_outliers(V, 4, coord=X, k=50) assert outliers.sum() == 2 def test_detect_outlier_wrong_input_dims_raise_error(): input_array = np.zeros((20, 3, 2)) thr_std_devs = 1 with pytest.raises(ValueError): cleansing.detect_outliers(input_array, thr_std_devs) ================================================ FILE: pysteps/tests/test_utils_conversion.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from numpy.testing import assert_array_almost_equal from pysteps.utils import conversion # to_rainrate test_data = [ ( np.array([1]), { "accutime": 5, "transform": None, "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, np.array([1]), ), ( np.array([1]), { "accutime": 5, "transform": None, "unit": "mm", "threshold": 0, "zerovalue": 0, }, np.array([12]), ), ( np.array([1]), { "accutime": 5, "transform": "dB", "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, np.array([1.25892541]), ), ( np.array([1]), { "accutime": 5, "transform": "dB", "unit": "mm", "threshold": 0, "zerovalue": 0, }, np.array([15.10710494]), ), ( np.array([1]), { "accutime": 5, "transform": "dB", "unit": "dBZ", "threshold": 0, "zerovalue": 0, }, np.array([0.04210719]), ), ( np.array([1]), { "accutime": 5, "transform": "log", "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, np.array([2.71828183]), ), ( np.array([1.0]), { "accutime": 5, "transform": "log", "unit": "mm", "threshold": 0, "zerovalue": 0, }, np.array([32.61938194]), ), ( np.array([1]), { "accutime": 5, "transform": "sqrt", "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, np.array([1]), ), ( np.array([1.0]), { "accutime": 5, "transform": "sqrt", "unit": "mm", "threshold": 0, "zerovalue": 0, }, np.array([12.0]), ), ] @pytest.mark.parametrize("R, metadata, expected", test_data) def test_to_rainrate(R, metadata, expected): """Test the to_rainrate.""" assert_array_almost_equal(conversion.to_rainrate(R, metadata)[0], expected) # to_raindepth test_data = [ ( 
np.array([1]), { "accutime": 5, "transform": None, "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, np.array([0.08333333]), ), ( np.array([1]), { "accutime": 5, "transform": None, "unit": "mm", "threshold": 0, "zerovalue": 0, }, np.array([1]), ), ( np.array([1]), { "accutime": 5, "transform": "dB", "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, np.array([0.10491045]), ), ( np.array([1]), { "accutime": 5, "transform": "dB", "unit": "mm", "threshold": 0, "zerovalue": 0, }, np.array([1.25892541]), ), ( np.array([1]), { "accutime": 5, "transform": "dB", "unit": "dBZ", "threshold": 0, "zerovalue": 0, }, np.array([0.00350893]), ), ( np.array([1]), { "accutime": 5, "transform": "log", "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, np.array([0.22652349]), ), ( np.array([1.0]), { "accutime": 5, "transform": "log", "unit": "mm", "threshold": 0, "zerovalue": 0, }, np.array([2.71828183]), ), ( np.array([1]), { "accutime": 5, "transform": "sqrt", "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, np.array([0.08333333]), ), ( np.array([1.0]), { "accutime": 5, "transform": "sqrt", "unit": "mm", "threshold": 0, "zerovalue": 0, }, np.array([1.0]), ), ] @pytest.mark.parametrize("R, metadata, expected", test_data) def test_to_raindepth(R, metadata, expected): """Test the to_raindepth.""" assert_array_almost_equal(conversion.to_raindepth(R, metadata)[0], expected) # to_reflectivity test_data = [ ( np.array([1]), { "accutime": 5, "transform": None, "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, np.array([23.01029996]), ), ( np.array([1]), { "accutime": 5, "transform": None, "unit": "mm", "threshold": 0, "zerovalue": 0, }, np.array([40.27719989]), ), ( np.array([1]), { "accutime": 5, "transform": "dB", "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, np.array([24.61029996]), ), ( np.array([1]), { "accutime": 5, "transform": "dB", "unit": "mm", "threshold": 0, "zerovalue": 0, }, np.array([41.87719989]), ), ( np.array([1]), { "accutime": 5, "transform": "dB", "unit": 
"dBZ", "threshold": 0, "zerovalue": 0, }, np.array([1]), ), ( np.array([1]), { "accutime": 5, "transform": "log", "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, np.array([29.95901167]), ), ( np.array([1.0]), { "accutime": 5, "transform": "log", "unit": "mm", "threshold": 0, "zerovalue": 0, }, np.array([47.2259116]), ), ( np.array([1]), { "accutime": 5, "transform": "sqrt", "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, np.array([23.01029996]), ), ( np.array([1.0]), { "accutime": 5, "transform": "sqrt", "unit": "mm", "threshold": 0, "zerovalue": 0, }, np.array([40.27719989]), ), ] @pytest.mark.parametrize("R, metadata, expected", test_data) def test_to_reflectivity(R, metadata, expected): """Test the to_reflectivity.""" assert_array_almost_equal(conversion.to_reflectivity(R, metadata)[0], expected) ================================================ FILE: pysteps/tests/test_utils_dimension.py ================================================ # -*- coding: utf-8 -*- import datetime as dt import numpy as np import pytest from numpy.testing import assert_array_equal from pytest import raises from pysteps.utils import dimension test_data_not_trim = ( # "data, window_size, axis, method, expected" (np.arange(6), 2, 0, "mean", np.array([0.5, 2.5, 4.5])), ( np.arange(4 * 6).reshape(4, 6), (2, 3), (0, 1), "sum", np.array([[24, 42], [96, 114]]), ), ( np.arange(4 * 6).reshape(4, 6), (2, 2), (0, 1), "sum", np.array([[14, 22, 30], [62, 70, 78]]), ), ( np.arange(4 * 6).reshape(4, 6), 2, (0, 1), "sum", np.array([[14, 22, 30], [62, 70, 78]]), ), ( np.arange(4 * 6).reshape(4, 6), (2, 3), (0, 1), "mean", np.array([[4.0, 7.0], [16.0, 19.0]]), ), ( np.arange(4 * 6).reshape(4, 6), (2, 2), (0, 1), "mean", np.array([[3.5, 5.5, 7.5], [15.5, 17.5, 19.5]]), ), ( np.arange(4 * 6).reshape(4, 6), 2, (0, 1), "mean", np.array([[3.5, 5.5, 7.5], [15.5, 17.5, 19.5]]), ), ) @pytest.mark.parametrize( "data, window_size, axis, method, expected", test_data_not_trim ) def test_aggregate_fields(data, 
window_size, axis, method, expected): """ Test the aggregate_fields function. The windows size must divide exactly the data dimensions. Internally, additional test are generated for situations where the windows size does not divide the data dimensions. The length of each dimension should be larger than 2. """ assert_array_equal( dimension.aggregate_fields(data, window_size, axis=axis, method=method), expected, ) # Test the trimming capabilities. data = np.pad(data, (0, 1)) assert_array_equal( dimension.aggregate_fields( data, window_size, axis=axis, method=method, trim=True ), expected, ) with raises(ValueError): dimension.aggregate_fields(data, window_size, axis=axis, method=method) def test_aggregate_fields_errors(): """ Test that the errors are correctly captured in the aggregate_fields function. """ data = np.arange(4 * 6).reshape(4, 6) with raises(ValueError): dimension.aggregate_fields(data, -1, axis=0) with raises(ValueError): dimension.aggregate_fields(data, 0, axis=0) with raises(ValueError): dimension.aggregate_fields(data, 1, method="invalid") with raises(TypeError): dimension.aggregate_fields(data, (1, 1), axis=0) # aggregate_fields_time timestamps = [dt.datetime.now() + dt.timedelta(minutes=t) for t in range(10)] test_data = [ ( np.ones((10, 1, 1)), {"unit": "mm/h", "timestamps": timestamps}, 2, False, np.ones((5, 1, 1)), ), ( np.ones((10, 1, 1)), {"unit": "mm", "timestamps": timestamps}, 2, False, 2 * np.ones((5, 1, 1)), ), ] @pytest.mark.parametrize( "R, metadata, time_window_min, ignore_nan, expected", test_data ) def test_aggregate_fields_time(R, metadata, time_window_min, ignore_nan, expected): """Test the aggregate_fields_time.""" assert_array_equal( dimension.aggregate_fields_time(R, metadata, time_window_min, ignore_nan)[0], expected, ) # aggregate_fields_space test_data = [ ( np.ones((1, 10, 10)), {"unit": "mm/h", "xpixelsize": 1, "ypixelsize": 1}, 2, False, np.ones((1, 5, 5)), ), ( np.ones((1, 10, 10)), {"unit": "mm", "xpixelsize": 1, 
"ypixelsize": 1}, 2, False, np.ones((1, 5, 5)), ), ( np.ones((1, 10, 10)), {"unit": "mm/h", "xpixelsize": 1, "ypixelsize": 2}, (2, 4), False, np.ones((1, 5, 5)), ), ] @pytest.mark.parametrize("R, metadata, space_window, ignore_nan, expected", test_data) def test_aggregate_fields_space(R, metadata, space_window, ignore_nan, expected): """Test the aggregate_fields_space.""" assert_array_equal( dimension.aggregate_fields_space(R, metadata, space_window, ignore_nan)[0], expected, ) # clip_domain R = np.zeros((4, 4)) R[:2, :] = 1 test_data = [ ( R, { "x1": 0, "x2": 4, "y1": 0, "y2": 4, "xpixelsize": 1, "ypixelsize": 1, "zerovalue": 0, "yorigin": "upper", }, None, R, ), ( R, { "x1": 0, "x2": 4, "y1": 0, "y2": 4, "xpixelsize": 1, "ypixelsize": 1, "zerovalue": 0, "yorigin": "lower", }, (2, 4, 2, 4), np.zeros((2, 2)), ), ( R, { "x1": 0, "x2": 4, "y1": 0, "y2": 4, "xpixelsize": 1, "ypixelsize": 1, "zerovalue": 0, "yorigin": "upper", }, (2, 4, 2, 4), np.ones((2, 2)), ), ] @pytest.mark.parametrize("R, metadata, extent, expected", test_data) def test_clip_domain(R, metadata, extent, expected): """Test the clip_domain.""" assert_array_equal(dimension.clip_domain(R, metadata, extent)[0], expected) # square_domain R = np.zeros((4, 2)) test_data = [ # square by padding ( R, {"x1": 0, "x2": 2, "y1": 0, "y2": 4, "xpixelsize": 1, "ypixelsize": 1}, "pad", False, np.zeros((4, 4)), ), # square by cropping ( R, {"x1": 0, "x2": 2, "y1": 0, "y2": 4, "xpixelsize": 1, "ypixelsize": 1}, "crop", False, np.zeros((2, 2)), ), # inverse square by padding ( np.zeros((4, 4)), { "x1": -1, "x2": 3, "y1": 0, "y2": 4, "xpixelsize": 1, "ypixelsize": 1, "orig_domain": (4, 2), "square_method": "pad", }, "pad", True, R, ), # inverse square by cropping ( np.zeros((2, 2)), { "x1": 0, "x2": 2, "y1": 1, "y2": 3, "xpixelsize": 1, "ypixelsize": 1, "orig_domain": (4, 2), "square_method": "crop", }, "crop", True, R, ), ] @pytest.mark.parametrize("R, metadata, method, inverse, expected", test_data) def 
test_square_domain(R, metadata, method, inverse, expected): """Test the square_domain.""" assert_array_equal( dimension.square_domain(R, metadata, method, inverse)[0], expected ) ================================================ FILE: pysteps/tests/test_utils_interpolate.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from pysteps.utils import get_method interp_methods = ( "idwinterp2d", "rbfinterp2d", ) @pytest.mark.parametrize("interp_method", interp_methods) def test_interp_univariate(interp_method): coord = np.random.rand(10, 2) input_array = np.random.rand(10) xgrid, ygrid = np.linspace(0, 1, 10), np.linspace(0, 1, 10) interp = get_method(interp_method) output = interp(coord, input_array, xgrid, ygrid) assert isinstance(output, np.ndarray) assert output.ndim == 2 assert output.shape == (ygrid.size, xgrid.size) assert np.isfinite(output).all() @pytest.mark.parametrize("interp_method", interp_methods) def test_interp_multivariate(interp_method): coord = np.random.rand(10, 2) input_array = np.random.rand(10, 2) xgrid, ygrid = np.linspace(0, 1, 10), np.linspace(0, 1, 10) interp = get_method(interp_method) output = interp(coord, input_array, xgrid, ygrid) assert isinstance(output, np.ndarray) assert output.ndim == 3 assert output.shape[0] == 2 assert output.shape[1:] == (ygrid.size, xgrid.size) assert np.isfinite(output).all() @pytest.mark.parametrize("interp_method", interp_methods) def test_wrong_inputs(interp_method): coord = np.random.rand(10, 2) input_array = np.random.rand(10, 2) xgrid, ygrid = np.linspace(0, 1, 10), np.linspace(0, 1, 10) interp = get_method(interp_method) # nan in the input values with pytest.raises(ValueError): input_with_nans = input_array.copy() input_with_nans[0, 0] = np.nan interp(coord, input_with_nans, xgrid, ygrid) # nan in the input coordinates with pytest.raises(ValueError): coord_with_nans = coord.copy() coord_with_nans[0, 0] = np.nan interp(coord_with_nans, input_array, 
xgrid, ygrid) # too many dimensions in the input values with pytest.raises(ValueError): interp(coord, np.random.rand(10, 2, 1), xgrid, ygrid) # wrong dimension size in the input coordinates with pytest.raises(ValueError): interp(np.random.rand(10, 1), input_array, xgrid, ygrid) # wrong number of dimensions in the input coordinates with pytest.raises(ValueError): interp(np.random.rand(10, 2, 1), input_array, xgrid, ygrid) # wrong number of coordinates with pytest.raises(ValueError): interp(np.random.rand(9, 2), input_array, xgrid, ygrid) @pytest.mark.parametrize("interp_method", interp_methods) def test_one_sample_input(interp_method): coord = np.random.rand(1, 2) input_array = np.array([1, 2])[None, :] xgrid, ygrid = np.linspace(0, 1, 10), np.linspace(0, 1, 10) interp = get_method(interp_method) # one sample returns uniform grids output = interp(coord, input_array, xgrid, ygrid) assert np.isfinite(output).all() assert output[0, ...].max() == output[0, ...].min() == 1 assert output[1, ...].max() == output[1, ...].min() == 2 @pytest.mark.parametrize("interp_method", interp_methods) def test_uniform_input(interp_method): coord = np.random.rand(10, 2) xgrid, ygrid = np.linspace(0, 1, 10), np.linspace(0, 1, 10) interp = get_method(interp_method) # same value across all variables input_array = np.ones((10, 2)) output = interp(coord, input_array, xgrid, ygrid) assert np.isfinite(output).all() assert output.max() == output.min() == input_array.ravel()[0] # # same value in one variable only # input_array = np.vstack((np.ones(10), np.random.rand(10))).T # output = interp(coord, input_array, xgrid, ygrid) # assert output[0,].max() == output[0,].min() == input_array[0,0] def test_idwinterp2d_k1(): coord = np.random.rand(10, 2) input_array = np.random.rand(10, 2) xgrid, ygrid = np.linspace(0, 1, 10), np.linspace(0, 1, 10) interp = get_method("idwinterp2d") output = interp(coord, input_array, xgrid, ygrid, k=1) assert isinstance(output, np.ndarray) assert output.ndim == 3 assert 
output.shape[0] == 2 assert output.shape[1:] == (ygrid.size, xgrid.size) assert np.isfinite(output).all() def test_idwinterp2d_kNone(): coord = np.random.rand(10, 2) input_array = np.random.rand(10, 2) xgrid, ygrid = np.linspace(0, 1, 10), np.linspace(0, 1, 10) interp = get_method("idwinterp2d") output = interp(coord, input_array, xgrid, ygrid, k=None) assert isinstance(output, np.ndarray) assert output.ndim == 3 assert output.shape[0] == 2 assert output.shape[1:] == (ygrid.size, xgrid.size) assert np.isfinite(output).all() ================================================ FILE: pysteps/tests/test_utils_pca.py ================================================ # -*- coding: utf-8 -*- import pytest import numpy as np from pysteps.utils import pca pca_arg_values = ( (10, 10), (20, 20), (10, 5), (20, 15), ) pca_arg_names = ("len_y", "n_components") @pytest.mark.parametrize(pca_arg_names, pca_arg_values) def test_pca(len_y, n_components): pytest.importorskip("sklearn") precip_field = np.zeros((len_y, 200, 200)) for i in range(len_y): a = 3 * i b = 2 * i precip_field[i, 20 + b : 160 - b, 30 + a : 180 - a] = 0.1 precip_field[i, 22 + b : 162 - b, 35 + a : 178 - a] = 0.1 precip_field[i, 24 + b : 164 - b, 40 + a : 176 - a] = 1.0 precip_field[i, 26 + b : 166 - b, 45 + a : 174 - a] = 5.0 precip_field[i, 28 + b : 168 - b, 50 + a : 172 - a] = 5.0 precip_field[i, 30 + b : 170 - b, 35 + a : 170 - a] = 4.5 precip_field[i, 32 + b : 172 - b, 40 + a : 168 - a] = 4.5 precip_field[i, 34 + b : 174 - b, 45 + a : 166 - a] = 4.0 precip_field[i, 36 + b : 176 - b, 50 + a : 164 - a] = 2.0 precip_field[i, 38 + b : 178 - b, 55 + a : 162 - a] = 1.0 precip_field[i, 40 + b : 180 - b, 60 + a : 160 - a] = 0.5 precip_field[i, 42 + b : 182 - b, 65 + a : 158 - a] = 0.1 precip_field = precip_field.reshape( len_y, precip_field.shape[1] * precip_field.shape[2] ) kwargs = {"n_components": n_components, "svd_solver": "full"} precip_field_pc, pca_params = pca.pca_transform( forecast_ens=precip_field, 
get_params=True, **kwargs ) assert precip_field_pc.shape == (len_y, n_components) assert pca_params["principal_components"].shape[1] == precip_field.shape[1] assert pca_params["mean"].shape[0] == precip_field.shape[1] precip_field_backtransformed = pca.pca_backtransform( precip_field_pc, pca_params=pca_params ) # These fields are only equal if the full PCA is computed if len_y == n_components: assert np.sum(np.abs(precip_field_backtransformed - precip_field)) < 1e-6 ================================================ FILE: pysteps/tests/test_utils_reprojection.py ================================================ # -*- coding: utf-8 -*- import os import numpy as np import pytest import pysteps from pysteps.utils import reprojection as rpj pytest.importorskip("rasterio") pytest.importorskip("pyproj") root_path_radar = pysteps.rcparams.data_sources["rmi"]["root_path"] rel_path_radar = "20210704" # Different date, but that does not matter for the tester filename_radar = os.path.join( root_path_radar, rel_path_radar, "20210704180500.rad.best.comp.rate.qpe.hdf" ) # Open the radar data radar_array, _, metadata_dst = pysteps.io.importers.import_odim_hdf5(filename_radar) # Initialise dummy NWP data nwp_array = np.zeros((24, 564, 564)) for t in range(nwp_array.shape[0]): nwp_array[t, 30 + t : 185 + t, 30 + 2 * t] = 0.1 nwp_array[t, 30 + t : 185 + t, 31 + 2 * t] = 0.1 nwp_array[t, 30 + t : 185 + t, 32 + 2 * t] = 1.0 nwp_array[t, 30 + t : 185 + t, 33 + 2 * t] = 5.0 nwp_array[t, 30 + t : 185 + t, 34 + 2 * t] = 5.0 nwp_array[t, 30 + t : 185 + t, 35 + 2 * t] = 4.5 nwp_array[t, 30 + t : 185 + t, 36 + 2 * t] = 4.5 nwp_array[t, 30 + t : 185 + t, 37 + 2 * t] = 4.0 nwp_array[t, 30 + t : 185 + t, 38 + 2 * t] = 2.0 nwp_array[t, 30 + t : 185 + t, 39 + 2 * t] = 1.0 nwp_array[t, 30 + t : 185 + t, 40 + 2 * t] = 0.5 nwp_array[t, 30 + t : 185 + t, 41 + 2 * t] = 0.1 nwp_proj = ( "+proj=lcc +lon_0=4.55 +lat_1=50.8 +lat_2=50.8 " "+a=6371229 +es=0 +lat_0=50.8 +x_0=365950 +y_0=-365950.000000001" ) 
metadata_src = dict( projection=nwp_proj, institution="Royal Meteorological Institute of Belgium", transform=None, zerovalue=0.0, threshold=0, unit="mm", accutime=5, xpixelsize=1300.0, ypixelsize=1300.0, yorigin="upper", cartesian_unit="m", x1=0.0, x2=731900.0, y1=-731900.0, y2=0.0, ) steps_arg_names = ( "radar_array", "nwp_array", "metadata_src", "metadata_dst", ) steps_arg_values = [ (radar_array, nwp_array, metadata_src, metadata_dst), ] @pytest.mark.parametrize(steps_arg_names, steps_arg_values) def test_utils_reproject_grids( radar_array, nwp_array, metadata_src, metadata_dst, ): # Reproject nwp_array_reproj, metadata_reproj = rpj.reproject_grids( nwp_array, radar_array, metadata_src, metadata_dst ) # The tests assert ( nwp_array_reproj.shape[0] == nwp_array.shape[0] ), "Time dimension has not the same length as source" assert ( nwp_array_reproj.shape[1] == radar_array.shape[0] ), "y dimension has not the same length as radar composite" assert ( nwp_array_reproj.shape[2] == radar_array.shape[1] ), "x dimension has not the same length as radar composite" assert ( metadata_reproj["x1"] == metadata_dst["x1"] ), "x-value lower left corner is not equal to radar composite" assert ( metadata_reproj["x2"] == metadata_dst["x2"] ), "x-value upper right corner is not equal to radar composite" assert ( metadata_reproj["y1"] == metadata_dst["y1"] ), "y-value lower left corner is not equal to radar composite" assert ( metadata_reproj["y2"] == metadata_dst["y2"] ), "y-value upper right corner is not equal to radar composite" assert ( metadata_reproj["projection"] == metadata_dst["projection"] ), "projection is different than destination projection" ================================================ FILE: pysteps/tests/test_utils_spectral.py ================================================ import numpy as np import pytest from pysteps.utils import spectral _rapsd_input_fields = [ np.random.uniform(size=(255, 255)), np.random.uniform(size=(256, 256)), 
np.random.uniform(size=(255, 256)),
    np.random.uniform(size=(256, 255)),
]


@pytest.mark.parametrize("field", _rapsd_input_fields)
def test_rapsd(field):
    """Check the length and frequency axis of the radially averaged PSD."""
    rapsd, freq = spectral.rapsd(field, return_freq=True)

    m, n = field.shape

    l = max(m, n)
    # even-sized fields yield l/2 frequency bins, odd-sized l/2 + 1
    if l % 2 == 0:
        assert len(rapsd) == int(l / 2)
    else:
        assert len(rapsd) == int(l / 2 + 1)
    assert len(rapsd) == len(freq)
    assert np.all(freq >= 0.0)


================================================
FILE: pysteps/tests/test_utils_transformation.py
================================================
# -*- coding: utf-8 -*-

import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal

from pysteps.utils import transformation

# boxcox_transform: (R, metadata, Lambda, threshold, zerovalue, inverse, expected)
test_data = [
    (
        np.array([1]),
        {"accutime": 5, "transform": None, "unit": "mm/h", "threshold": 0, "zerovalue": 0,},
        None,
        None,
        None,
        False,
        np.array([0]),
    ),
    (
        np.array([1]),
        {"accutime": 5, "transform": "BoxCox", "unit": "mm/h", "threshold": 0, "zerovalue": 0,},
        None,
        None,
        None,
        True,
        np.array([np.exp(1)]),
    ),
    (
        np.array([1]),
        {"accutime": 5, "transform": None, "unit": "mm/h", "threshold": 0, "zerovalue": 0,},
        1.0,
        None,
        None,
        False,
        np.array([0]),
    ),
    (
        np.array([1]),
        {"accutime": 5, "transform": "BoxCox", "unit": "mm/h", "threshold": 0, "zerovalue": 0,},
        1.0,
        None,
        None,
        True,
        np.array([2.0]),
    ),
]


@pytest.mark.parametrize(
    "R, metadata, Lambda, threshold, zerovalue, inverse, expected", test_data
)
def test_boxcox_transform(R, metadata, Lambda, threshold, zerovalue, inverse, expected):
    """Test the boxcox_transform (forward and inverse)."""
    assert_array_almost_equal(
        transformation.boxcox_transform(
            R, metadata, Lambda, threshold, zerovalue, inverse
        )[0],
        expected,
    )


# dB_transform: (R, metadata, threshold, zerovalue, inverse, expected)
test_data = [
    (
        np.array([1]),
        {"accutime": 5, "transform": None, "unit": "mm/h", "threshold": 0, "zerovalue": 0,},
        None,
        None,
        False,
        np.array([0]),
    ),
    (
        np.array([1]),
        {"accutime": 5, "transform": "dB", "unit": "mm/h", "threshold": 0, "zerovalue": 0,},
        None,
        None,
        True,
        np.array([1.25892541]),
    ),
]
@pytest.mark.parametrize( "R, metadata, threshold, zerovalue, inverse, expected", test_data ) def test_dB_transform(R, metadata, threshold, zerovalue, inverse, expected): """Test the dB_transform.""" assert_array_almost_equal( transformation.dB_transform(R, metadata, threshold, zerovalue, inverse)[0], expected, ) # NQ_transform test_data = [ ( np.array([1, 2]), { "accutime": 5, "transform": None, "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, False, np.array([-0.4307273, 0.4307273]), ) ] @pytest.mark.parametrize("R, metadata, inverse, expected", test_data) def test_NQ_transform(R, metadata, inverse, expected): """Test the NQ_transform.""" assert_array_almost_equal( transformation.NQ_transform(R, metadata, inverse)[0], expected ) # sqrt_transform test_data = [ ( np.array([1]), { "accutime": 5, "transform": None, "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, False, np.array([1]), ), ( np.array([1]), { "accutime": 5, "transform": "sqrt", "unit": "mm/h", "threshold": 0, "zerovalue": 0, }, True, np.array([1]), ), ] @pytest.mark.parametrize("R, metadata, inverse, expected", test_data) def test_sqrt_transform(R, metadata, inverse, expected): """Test the sqrt_transform.""" assert_array_almost_equal( transformation.sqrt_transform(R, metadata, inverse)[0], expected ) ================================================ FILE: pysteps/tests/test_verification_detcatscores.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from numpy.testing import assert_array_almost_equal from pysteps.verification import det_cat_fct # CREATE A LARGE DATASET TO MATCH # EXAMPLES IN # http://www.cawcr.gov.au/projects/verification/ fct_hits = 1.0 * np.ones(82) obs_hits = 1.0 * np.ones(82) fct_fa = 1.0 * np.ones(38) obs_fa = 1.0 * np.zeros(38) fct_misses = 1.0 * np.zeros(23) obs_misses = 1.0 * np.ones(23) fct_cr = 1.0 * np.zeros(222) obs_cr = 1.0 * np.zeros(222) obs_data = np.concatenate([obs_hits, obs_fa, obs_misses, obs_cr]) fct_data 
= np.concatenate([fct_hits, fct_fa, fct_misses, fct_cr]) test_data = [ ([0.0], [0.0], 0.0, None, []), ([1.0, 3.0], [2.0, 5.0], 0.0, None, []), ([1.0, 3.0], [2.0, 5.0], 0.0, "CSI", [1.0]), ([1.0, 3.0], [2.0, 5.0], 0.0, ("CSI", "FAR"), [1.0, 0.0]), ([1.0, 3.0], [2.0, 5.0], 0.0, ("lolo",), []), ([1.0, 3.0], [2.0, 5.0], 0.0, ("CSI", None, "FAR"), [1.0, 0.0]), ([1.0, 3.0], [2.0, 5.0], 1.0, ("CSI", None, "FAR"), [0.5, 0.0]), ([1.0, 3.0], [2.0, 5.0], 1.0, ("lolo"), []), # test unknown score (fct_data, obs_data, 0.0, ("ACC"), [0.83287671]), # ACCURACY score (fct_data, obs_data, 0.0, ("BIAS"), [1.1428571]), # BIAS score (fct_data, obs_data, 0.0, ("POD"), [0.7809524]), # POD score (fct_data, obs_data, 0.0, ("FAR"), [0.316667]), # FAR score # Probability of false detection (false alarm rate) (fct_data, obs_data, 0.0, ("FA"), [0.146154]), # CSI score (fct_data, obs_data, 0.0, ("CSI"), [0.573426]), # Heidke Skill Score (fct_data, obs_data, 0.0, ("HSS"), [0.608871]), # Hanssen-Kuipers Discriminant (fct_data, obs_data, 0.0, ("HK"), [0.6348]), # Gilbert Skill Score (fct_data, obs_data, 0.0, ("GSS"), [0.437682]), # Gilbert Skill Score (fct_data, obs_data, 0.0, ("ETS"), [0.437682]), # Symmetric extremal dependence index (fct_data, obs_data, 0.0, ("SEDI"), [0.789308]), # Matthews correlation coefficient (fct_data, obs_data, 0.0, ("MCC"), [0.611707]), # F1-score (fct_data, obs_data, 0.0, ("F1"), [0.728889]), ] @pytest.mark.parametrize("pred, obs, thr, scores, expected", test_data) def test_det_cat_fct(pred, obs, thr, scores, expected): """Test the det_cat_fct.""" assert_array_almost_equal( list(det_cat_fct(pred, obs, thr, scores).values()), expected ) ================================================ FILE: pysteps/tests/test_verification_detcontscores.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from numpy.testing import assert_array_almost_equal from pysteps.verification import det_cont_fct # CREATE A DATASET TO MATCH # 
EXAMPLES IN # http://www.cawcr.gov.au/projects/verification/ obs_data = np.asarray( [7, 10, 9, 15, 22, 13, 17, 17, 19, 23.0, 0, 10, 0, 15, 0, 13, 0, 17, 0, 0.0] ) fct_data = np.asarray( [1, 8, 12, 13, 18, 10, 16, 19, 23, 24.0, 0, 0, 12, 0, 0, 0, 16, 0, 0, 0.0] ) test_data = [ # test None as score ([0.0], [0.0], None, None, None, []), # test unknown score ([1.0, 3.0], [2.0, 5.0], ("lolo"), None, None, []), # test unknown score and None ([1.0, 3.0], [2.0, 5.0], ("lolo", None), None, None, []), # Mean Error as string (fct_data, obs_data, "ME", None, None, [-1.75]), # Mean Error (fct_data, obs_data, ("ME"), None, None, [-1.75]), # Mean Error single conditional (fct_data, obs_data, ("ME"), None, "single", [-2.1875]), # Mean Error double conditional (fct_data, obs_data, ("ME"), None, "double", [-0.8]), # Mean Absolute Error (fct_data, obs_data, ("MAE"), None, None, [5.55]), # Mean Square Error (fct_data, obs_data, ("MSE"), None, None, [64.15]), # Normalized Mean Square Error (fct_data, obs_data, ("NMSE"), None, None, [0.113711]), # Root Mean Square Error (fct_data, obs_data, ("RMSE"), None, None, [8.009370]), # Beta1 (fct_data, obs_data, ("beta1"), None, None, [0.498200]), # Beta2 (fct_data, obs_data, ("beta2"), None, None, [0.591673]), # reduction of variance (fct_data, obs_data, ("RV"), None, None, [-0.054622]), # debiased RMSE (fct_data, obs_data, ("DRMSE"), None, None, [7.815849]), # Pearson correlation (fct_data, obs_data, ("corr_p"), None, None, [0.542929]), # Spearman correlation (fct_data, obs_data, ("corr_s"), None, None, [0.565251]), # Spearman correlation single conditional (fct_data, obs_data, ("corr_s"), None, "single", [0.467913]), # Spearman correlation double conditional (fct_data, obs_data, ("corr_s"), None, "double", [0.917937]), # scatter (fct_data, obs_data, ("scatter"), None, None, [0.808023]), # Mean Error along axis 0 as tuple ( np.tile(fct_data, (2, 1)).T, np.tile(obs_data, (2, 1)).T, "ME", (0,), None, [[-1.75, -1.75]], ), # Mean Error along axis 
0 ( np.tile(fct_data, (2, 1)).T, np.tile(obs_data, (2, 1)).T, "ME", 0, None, [[-1.75, -1.75]], ), # Mean Error along axis 1 ( np.tile(fct_data, (2, 1)).T, np.tile(obs_data, (2, 1)).T, "ME", 1, None, [[-6, -2, 3, -2, -4, -3, -1, 2, 4, 1, 0, -10, 12, -15, 0, -13, 16, -17, 0, 0]], ), # Mean Error along axis (1,2) ( np.tile(fct_data, (2, 1)).T, np.tile(obs_data, (2, 1)).T, "ME", (0, 1), None, [-1.75], ), # Mean Error along axis (2,1) ( np.tile(fct_data, (2, 1)).T, np.tile(obs_data, (2, 1)).T, "ME", (1, 0), None, [-1.75], ), # scatter along axis 0 as tuple ( np.tile(fct_data, (2, 1)).T, np.tile(obs_data, (2, 1)).T, "scatter", (0,), None, [[0.808023, 0.808023]], ), # scatter along axis 0 ( np.tile(fct_data, (2, 1)).T, np.tile(obs_data, (2, 1)).T, "scatter", 0, None, [[0.808023, 0.808023]], ), # scatter along axis (1,2) ( np.tile(fct_data, (2, 1)).T, np.tile(obs_data, (2, 1)).T, "scatter", (0, 1), None, [0.804806], ), # scatter along axis (2,1) ( np.tile(fct_data, (2, 1)).T, np.tile(obs_data, (2, 1)).T, "scatter", (1, 0), None, [0.804806], ), ] @pytest.mark.parametrize("pred, obs, scores, axis, conditioning, expected", test_data) def test_det_cont_fct(pred, obs, scores, axis, conditioning, expected): """Test the det_cont_fct.""" assert_array_almost_equal( list(det_cont_fct(pred, obs, scores, axis, conditioning).values()), expected ) ================================================ FILE: pysteps/tests/test_verification_probscores.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from numpy.testing import assert_array_almost_equal from pysteps.postprocessing.ensemblestats import excprob from pysteps.tests.helpers import get_precipitation_fields from pysteps.verification import probscores precip = get_precipitation_fields(num_next_files=10, return_raw=True) # CRPS test_data = [(precip[:10], precip[-1], 0.01470871)] @pytest.mark.parametrize("X_f, X_o, expected", test_data) def test_CRPS(X_f, X_o, expected): """Test 
the CRPS.""" assert_array_almost_equal(probscores.CRPS(X_f, X_o), expected) # reldiag test_data = [(precip[:10], precip[-1], 1.0, 10, 10, 3.38751492)] @pytest.mark.parametrize("X_f, X_o, X_min, n_bins, min_count, expected", test_data) def test_reldiag_sum(X_f, X_o, X_min, n_bins, min_count, expected): """Test the reldiag.""" P_f = excprob(X_f, X_min, ignore_nan=False) assert_array_almost_equal( np.sum(probscores.reldiag(P_f, X_o, X_min, n_bins, min_count)[1]), expected ) # ROC_curve test_data = [(precip[:10], precip[-1], 1.0, 10, True, 0.79557329)] @pytest.mark.parametrize( "X_f, X_o, X_min, n_prob_thrs, compute_area, expected", test_data ) def test_ROC_curve_area(X_f, X_o, X_min, n_prob_thrs, compute_area, expected): """Test the ROC_curve.""" P_f = excprob(X_f, X_min, ignore_nan=False) assert_array_almost_equal( probscores.ROC_curve(P_f, X_o, X_min, n_prob_thrs, compute_area)[2], expected ) ================================================ FILE: pysteps/tests/test_verification_salscores.py ================================================ # -*- coding: utf-8 -*- import numpy as np import pytest from pysteps.tests.helpers import get_precipitation_fields from pysteps.verification.salscores import sal from pysteps.utils import to_rainrate, to_reflectivity test_data = [ (to_rainrate, 1 / 15), (to_reflectivity, None), ] @pytest.mark.parametrize("converter, thr_factor", test_data) class TestSAL: pytest.importorskip("pandas") pytest.importorskip("skimage") def test_sal_zeros(self, converter, thr_factor): """Test the SAL verification method.""" precip, metadata = get_precipitation_fields( num_prev_files=0, log_transform=False, metadata=True ) precip, metadata = converter(precip.filled(np.nan), metadata) result = sal(precip * 0, precip * 0, thr_factor) assert np.isnan(result).all() result = sal(precip * 0, precip, thr_factor) assert result[:2] == (-2, -2) assert np.isnan(result[2]) result = sal(precip, precip * 0, thr_factor) assert result[:2] == (2, 2) assert 
np.isnan(result[2]) def test_sal_same_image(self, converter, thr_factor): """Test the SAL verification method.""" precip, metadata = get_precipitation_fields( num_prev_files=0, log_transform=False, metadata=True ) precip, metadata = converter(precip.filled(np.nan), metadata) result = sal(precip, precip, thr_factor) assert isinstance(result, tuple) assert len(result) == 3 assert np.allclose(result, [0, 0, 0]) def test_sal_translation(self, converter, thr_factor): precip, metadata = get_precipitation_fields( num_prev_files=0, log_transform=False, metadata=True ) precip, metadata = converter(precip.filled(np.nan), metadata) precip_translated = np.roll(precip, 10, axis=0) result = sal(precip, precip_translated, thr_factor) assert np.allclose(result[0], 0) assert np.allclose(result[1], 0) assert not np.allclose(result[2], 0) ================================================ FILE: pysteps/tests/test_verification_spatialscores.py ================================================ # -*- coding: utf-8 -*- import pytest from numpy.testing import assert_array_almost_equal from pysteps.tests.helpers import get_precipitation_fields from pysteps.verification import spatialscores R = get_precipitation_fields(num_prev_files=1, return_raw=True) test_data = [ (R[0], R[1], "FSS", [1], [10], None, 0.85161531), (R[0], R[1], "BMSE", [1], None, "Haar", 0.99989651), ] @pytest.mark.parametrize("X_f, X_o, name, thrs, scales, wavelet, expected", test_data) def test_intensity_scale(X_f, X_o, name, thrs, scales, wavelet, expected): """Test the intensity_scale.""" if name == "BMSE": pytest.importorskip("pywt") assert_array_almost_equal( spatialscores.intensity_scale(X_f, X_o, name, thrs, scales, wavelet)[0][0], expected, ) R = get_precipitation_fields(num_prev_files=3, return_raw=True) test_data = [ (R[:2], R[2:], "FSS", [1], [10], None), (R[:2], R[2:], "BMSE", [1], None, "Haar"), ] @pytest.mark.parametrize("R1, R2, name, thrs, scales, wavelet", test_data) def test_intensity_scale_methods(R1, R2, 
name, thrs, scales, wavelet): """ Test the intensity_scale merge.""" if name == "BMSE": pytest.importorskip("pywt") # expected reult int = spatialscores.intensity_scale_init(name, thrs, scales, wavelet) spatialscores.intensity_scale_accum(int, R1[0], R1[1]) spatialscores.intensity_scale_accum(int, R2[0], R2[1]) expected = spatialscores.intensity_scale_compute(int)[0][0] # init int_1 = spatialscores.intensity_scale_init(name, thrs, scales, wavelet) int_2 = spatialscores.intensity_scale_init(name, thrs, scales, wavelet) # accum spatialscores.intensity_scale_accum(int_1, R1[0], R1[1]) spatialscores.intensity_scale_accum(int_2, R2[0], R2[1]) # merge int = spatialscores.intensity_scale_merge(int_1, int_2) # compute score = spatialscores.intensity_scale_compute(int)[0][0] assert_array_almost_equal(score, expected) ================================================ FILE: pysteps/timeseries/__init__.py ================================================ # -*- coding: utf-8 -*- """Methods and models for time series analysis.""" ================================================ FILE: pysteps/timeseries/autoregression.py ================================================ # -*- coding: utf-8 -*- """ pysteps.timeseries.autoregression ================================= Methods related to autoregressive AR(p) models. .. autosummary:: :toctree: ../generated/ adjust_lag2_corrcoef1 adjust_lag2_corrcoef2 ar_acf estimate_ar_params_ols estimate_ar_params_ols_localized estimate_ar_params_yw estimate_ar_params_yw_localized estimate_var_params_ols estimate_var_params_ols_localized estimate_var_params_yw iterate_ar_model iterate_var_model """ import numpy as np from scipy.special import binom from scipy import linalg as la from scipy import ndimage def adjust_lag2_corrcoef1(gamma_1, gamma_2): """ A simple adjustment of lag-2 temporal autocorrelation coefficient to ensure that the resulting AR(2) process is stationary when the parameters are estimated from the Yule-Walker equations. 
def adjust_lag2_corrcoef1(gamma_1, gamma_2):
    """
    A simple adjustment of the lag-2 temporal autocorrelation coefficient so
    that the AR(2) process estimated from the Yule-Walker equations is
    stationary.

    Parameters
    ----------
    gamma_1: float
        Lag-1 temporal autocorrelation coefficient.
    gamma_2: float
        Lag-2 temporal autocorrelation coefficient.

    Returns
    -------
    out: float
        The adjusted lag-2 correlation coefficient.
    """
    # Clip gamma_2 into the stationarity region (small epsilons keep the
    # bounds strict).
    lower_bound = 2 * gamma_1 * gamma_1 - 1 + 1e-10
    gamma_2 = np.maximum(gamma_2, lower_bound)
    return np.minimum(gamma_2, 1 - 1e-10)


def adjust_lag2_corrcoef2(gamma_1, gamma_2):
    """
    A more advanced adjustment of the lag-2 temporal autocorrelation
    coefficient so that the AR(2) process estimated from the Yule-Walker
    equations is stationary.

    Parameters
    ----------
    gamma_1: float
        Lag-1 temporal autocorrelation coefficient.
    gamma_2: float
        Lag-2 temporal autocorrelation coefficient.

    Returns
    -------
    out: float
        The adjusted lag-2 correlation coefficient.
    """
    gamma_2 = np.maximum(gamma_2, 2 * gamma_1 * gamma_2 - 1)
    lower_bound = (3 * gamma_1**2 - 2 + 2 * (1 - gamma_1**2) ** 1.5) / gamma_1**2
    return np.maximum(gamma_2, lower_bound)
""" ar_order = len(gamma) if n == ar_order or n is None: return gamma elif n < ar_order: raise ValueError( "n=%i, but must be larger than the order of the AR process %i" % (n, ar_order) ) phi = estimate_ar_params_yw(gamma)[:-1] acf = gamma.copy() for t in range(0, n - ar_order): # Retrieve gammas (in reverse order) gammas = acf[t : t + ar_order][::-1] # Compute next gamma gamma_ = np.sum(gammas * phi) acf.append(gamma_) return acf def estimate_ar_params_ols( x, p, d=0, check_stationarity=True, include_constant_term=False, h=0, lam=0.0 ): r""" Estimate the parameters of an autoregressive AR(p) model :math:`x_{k+1}=c+\phi_1 x_k+\phi_2 x_{k-1}+\dots+\phi_p x_{k-p}+\phi_{p+1}\epsilon` by using ordinary least squares (OLS). If :math:`d\geq 1`, the parameters are estimated for a d times differenced time series that is integrated back to the original one by summation of the differences. Parameters ---------- x: array_like Array of shape (n,...) containing a time series of length n=p+d+h+1. The remaining dimensions are flattened. The rows and columns of x represent time steps and samples, respectively. p: int The order of the model. d: {0,1} The order of differencing to apply to the time series. check_stationarity: bool Check the stationarity of the estimated model. include_constant_term: bool Include the constant term :math:`c` to the model. h: int If h>0, the fitting is done by using a history of length h in addition to the minimal required number of time steps n=p+d+1. lam: float If lam>0, the regression is regularized by adding a penalty term (i.e. ridge regression). Returns ------- out: list The estimated parameter matrices :math:`\mathbf{\Phi}_1,\mathbf{\Phi}_2, \dots,\mathbf{\Phi}_{p+1}`. If include_constant_term is True, the constant term :math:`c` is added to the beginning of the list. Notes ----- Estimation of the innovation term parameter :math:`\phi_{p+1}` is currently implemented for p<=2. If p > 2, :math:`\phi_{p+1}` is set to zero. 
""" n = x.shape[0] if n != p + d + h + 1: raise ValueError( "n = %d, p = %d, d = %d, h = %d, but n = p+d+h+1 = %d required" % (n, p, d, h, p + d + h + 1) ) if len(x.shape) > 1: x = x.reshape((n, np.prod(x.shape[1:]))) if d not in [0, 1]: raise ValueError("d = %d, but 0 or 1 required" % d) if d == 1: x = np.diff(x, axis=0) n -= d x_lhs = x[p:, :] Z = [] for i in range(x.shape[1]): for j in range(p - 1, n - 1 - h): z_ = np.hstack([x[j - k, i] for k in range(p)]) if include_constant_term: z_ = np.hstack([[1], z_]) Z.append(z_) Z = np.column_stack(Z) b = np.dot( np.dot(x_lhs, Z.T), np.linalg.inv(np.dot(Z, Z.T) + lam * np.eye(Z.shape[0])) ) b = b.flatten() if include_constant_term: c = b[0] phi = list(b[1:]) else: phi = list(b) if p == 1: phi_pert = np.sqrt(1.0 - phi[0] * phi[0]) elif p == 2: phi_pert = np.sqrt( (1.0 + phi[1]) * ((1.0 - phi[1]) ** 2.0 - phi[0] ** 2.0) / (1.0 - phi[1]) ) else: phi_pert = 0.0 if check_stationarity: if not test_ar_stationarity(phi): raise RuntimeError( "Error in estimate_ar_params_yw: " "nonstationary AR(p) process" ) if d == 1: phi_out = _compute_differenced_model_params(phi, p, 1, 1) else: phi_out = phi phi_out.append(phi_pert) if include_constant_term: phi_out.insert(0, c) return phi_out def estimate_ar_params_ols_localized( x, p, window_radius, d=0, include_constant_term=False, h=0, lam=0.0, window="gaussian", ): r""" Estimate the parameters of a localized AR(p) model :math:`x_{k+1,i}=c_i+\phi_{1,i}x_{k,i}+\phi_{2,i}x_{k-1,i}+\dots+\phi_{p,i}x_{k-p,i}+\phi_{p+1,i}\epsilon` by using ordinary least squares (OLS), where :math:`i` denote spatial coordinates with arbitrary dimension. If :math:`d\geq 1`, the parameters are estimated for a d times differenced time series that is integrated back to the original one by summation of the differences. Parameters ---------- x: array_like Array of shape (n,...) containing a time series of length n=p+d+h+1. The remaining dimensions are flattened. 
def estimate_ar_params_ols_localized(
    x,
    p,
    window_radius,
    d=0,
    include_constant_term=False,
    h=0,
    lam=0.0,
    window="gaussian",
):
    r"""
    Estimate the parameters of a localized AR(p) model

    :math:`x_{k+1,i}=c_i+\phi_{1,i}x_{k,i}+\phi_{2,i}x_{k-1,i}+\dots+\phi_{p,i}x_{k-p,i}+\phi_{p+1,i}\epsilon`

    by using ordinary least squares (OLS), where :math:`i` denote spatial
    coordinates with arbitrary dimension. If :math:`d\geq 1`, the parameters
    are estimated for a d times differenced time series that is integrated
    back to the original one by summation of the differences.

    Parameters
    ----------
    x: array_like
        Array of shape (n,...) containing a time series of length n=p+d+h+1.
        The remaining dimensions are the spatial coordinates.
    p: int
        The order of the model.
    window_radius: float
        Radius of the moving window. If window is 'gaussian', window_radius
        is the standard deviation of the Gaussian filter. If window is
        'uniform', the size of the window is 2*window_radius+1.
    d: {0,1}
        The order of differencing to apply to the time series.
    include_constant_term: bool
        Include the constant term :math:`c_i` to the model.
    h: int
        If h>0, the fitting is done by using a history of length h in
        addition to the minimal required number of time steps n=p+d+1.
    lam: float
        If lam>0, the regression is regularized by adding a penalty term
        (i.e. ridge regression).
    window: {"gaussian", "uniform"}
        The weight function to use for the moving window. Defaults to
        'gaussian'.

    Returns
    -------
    out: list
        List of length p+1 containing the AR(p) parameter fields for the
        lag-p terms and the innovation term. NaN values are assigned where
        the local linear system cannot be inverted. If include_constant_term
        is True, the constant term :math:`c_i` is added to the beginning of
        the list.

    Notes
    -----
    Estimation of the innovation term parameter :math:`\phi_{p+1}` is
    currently implemented for p<=2. If p > 2, :math:`\phi_{p+1}` is set to a
    zero array.
    """
    n = x.shape[0]
    if n != p + d + h + 1:
        raise ValueError(
            "n = %d, p = %d, d = %d, h = %d, but n = p+d+h+1 = %d required"
            % (n, p, d, h, p + d + h + 1)
        )

    if d == 1:
        x = np.diff(x, axis=0)
        n -= d

    # Select the windowing function that computes the localized sums below.
    if window == "gaussian":
        convol_filter = ndimage.gaussian_filter
    else:
        convol_filter = ndimage.uniform_filter

    if window == "uniform":
        window_size = 2 * window_radius + 1
    else:
        window_size = window_radius

    # Localized cross products between the predicted values and the lagged
    # predictors (the "X'Z" part of the OLS normal equations), one field per
    # lag, accumulated over the history of length h+1.
    XZ = np.zeros(np.hstack([[p], x.shape[1:]]))
    for i in range(p):
        for j in range(h + 1):
            tmp = convol_filter(
                x[p + j, :] * x[p - 1 - i + j, :], window_size, mode="constant"
            )
            XZ[i, :] += tmp

    if include_constant_term:
        # Localized sums of the predicted values pair with the constant
        # column of the design matrix.
        v = 0.0
        for i in range(h + 1):
            v += convol_filter(x[p + i, :], window_size, mode="constant")

        XZ = np.vstack([v[np.newaxis, :], XZ])

    # Localized Gram matrix of the predictors (the "Z'Z" part). With the
    # constant term, the first row/column holds the localized sample counts
    # and predictor sums.
    if not include_constant_term:
        Z2 = np.zeros(np.hstack([[p, p], x.shape[1:]]))
        for i in range(p):
            for j in range(p):
                for k in range(h + 1):
                    tmp = convol_filter(
                        x[p - 1 - i + k, :] * x[p - 1 - j + k, :],
                        window_size,
                        mode="constant",
                    )
                    Z2[i, j, :] += tmp
    else:
        Z2 = np.zeros(np.hstack([[p + 1, p + 1], x.shape[1:]]))

        Z2[0, 0, :] = convol_filter(np.ones(x.shape[1:]), window_size, mode="constant")

        for i in range(p):
            for j in range(h + 1):
                tmp = convol_filter(x[p - 1 - i + j, :], window_size, mode="constant")
                Z2[0, i + 1, :] += tmp
                Z2[i + 1, 0, :] += tmp

        for i in range(p):
            for j in range(p):
                for k in range(h + 1):
                    tmp = convol_filter(
                        x[p - 1 - i + k, :] * x[p - 1 - j + k, :],
                        window_size,
                        mode="constant",
                    )
                    Z2[i + 1, j + 1, :] += tmp

    # Solve the (regularized) normal equations independently for each grid
    # point; singular local systems yield NaN parameters.
    m = np.prod(x.shape[1:])

    phi = np.empty(np.hstack([[p], m]))
    if include_constant_term:
        c = np.empty(m)

    XZ = XZ.reshape(np.hstack([[XZ.shape[0]], m]))
    Z2 = Z2.reshape(np.hstack([[Z2.shape[0], Z2.shape[1]], m]))

    for i in range(m):
        try:
            b = np.dot(XZ[:, i], np.linalg.inv(Z2[:, :, i] + lam * np.eye(Z2.shape[0])))
            if not include_constant_term:
                phi[:, i] = b
            else:
                phi[:, i] = b[1:]
                c[i] = b[0]
        except np.linalg.LinAlgError:
            phi[:, i] = np.nan
            if include_constant_term:
                c[i] = np.nan

    # Innovation-term parameter field: closed-form expressions exist for p<=2.
    if p == 1:
        phi_pert = np.sqrt(1.0 - phi[0, :] * phi[0, :])
    elif p == 2:
        phi_pert = np.sqrt(
            (1.0 + phi[1, :])
            * ((1.0 - phi[1, :]) ** 2.0 - phi[0, :] ** 2.0)
            / (1.0 - phi[1, :])
        )
    else:
        phi_pert = np.zeros(m)

    phi = list(phi.reshape(np.hstack([[phi.shape[0]], x.shape[1:]])))

    if d == 1:
        phi = _compute_differenced_model_params(phi, p, 1, 1)

    phi.append(phi_pert.reshape(x.shape[1:]))

    if include_constant_term:
        phi.insert(0, c.reshape(x.shape[1:]))

    return phi
def estimate_ar_params_yw(gamma, d=0, check_stationarity=True):
    r"""
    Estimate the parameters of an AR(p) model

    :math:`x_{k+1}=\phi_1 x_k+\phi_2 x_{k-1}+\dots+\phi_p x_{k-p}+\phi_{p+1}\epsilon`

    from the Yule-Walker equations using the given set of autocorrelation
    coefficients.

    Parameters
    ----------
    gamma: array_like
        Array of length p containing the lag-l temporal autocorrelation
        coefficients for l=1,2,...p, in ascending order by time lag.
    d: {0,1}
        The order of differencing. If d=1, the correlation coefficients gamma
        are assumed to be computed from the differenced time series, which is
        also done for the resulting parameter estimates.
    check_stationarity: bool
        If True, the stationarity of the resulting AR(p) process is tested.
        An exception is thrown if the process is not stationary.

    Returns
    -------
    out: ndarray
        Array of length p+1 containing the AR(p) parameters for the lag-p
        terms and the innovation term.

    Notes
    -----
    To estimate the parameters of an integrated ARI(p,d) model, compute the
    correlation coefficients gamma by calling
    :py:func:`pysteps.timeseries.correlation.temporal_autocorrelation`
    with d>0.
    """
    if d not in [0, 1]:
        raise ValueError("d = %d, but 0 or 1 required" % d)

    p = len(gamma)

    # Yule-Walker system: the coefficient matrix is the symmetric Toeplitz
    # matrix built from the autocorrelations [1, gamma_1, ..., gamma_{p-1}].
    g = np.hstack([[1.0], gamma])
    G = np.array([np.roll(g[:-1], j) for j in range(p)])
    phi = np.linalg.solve(G, g[1:].flatten())

    # Check that the absolute values of the roots of the characteristic
    # polynomial are less than one; otherwise the AR(p) model is not
    # stationary.
    if check_stationarity and not test_ar_stationarity(phi):
        raise RuntimeError(
            "Error in estimate_ar_params_yw: " "nonstationary AR(p) process"
        )

    # Innovation-term parameter from the residual variance of the fit.
    c = 1.0
    for gamma_j, phi_j in zip(gamma, phi):
        c -= gamma_j * phi_j
    phi_pert = np.sqrt(c)

    # If the expression inside the square root is negative, phi_pert cannot
    # be computed and it is set to zero instead.
    if not np.isfinite(phi_pert):
        phi_pert = 0.0

    if d == 1:
        phi = _compute_differenced_model_params(phi, p, 1, 1)

    phi_out = np.empty(len(phi) + 1)
    phi_out[: len(phi)] = phi
    phi_out[-1] = phi_pert

    return phi_out
ridge regression). Returns ------- out: list The estimated parameter matrices :math:`\mathbf{\Phi}_1,\mathbf{\Phi}_2, \dots,\mathbf{\Phi}_{p+1}`. If include_constant_term is True, the constant term :math:`\mathbf{c}` is added to the beginning of the list. Notes ----- Estimation of the innovation parameter :math:`\mathbf{\Phi}_{p+1}` is not currently implemented, and it is set to a zero matrix. """ q = x.shape[1] n = x.shape[0] if n != p + d + h + 1: raise ValueError( "n = %d, p = %d, d = %d, h = %d, but n = p+d+h+1 = %d required" % (n, p, d, h, p + d + h + 1) ) if d not in [0, 1]: raise ValueError("d = %d, but 0 or 1 required" % d) if d == 1: x = np.diff(x, axis=0) n -= d x = x.reshape((n, q, np.prod(x.shape[2:]))) X = [] for i in range(x.shape[2]): for j in range(p + h, n): x_ = x[j, :, i] X.append(x_.reshape((q, 1))) X = np.hstack(X) Z = [] for i in range(x.shape[2]): for j in range(p - 1, n - 1 - h): z_ = np.vstack([x[j - k, :, i].reshape((q, 1)) for k in range(p)]) if include_constant_term: z_ = np.vstack([[1], z_]) Z.append(z_) Z = np.column_stack(Z) B = np.dot(np.dot(X, Z.T), np.linalg.inv(np.dot(Z, Z.T) + lam * np.eye(Z.shape[0]))) phi = [] if include_constant_term: c = B[:, 0] for i in range(p): phi.append(B[:, i * q + 1 : (i + 1) * q + 1]) else: for i in range(p): phi.append(B[:, i * q : (i + 1) * q]) if check_stationarity: M = np.zeros((p * q, p * q)) for i in range(p): M[0:q, i * q : (i + 1) * q] = phi[i] for i in range(1, p): M[i * q : (i + 1) * q, (i - 1) * q : i * q] = np.eye(q, q) r, v = np.linalg.eig(M) if np.any(np.abs(r) > 0.999): raise RuntimeError( "Error in estimate_var_params_ols: " "nonstationary VAR(p) process" ) if d == 1: phi = _compute_differenced_model_params(phi, p, q, 1) if include_constant_term: phi.insert(0, c) phi.append(np.zeros((q, q))) return phi def estimate_var_params_ols_localized( x, p, window_radius, d=0, include_constant_term=False, h=0, lam=0.0, window="gaussian", ): r""" Estimate the parameters of a vector autoregressive 
def estimate_var_params_ols_localized(
    x,
    p,
    window_radius,
    d=0,
    include_constant_term=False,
    h=0,
    lam=0.0,
    window="gaussian",
):
    r"""
    Estimate the parameters of a localized vector autoregressive VAR(p) model

    :math:`\mathbf{x}_{k+1,i}=\mathbf{c}_i+\mathbf{\Phi}_{1,i}\mathbf{x}_{k,i}+
    \mathbf{\Phi}_{2,i}\mathbf{x}_{k-1,i}+\dots+\mathbf{\Phi}_{p,i}
    \mathbf{x}_{k-p,i}+\mathbf{\Phi}_{p+1,i}\mathbf{\epsilon}`

    by using ordinary least squares (OLS), where :math:`i` denote spatial
    coordinates with arbitrary dimension. If :math:`d\geq 1`, the parameters
    are estimated for a d times differenced time series that is integrated
    back to the original one by summation of the differences.

    Parameters
    ----------
    x: array_like
        Array of shape (n, q, :) containing a time series of length n=p+d+h+1
        with q-dimensional variables. The dimensions starting from the third
        one are the spatial coordinates.
    p: int
        The order of the model.
    window_radius: float
        Radius of the moving window. If window is 'gaussian', window_radius
        is the standard deviation of the Gaussian filter. If window is
        'uniform', the size of the window is 2*window_radius+1.
    d: {0,1}
        The order of differencing to apply to the time series.
    include_constant_term: bool
        Include the constant term :math:`\mathbf{c}_i` to the model.
    h: int
        If h>0, the fitting is done by using a history of length h in
        addition to the minimal required number of time steps n=p+d+1.
    lam: float
        If lam>0, the regression is regularized by adding a penalty term
        (i.e. ridge regression).
    window: {"gaussian", "uniform"}
        The weight function to use for the moving window. Defaults to
        'gaussian'.

    Returns
    -------
    out: list
        The estimated parameter fields :math:`\mathbf{\Phi}_{1,i},
        \mathbf{\Phi}_{2,i},\dots,\mathbf{\Phi}_{p+1,i}`, each of shape
        (x.shape[2:], q, q). NaN values are assigned where the local linear
        system cannot be inverted. If include_constant_term is True, the
        constant term :math:`\mathbf{c}_i` is added to the beginning of the
        list.

    Notes
    -----
    Estimation of the innovation parameter :math:`\mathbf{\Phi}_{p+1}` is not
    currently implemented, and it is set to a zero matrix.
    """
    q = x.shape[1]
    n = x.shape[0]
    if n != p + d + h + 1:
        raise ValueError(
            "n = %d, p = %d, d = %d, h = %d, but n = p+d+h+1 = %d required"
            % (n, p, d, h, p + d + h + 1)
        )

    if d == 1:
        x = np.diff(x, axis=0)
        n -= d

    # Select the windowing function that computes the localized sums below.
    if window == "gaussian":
        convol_filter = ndimage.gaussian_filter
    else:
        convol_filter = ndimage.uniform_filter

    if window == "uniform":
        window_size = 2 * window_radius + 1
    else:
        window_size = window_radius

    # Localized cross products between the predicted variables and the lagged
    # predictors (the "X'Z" part of the OLS normal equations), accumulated
    # over the history of length h+1.
    XZ = np.zeros(np.hstack([[q, p * q], x.shape[2:]]))
    for i in range(q):
        for k in range(p):
            for j in range(q):
                for l in range(h + 1):
                    tmp = convol_filter(
                        x[p + l, i, :] * x[p - 1 - k + l, j, :],
                        window_size,
                        mode="constant",
                    )
                    XZ[i, k * q + j, :] += tmp

    if include_constant_term:
        # Localized sums of the predicted variables pair with the constant
        # column of the design matrix.
        v = np.zeros(np.hstack([[q], x.shape[2:]]))
        for i in range(q):
            for j in range(h + 1):
                v[i, :] += convol_filter(x[p + j, i, :], window_size, mode="constant")

        XZ = np.hstack([v[:, np.newaxis, :], XZ])

    # Localized Gram matrix of the predictors (the "Z'Z" part). With the
    # constant term, the first row/column holds the localized sample counts
    # and predictor sums.
    if not include_constant_term:
        Z2 = np.zeros(np.hstack([[p * q, p * q], x.shape[2:]]))
        for i in range(p):
            for j in range(q):
                for k in range(p):
                    for l in range(q):
                        for m in range(h + 1):
                            tmp = convol_filter(
                                x[p - 1 - i + m, j, :] * x[p - 1 - k + m, l, :],
                                window_size,
                                mode="constant",
                            )
                            Z2[i * q + j, k * q + l, :] += tmp
    else:
        Z2 = np.zeros(np.hstack([[p * q + 1, p * q + 1], x.shape[2:]]))

        Z2[0, 0, :] = convol_filter(np.ones(x.shape[2:]), window_size, mode="constant")

        for i in range(p):
            for j in range(q):
                for k in range(h + 1):
                    tmp = convol_filter(
                        x[p - 1 - i + k, j, :], window_size, mode="constant"
                    )
                    Z2[0, i * q + j + 1, :] += tmp
                    Z2[i * q + j + 1, 0, :] += tmp

        for i in range(p):
            for j in range(q):
                for k in range(p):
                    for l in range(q):
                        for m in range(h + 1):
                            tmp = convol_filter(
                                x[p - 1 - i + m, j, :] * x[p - 1 - k + m, l, :],
                                window_size,
                                mode="constant",
                            )
                            Z2[i * q + j + 1, k * q + l + 1, :] += tmp

    # Solve the (regularized) normal equations independently for each grid
    # point; singular local systems yield NaN parameter matrices.
    m = np.prod(x.shape[2:])

    if include_constant_term:
        c = np.empty((m, q))

    XZ = XZ.reshape((XZ.shape[0], XZ.shape[1], m))
    Z2 = Z2.reshape((Z2.shape[0], Z2.shape[1], m))
    phi = np.empty((p, m, q, q))

    for i in range(m):
        try:
            B = np.dot(
                XZ[:, :, i], np.linalg.inv(Z2[:, :, i] + lam * np.eye(Z2.shape[0]))
            )
            for k in range(p):
                if not include_constant_term:
                    phi[k, i, :, :] = B[:, k * q : (k + 1) * q]
                else:
                    phi[k, i, :, :] = B[:, k * q + 1 : (k + 1) * q + 1]
            if include_constant_term:
                c[i, :] = B[:, 0]
        except np.linalg.LinAlgError:
            phi[:, i, :, :] = np.nan
            if include_constant_term:
                c[i, :] = np.nan

    # Restore the spatial dimensions of the parameter fields.
    phi_out = [
        phi[i].reshape(np.hstack([x.shape[2:], [q, q]])) for i in range(len(phi))
    ]

    if d == 1:
        phi_out = _compute_differenced_model_params(phi_out, p, q, 1)

    # Innovation parameter is not estimated; append a zero field.
    phi_out.append(np.zeros(phi_out[0].shape))

    if include_constant_term:
        phi_out.insert(0, c.reshape(np.hstack([x.shape[2:], [q]])))

    return phi_out
def estimate_var_params_yw(gamma, d=0, check_stationarity=True):
    r"""
    Estimate the parameters of a VAR(p) model

    :math:`\mathbf{x}_{k+1}=\mathbf{\Phi}_1\mathbf{x}_k+
    \mathbf{\Phi}_2\mathbf{x}_{k-1}+\dots+\mathbf{\Phi}_p\mathbf{x}_{k-p}+
    \mathbf{\Phi}_{p+1}\mathbf{\epsilon}`

    from the Yule-Walker equations using the given correlation matrices
    :math:`\mathbf{\Gamma}_0,\mathbf{\Gamma}_1,\dots,\mathbf{\Gamma}_n`,
    where n=p.

    Parameters
    ----------
    gamma: list
        List of correlation matrices
        :math:`\mathbf{\Gamma}_0,\mathbf{\Gamma}_1,\dots,\mathbf{\Gamma}_n`.
        To obtain these matrices, use
        :py:func:`pysteps.timeseries.correlation.temporal_autocorrelation_multivariate`
        with window_radius=np.inf.
    d: {0,1}
        The order of differencing. If d=1, the correlation coefficients gamma
        are assumed to be computed from the differenced time series, which is
        also done for the resulting parameter estimates.
    check_stationarity: bool
        If True, the stationarity of the resulting VAR(p) process is tested.
        An exception is thrown if the process is not stationary.

    Returns
    -------
    out: list
        List of VAR(p) coefficient matrices :math:`\mathbf{\Phi}_1,
        \mathbf{\Phi}_2,\dots\mathbf{\Phi}_{p+1}`, where the last matrix
        corresponds to the innovation term.

    Notes
    -----
    Estimation of the innovation parameter :math:`\mathbf{\Phi}_{p+1}` is not
    currently implemented, and it is set to a zero matrix.
    """
    p = len(gamma) - 1
    q = gamma[0].shape[0]

    # Every correlation matrix must be q x q.
    for i, gamma_i in enumerate(gamma):
        if gamma_i.shape[0] != q or gamma_i.shape[1] != q:
            raise ValueError(
                "dimension mismatch: gamma[%d].shape=%s, but (%d,%d) expected"
                % (i, str(gamma_i.shape), q, q)
            )

    if d not in [0, 1]:
        raise ValueError("d = %d, but 0 or 1 required" % d)

    # Assemble the block-Toeplitz coefficient matrix of the Yule-Walker
    # system: block (i, j) is Gamma_{|i-j|}, transposed below the diagonal.
    lhs = np.empty((p * q, p * q))
    for i in range(p):
        for j in range(p):
            block = gamma[abs(i - j)]
            if i > j:
                block = block.T
            lhs[i * q : (i + 1) * q, j * q : (j + 1) * q] = block

    rhs = np.vstack([gamma[i].T for i in range(1, p + 1)])
    sol = np.linalg.solve(lhs, rhs)

    phi = [sol[i * q : (i + 1) * q, :] for i in range(p)]

    if check_stationarity and not test_var_stationarity(phi):
        raise RuntimeError(
            "Error in estimate_var_params_yw: " "nonstationary VAR(p) process"
        )

    if d == 1:
        phi = _compute_differenced_model_params(phi, p, q, 1)

    # Innovation parameter matrix: not estimated, set to zero.
    phi.append(np.zeros(phi[0].shape))

    return phi
def iterate_ar_model(x, phi, eps=None):
    r"""
    Apply an AR(p) model

    :math:`x_{k+1}=\phi_1 x_k+\phi_2 x_{k-1}+\dots+\phi_p x_{k-p}+\phi_{p+1}\epsilon`

    to a time series :math:`x_k`.

    Parameters
    ----------
    x: array_like
        Array of shape (n,...), n>=p, containing a time series of an input
        variable x, in ascending order by time with regular time intervals.
    phi: list
        List or array of length p+1 specifying the parameters of the AR(p)
        model, in ascending order by increasing time lag; the last element is
        the parameter of the innovation term eps.
    eps: array_like
        Optional innovation term for the AR(p) process. Expected to be a
        scalar or of shape x.shape[1:] if len(x.shape)>1. If None, the
        innovation term is not added.
    """
    if x.shape[0] < len(phi) - 1:
        raise ValueError(
            "dimension mismatch between x and phi: x.shape[0]=%d, len(phi)=%d"
            % (x.shape[0], len(phi))
        )

    # Promote a one-dimensional series to a single-sample column so that the
    # same indexing works for both cases; the flag restores the shape below.
    one_dim_input = len(x.shape) == 1
    if one_dim_input:
        x = x[:, np.newaxis]

    if eps is not None and eps.shape != x.shape[1:]:
        raise ValueError(
            "dimension mismatch between x and eps: x[1:].shape=%s, eps.shape=%s"
            % (str(x[1:].shape), str(eps.shape))
        )

    p = len(phi) - 1

    # Weighted sum of the p most recent values, most recent first.
    x_next = 0.0
    for lag in range(p):
        x_next += phi[lag] * x[-(lag + 1), :]

    if eps is not None:
        x_next += phi[-1] * eps

    # Shift the window forward by one step and append the new value.
    if one_dim_input:
        return np.hstack([x[1:], [x_next]])
    return np.concatenate([x[1:, :], x_next[np.newaxis, :]])
""" if x.shape[0] < len(phi) - 1: raise ValueError( "dimension mismatch between x and phi: x.shape[0]=%d, len(phi)=%d" % (x.shape[0], len(phi)) ) if len(x.shape) == 1: x_simple_shape = True x = x[:, np.newaxis] else: x_simple_shape = False if eps is not None and eps.shape != x.shape[1:]: raise ValueError( "dimension mismatch between x and eps: x[1:].shape=%s, eps.shape=%s" % (str(x[1:].shape), str(eps.shape)) ) x_new = 0.0 p = len(phi) - 1 for i in range(p): x_new += phi[i] * x[-(i + 1), :] if eps is not None: x_new += phi[-1] * eps if x_simple_shape: return np.hstack([x[1:], [x_new]]) else: return np.concatenate([x[1:, :], x_new[np.newaxis, :]]) def iterate_var_model(x, phi, eps=None): r"""Apply a VAR(p) model :math:`\mathbf{x}_{k+1}=\mathbf{\Phi}_1\mathbf{x}_k+\mathbf{\Phi}_2 \mathbf{x}_{k-1}+\dots+\mathbf{\Phi}_p\mathbf{x}_{k-p}+ \mathbf{\Phi}_{p+1}\mathbf{\epsilon}` to a q-variate time series :math:`\mathbf{x}_k`. Parameters ---------- x: array_like Array of shape (n,q,...), n>=p, containing a q-variate time series of a input variable x. The elements of x along the first dimension are assumed to be in ascending order by time, and the time intervals are assumed to be regular. phi: list List of parameter matrices :math:`\mathbf{\Phi}_1,\mathbf{\Phi}_2,\dots, \mathbf{\Phi}_{p+1}`. eps: array_like Optional innovation term for the AR(p) process. The shape of eps is expected to be (x.shape[1],) or (x.shape[1],x.shape[2:]) if len(x.shape)>2. If eps is None, the innovation term is not added. 
""" if x.shape[0] < len(phi) - 1: raise ValueError( "dimension mismatch between x and phi: x.shape[0]=%d, len(phi)=%d" % (x.shape[1], len(phi)) ) phi_shape = phi[0].shape if phi_shape[-1] != phi_shape[-2]: raise ValueError( "phi[0].shape = %s, but the last two dimensions are expected to be equal" % str(phi_shape) ) for i in range(1, len(phi)): if phi[i].shape != phi_shape: raise ValueError("dimension mismatch between parameter matrices phi") if len(x.shape) == 2: x_simple_shape = True x = x[:, :, np.newaxis] else: x_simple_shape = False x_new = np.zeros(x.shape[1:]) p = len(phi) - 1 for l in range(p): x_new += np.einsum("...ij,j...->i...", phi[l], x[-(l + 1), :]) if eps is not None: x_new += np.dot(np.dot(phi[-1], phi[-1]), eps) if x_simple_shape: return np.vstack([x[1:, :, 0], x_new[:, 0]]) else: x_new = x_new.reshape(x.shape[1:]) return np.concatenate([x[1:, :], x_new[np.newaxis, :, :]], axis=0) def test_ar_stationarity(phi): r""" Test stationarity of an AR(p) process. That is, test that the roots of the equation :math:`x^p-\phi_1*x^{p-1}-\dots-\phi_p` lie inside the unit circle. Parameters ---------- phi: list List of AR(p) parameters :math:`\phi_1,\phi_2,\dots,\phi_p`. Returns ------- out: bool True/False if the process is/is not stationary. """ r = np.array( [ np.abs(r_) for r_ in np.roots([1.0 if i == 0 else -phi[i] for i in range(len(phi))]) ] ) return False if np.any(r >= 1) else True def test_var_stationarity(phi): r""" Test stationarity of an AR(p) process. That is, test that the moduli of the eigenvalues of the companion matrix lie inside the unit circle. Parameters ---------- phi: list List of VAR(p) parameter matrices :math:`\mathbf{\Phi}_1,\mathbf{\Phi}_2, \dots,\mathbf{\Phi}_p`. Returns ------- out: bool True/False if the process is/is not stationary. 
""" q = phi[0].shape for i in range(1, len(phi)): if phi[i].shape != q: raise ValueError("dimension mismatch between parameter matrices phi") p = len(phi) q = phi[0].shape[0] M = np.zeros((p * q, p * q)) for i in range(p): M[0:q, i * q : (i + 1) * q] = phi[i] for i in range(1, p): M[i * q : (i + 1) * q, (i - 1) * q : i * q] = np.eye(q, q) r = np.linalg.eig(M)[0] return False if np.any(np.abs(r) >= 1) else True def _compute_differenced_model_params(phi, p, q, d): phi_out = [] for i in range(p + d): if q > 1: if len(phi[0].shape) == 2: phi_out.append(np.zeros((q, q))) else: phi_out.append(np.zeros(phi[0].shape)) else: phi_out.append(0.0) for i in range(1, d + 1): if q > 1: phi_out[i - 1] -= binom(d, i) * (-1) ** i * np.eye(q) else: phi_out[i - 1] -= binom(d, i) * (-1) ** i for i in range(1, p + 1): phi_out[i - 1] += phi[i - 1] for i in range(1, p + 1): for j in range(1, d + 1): phi_out[i + j - 1] += phi[i - 1] * binom(d, j) * (-1) ** j return phi_out ================================================ FILE: pysteps/timeseries/correlation.py ================================================ # -*- coding: utf-8 -*- r""" pysteps.timeseries.correlation ============================== Methods for computing spatial and temporal correlation of time series of two-dimensional fields. .. autosummary:: :toctree: ../generated/ temporal_autocorrelation temporal_autocorrelation_multivariate """ import numpy as np from scipy import ndimage from pysteps.utils import spectral def temporal_autocorrelation( x, d=0, domain="spatial", x_shape=None, mask=None, use_full_fft=False, window="gaussian", window_radius=np.inf, ): r""" Compute lag-l temporal autocorrelation coefficients :math:`\gamma_l=\mbox{corr}(x(t),x(t-l))`, :math:`l=1,2,\dots,n-1`, from a time series :math:`x_1,x_2,\dots,x_n`. If a multivariate time series is given, each element of :math:`x_i` is treated as one sample from the process generating the time series. 
Use :py:func:`temporal_autocorrelation_multivariate` if cross-correlations between different elements of the time series are desired. Parameters ---------- x: array_like Array of shape (n, ...), where each row contains one sample from the time series :math:`x_i`. The inputs are assumed to be in increasing order with respect to time, and the time step is assumed to be regular. All inputs are required to have finite values. The remaining dimensions after the first one are flattened before computing the correlation coefficients. d: {0,1} The order of differencing. If d=1, the input time series is differenced before computing the correlation coefficients. In this case, a time series of length n+1 is needed for computing the n-1 coefficients. domain: {"spatial", "spectral"} The domain of the time series x. If domain is "spectral", the elements of x are assumed to represent the FFTs of the original elements. x_shape: tuple The shape of the original arrays in the spatial domain before applying the FFT. Required if domain is "spectral". mask: array_like Optional mask to use for computing the correlation coefficients. Input elements with mask==False are excluded from the computations. The shape of the mask is expected to be x.shape[1:]. Applicable if domain is "spatial". use_full_fft: bool If True, x represents the full FFTs of the original arrays. Otherwise, the elements of x are assumed to contain only the symmetric part, i.e. in the format returned by numpy.fft.rfft2. Applicable if domain is 'spectral'. Defaults to False. window: {"gaussian", "uniform"} The weight function to use for the moving window. Applicable if window_radius < np.inf. Defaults to 'gaussian'. window_radius: float If window_radius < np.inf, the correlation coefficients are computed in a moving window. Defaults to np.inf (i.e. the coefficients are computed over the whole domain). If window is 'gaussian', window_radius is the standard deviation of the Gaussian filter. 
def temporal_autocorrelation_multivariate(
    x, d=0, mask=None, window="gaussian", window_radius=np.inf
):
    r"""
    Compute lagged cross-correlation matrices of a multivariate time series.

    For a :math:`q`-variate time series
    :math:`\mathbf{x}_1,\mathbf{x}_2,\dots,\mathbf{x}_n`, the lag-l
    correlation matrices :math:`\mathbf{\Gamma}_l` are computed, where
    :math:`\Gamma_{l,i,j}=\mbox{corr}(x_i(t),x_j(t-l))` for
    :math:`i,j=1,2,\dots,q` and :math:`l=0,1,\dots,n-1`.

    Parameters
    ----------
    x: array_like
        Array of shape (n, q, ...) containing the time series
        :math:`\mathbf{x}_i`. The inputs are assumed to be in increasing
        order with respect to time, and the time step is assumed to be
        regular. All inputs are required to have finite values. The
        dimensions after the second one are flattened before computing the
        correlation coefficients.
    d: {0,1}
        The order of differencing. If d=1, the input time series is
        differenced before computing the correlation coefficients. In this
        case, a time series of length n+1 is needed for computing the n-1
        coefficients.
    mask: array_like
        Optional mask to use for computing the correlation coefficients.
        Input elements with mask==False are excluded from the computations.
        The shape of the mask is expected to be x.shape[2:].
    window: {"gaussian", "uniform"}
        The weight function to use for the moving window. Applicable if
        window_radius < np.inf. Defaults to 'gaussian'.
    window_radius: float
        If window_radius < np.inf, the correlation coefficients are computed
        in a moving window. Defaults to np.inf (i.e. the correlations are
        computed over the whole domain). If window is 'gaussian',
        window_radius is the standard deviation of the Gaussian filter. If
        window is 'uniform', the size of the window is 2*window_radius+1.

    Returns
    -------
    out: list
        List of correlation matrices
        :math:`\Gamma_0,\Gamma_1,\dots,\Gamma_{n-1}` of shape (q,q). If
        window_radius < np.inf, the shape is x.shape[2:]+(q,q).
    """
    if len(x.shape) < 3:
        raise ValueError("the dimension of x must be >= 3")
    if mask is not None and mask.shape != x.shape[2:]:
        raise ValueError(
            "dimension mismatch between x and mask: x.shape[2:]=%s, mask.shape=%s"
            % (str(x.shape[2:]), str(mask.shape))
        )
    if np.any(~np.isfinite(x)):
        raise ValueError("x contains non-finite values")

    # optional first-order differencing before the correlation computation
    if d == 1:
        x = np.diff(x, axis=0)

    n_vars = x.shape[1]
    n_lags = x.shape[0]
    localized = window_radius != np.inf

    gamma = []
    for lag in range(n_lags):
        # global correlations yield a (q, q) matrix; localized ones attach
        # the matrix to every grid point
        if localized:
            corr_matrix = np.empty(tuple(x.shape[2:]) + (n_vars, n_vars))
        else:
            corr_matrix = np.empty((n_vars, n_vars))
        for i in range(n_vars):
            # correlations are always taken against the most recent field
            latest = x[-1, i, :]
            for j in range(n_vars):
                lagged = x[-(lag + 1), j, :]
                if localized:
                    corr_matrix[..., i, j] = _moving_window_corrcoef(
                        latest, lagged, window_radius, window=window, mask=mask
                    )
                else:
                    corr_matrix[i, j] = np.corrcoef(
                        latest.flatten(), lagged.flatten()
                    )[0, 1]
        gamma.append(corr_matrix)

    return gamma
def get_method(name):
    """
    Return a callable function for tracking features on input images.

    Implemented methods:

    +-----------------+--------------------------------------------------------+
    |     Name        |              Description                               |
    +=================+========================================================+
    |  lucaskanade    | Wrapper to the OpenCV implementation of the            |
    |                 | Lucas-Kanade tracking algorithm                        |
    +-----------------+--------------------------------------------------------+
    |  tdating        | Thunderstorm Detection and Tracking (DATing) module    |
    +-----------------+--------------------------------------------------------+
    """
    # Reject non-string keys up front; lookups below assume a lowercase name.
    if not isinstance(name, str):
        raise TypeError(
            "Only strings supported for the method's names.\n"
            + "Available names:"
            + str(list(_tracking_methods.keys()))
        ) from None

    method_name = name.lower()
    try:
        return _tracking_methods[method_name]
    except KeyError:
        raise ValueError(
            "Unknown tracking method {}\n".format(method_name)
            + "The available methods are:"
            + str(list(_tracking_methods.keys()))
        ) from None
_`Lucas-Kanade`:\ https://docs.opencv.org/3.4/dc/d6b/group__video__track.html#ga473e4b886d0bcc6b65831eb88ed93323 .. _calcOpticalFlowPyrLK:\ https://docs.opencv.org/3.4/dc/d6b/group__video__track.html#ga473e4b886d0bcc6b65831eb88ed93323 .. _MaskedArray:\ https://docs.scipy.org/doc/numpy/reference/maskedarray.baseclass.html#numpy.ma.MaskedArray .. _ndarray:\ https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html Parameters ---------- prvs_image: ndarray_ or MaskedArray_ Array of shape (m, n) containing the first image. Invalid values (Nans or infs) are replaced with the min value. next_image: ndarray_ or MaskedArray_ Array of shape (m, n) containing the successive image. Invalid values (Nans or infs) are replaced with the min value. points: array_like Array of shape (p, 2) indicating the pixel coordinates of the tracking points (corners). winsize: tuple of int, optional The **winSize** parameter in calcOpticalFlowPyrLK_. It represents the size of the search window that it is used at each pyramid level. The default is (50, 50). nr_levels: int, optional The **maxLevel** parameter in calcOpticalFlowPyrLK_. It represents the 0-based maximal pyramid level number. The default is 3. criteria: tuple of int, optional The **TermCriteria** parameter in calcOpticalFlowPyrLK_ , which specifies the termination criteria of the iterative search algorithm. The default is (3, 10, 0). flags: int, optional Operation flags, see documentation calcOpticalFlowPyrLK_. The default is 0. min_eig_thr: float, optional The **minEigThreshold** parameter in calcOpticalFlowPyrLK_. The default is 1e-4. verbose: bool, optional Print the number of vectors that have been found. The default is False. Returns ------- xy: ndarray_ Array of shape (d, 2) with the x- and y-coordinates of *d* <= *p* detected sparse motion vectors. uv: ndarray_ Array of shape (d, 2) with the u- and v-components of *d* <= *p* detected sparse motion vectors. 
Notes ----- The tracking points can be obtained with the :py:func:`pysteps.utils.images.ShiTomasi_detection` routine. See also -------- pysteps.motion.lucaskanade.dense_lucaskanade References ---------- Bouguet, J.-Y.: Pyramidal implementation of the affine Lucas Kanade feature tracker description of the algorithm, Intel Corp., 5, 4, 2001 Lucas, B. D. and Kanade, T.: An iterative image registration technique with an application to stereo vision, in: Proceedings of the 1981 DARPA Imaging Understanding Workshop, pp. 121–130, 1981. """ if not CV2_IMPORTED: raise MissingOptionalDependency( "opencv package is required for the calcOpticalFlowPyrLK() " "routine but it is not installed" ) prvs_img = prvs_image.copy() next_img = next_image.copy() p0 = np.copy(points) # Check if a MaskedArray is used. If not, mask the ndarray if not isinstance(prvs_img, MaskedArray): prvs_img = np.ma.masked_invalid(prvs_img) np.ma.set_fill_value(prvs_img, prvs_img.min()) if not isinstance(next_img, MaskedArray): next_img = np.ma.masked_invalid(next_img) np.ma.set_fill_value(next_img, next_img.min()) # scale between 0 and 255 im_min = prvs_img.min() im_max = prvs_img.max() if (im_max - im_min) > 1e-8: prvs_img = (prvs_img.filled() - im_min) / (im_max - im_min) * 255 else: prvs_img = prvs_img.filled() - im_min im_min = next_img.min() im_max = next_img.max() if (im_max - im_min) > 1e-8: next_img = (next_img.filled() - im_min) / (im_max - im_min) * 255 else: next_img = next_img.filled() - im_min # convert to 8-bit prvs_img = np.ndarray.astype(prvs_img, "uint8") next_img = np.ndarray.astype(next_img, "uint8") # Lucas-Kanade # TODO: use the error returned by the OpenCV routine params = dict( winSize=winsize, maxLevel=nr_levels, criteria=criteria, flags=flags, minEigThreshold=min_eig_thr, ) p1, st, __ = cv2.calcOpticalFlowPyrLK(prvs_img, next_img, p0, None, **params) # keep only features that have been found st = np.atleast_1d(st.squeeze()) == 1 if np.any(st): p1 = p1[st, :] p0 = p0[st, :] # 
extract vectors xy = p0 uv = p1 - p0 else: xy = uv = np.empty(shape=(0, 2)) if verbose: print(f"--- {xy.shape[0]} sparse vectors found ---") return xy, uv ================================================ FILE: pysteps/tracking/tdating.py ================================================ # -*- coding: utf-8 -*- """ pysteps.tracking.tdating ======================== Thunderstorm Detection and Tracking (DATing) module This module was implemented following the procedures used in the TRT Thunderstorms Radar Tracking algorithm (:cite:`TRT2004`) used operationally at MeteoSwiss. Full documentation is published in :cite:`Feldmann2021`. Modifications include advecting the identified thunderstorms with the optical flow obtained from pysteps, as well as additional options in the thresholding. References ............... :cite:`TRT2004` :cite:`Feldmann2021` @author: mfeldman .. autosummary:: :toctree: ../generated/ dating tracking advect match couple_track """ import numpy as np import pysteps.feature.tstorm as tstorm_detect from pysteps import motion from pysteps.exceptions import MissingOptionalDependency try: import skimage SKIMAGE_IMPORTED = True except ImportError: SKIMAGE_IMPORTED = False if SKIMAGE_IMPORTED: import skimage.measure as skime try: import pandas as pd PANDAS_IMPORTED = True except ImportError: PANDAS_IMPORTED = False def dating( input_video, timelist, mintrack=3, cell_list=None, label_list=None, start=0, minref=35, maxref=48, mindiff=6, minsize=50, minmax=41, mindis=10, dyn_thresh=False, match_frac=0.4, split_frac=0.1, merge_frac=0.1, output_splits_merges=False, ): """ This function performs the thunderstorm detection and tracking DATing. It requires a 3-D input array that contains the temporal succession of the 2-D data array of each timestep. On each timestep the detection is performed, the identified objects are advected with a flow prediction and the advected objects are matched to the newly identified objects of the next timestep. 
The last portion re-arranges the data into tracks sorted by ID-number. Parameters ---------- input_video: array-like Array of shape (t,m,n) containing input image, with t being the temporal dimension and m,n the spatial dimensions. Thresholds are tuned to maximum reflectivity in dBZ with a spatial resolution of 1 km and a temporal resolution of 5 min. Nan values are ignored. timelist: list List of length t containing string of time and date of each (m,n) field. mintrack: int, optional minimum duration of cell-track to be counted. The default is 3 time steps. cell_list: list or None, optional If you wish to expand an existing list of cells, insert previous cell-list here. The default is None. If not None, requires that label_list has the same length. label_list: list or None, optional If you wish to expand an existing list of cells, insert previous label-list here. The default is None. If not None, requires that cell_list has the same length. start: int, optional If you wish to expand an existing list of cells, the input video must contain 2 timesteps prior to the merging. The start can then be set to 2, allowing the motion vectors to be formed from the first three grids and continuing the cell tracking from there. The default is 0, which initiates a new tracking sequence. minref: float, optional Lower threshold for object detection. Lower values will be set to NaN. The default is 35 dBZ. maxref: float, optional Upper threshold for object detection. Higher values will be set to this value. The default is 48 dBZ. mindiff: float, optional Minimal difference between two identified maxima within same area to split area into two objects. The default is 6 dBZ. minsize: float, optional Minimal area for possible detected object. The default is 50 pixels. minmax: float, optional Minimum value of maximum in identified objects. Objects with a maximum lower than this will be discarded. The default is 41 dBZ. 
mindis: float, optional Minimum distance between two maxima of identified objects. Objects with a smaller distance will be merged. The default is 10 km. match_frac: float, optional Minimum overlap fraction between two objects to be considered the same object. Default is 0.4. split_frac: float, optional Minimum overlap fraction between two objects for the object at second timestep to be considered possibly split from the object at the first timestep. Default is 0.1. merge_frac: float, optional Minimum overlap fraction between two objects for the object at second timestep to be considered possibly merged from the object at the first timestep. Default is 0.1. output_splits_merges: bool, optional If True, the output will contain information about splits and merges. The provided columns are: .. tabularcolumns:: |p{2cm}|L| +-------------------+--------------------------------------------------------------+ | Attribute | Description | +===================+==============================================================+ | splitted | Indicates if the cell is considered split into multiple cells| +-------------------+--------------------------------------------------------------+ | split_IDs | List of IDs at the next timestep that the cell split into | +-------------------+--------------------------------------------------------------+ | merged | Indicates if the cell is considered a merge of multiple cells| +-------------------+--------------------------------------------------------------+ | merged_IDs | List of IDs from the previous timestep that merged into this | | | cell | +-------------------+--------------------------------------------------------------+ | results_from_split| True if the cell is a result of a split (i.e., the ID of the | | | cell is present in the split_IDs of some cell at the previous| | | timestep) | +-------------------+--------------------------------------------------------------+ | will_merge | True if the cell will merge at the next timestep 
(i.e., the | | | ID of the cell is present in the merge_IDs of some cell at | | | the next timestep; empty if the next timestep is not tracked)| +-------------------+--------------------------------------------------------------+ Returns ------- track_list: list of dataframes Each dataframe contains the track and properties belonging to one cell ID. Columns of dataframes: ID - cell ID, time - time stamp, x - array of all x-coordinates of cell, y - array of all y-coordinates of cell, cen_x - x-coordinate of cell centroid, cen_y - y-coordinate of cell centroid, max_ref - maximum (reflectivity) value of cell, cont - cell contours cell_list: list of dataframes Each dataframe contains the detected cells and properties belonging to one timestep. The IDs are already matched to provide a track. Columns of dataframes: ID - cell ID, time - time stamp, x - array of all x-coordinates of cell, y - array of all y-coordinates of cell, cen_x - x-coordinate of cell centroid, cen_y - y-coordinate of cell centroid, max_ref - maximum (reflectivity) value of cell, cont - cell contours label_list: list of arrays Each (n,m) array contains the gridded IDs of the cells identified in the corresponding timestep. The IDs are already matched to provide a track. 
""" if not SKIMAGE_IMPORTED: raise MissingOptionalDependency( "skimage is required for thunderstorm DATing " "but it is not installed" ) if not PANDAS_IMPORTED: raise MissingOptionalDependency( "pandas is required for thunderstorm DATing " "but it is not installed" ) # Check arguments if cell_list is None or label_list is None: cell_list = [] label_list = [] else: if not len(cell_list) == len(label_list): raise ValueError("len(cell_list) != len(label_list)") if start > len(timelist): raise ValueError("start > len(timelist)") oflow_method = motion.get_method("LK") if len(label_list) == 0: max_ID = 0 else: max_ID = np.nanmax([np.nanmax(np.unique(label_list)), 0]) for t in range(start, len(timelist)): cells_id, labels = tstorm_detect.detection( input_video[t, :, :], minref=minref, maxref=maxref, mindiff=mindiff, minsize=minsize, minmax=minmax, mindis=mindis, time=timelist[t], output_splits_merges=output_splits_merges, ) if len(cell_list) < 2: cell_list.append(cells_id) label_list.append(labels) cid = np.unique(labels) max_ID = np.nanmax([np.nanmax(cid), max_ID]) + 1 continue if t >= 2: flowfield = oflow_method(input_video[t - 2 : t + 1, :, :]) cells_id, max_ID, newlabels, splitted_cells = tracking( cells_id, cell_list[-1], labels, flowfield, max_ID, match_frac=match_frac, split_frac=split_frac, merge_frac=merge_frac, output_splits_merges=output_splits_merges, ) if output_splits_merges: # Assign splitted parameters for the previous timestep for _, split_cell in splitted_cells.iterrows(): prev_list_id = cell_list[-1][ cell_list[-1].ID == split_cell.ID ].index.item() split_ids = split_cell.split_IDs split_ids_updated = [] for sid in split_ids: split_ids_updated.append(newlabels[labels == sid][0]) cell_list[-1].at[prev_list_id, "splitted"] = True cell_list[-1].at[prev_list_id, "split_IDs"] = split_ids_updated for sid in split_ids_updated: cur_list_id = cells_id[cells_id.ID == sid].index.item() cells_id.at[cur_list_id, "results_from_split"] = True merged_cells = 
cells_id[cells_id.merged == True] for _, cell in merged_cells.iterrows(): for merged_id in cell.merged_IDs: prev_list_id = cell_list[-1][ cell_list[-1].ID == merged_id ].index.item() cell_list[-1].at[prev_list_id, "will_merge"] = True cid = np.unique(newlabels) # max_ID = np.nanmax([np.nanmax(cid), max_ID]) cell_list.append(cells_id) label_list.append(newlabels) track_list = couple_track(cell_list[2:], int(max_ID), mintrack) return track_list, cell_list, label_list def tracking( cells_id, cells_id_prev, labels, V1, max_ID, match_frac=0.4, merge_frac=0.1, split_frac=0.1, output_splits_merges=False, ): """ This function performs the actual tracking procedure. First the cells are advected, then overlapped and finally their IDs are matched. If no match is found, a new ID is assigned. """ cells_id_new = cells_id.copy() cells_ad = advect( cells_id_prev, labels, V1, output_splits_merges=output_splits_merges ) cells_ov, labels, possible_merge_ids = match( cells_ad, labels, output_splits_merges=output_splits_merges, split_frac=split_frac, match_frac=match_frac, ) splitted_cells = None if output_splits_merges: splitted_cells = cells_ov[cells_ov.splitted == True] newlabels = np.zeros(labels.shape) possible_merge_ids_new = {} for index, cell in cells_id_new.iterrows(): if cell.ID == 0 or np.isnan(cell.ID): continue new_ID = cells_ov[cells_ov.t_ID == cell.ID].ID.values if len(new_ID) > 0: xx = cells_ov[cells_ov.t_ID == cell.ID].x size = [] for x in xx: size.append(len(x)) biggest = np.argmax(size) new_ID = new_ID[biggest] cells_id_new.loc[index, "ID"] = new_ID else: max_ID += 1 new_ID = max_ID cells_id_new.loc[index, "ID"] = new_ID newlabels[labels == index + 1] = new_ID possible_merge_ids_new[new_ID] = possible_merge_ids[cell.ID] del new_ID if output_splits_merges: # Process possible merges for target_id, possible_IDs in possible_merge_ids_new.items(): merge_ids = [] for p_id in possible_IDs: cell_a = cells_ad[cells_ad.ID == p_id] ID_vec = newlabels[cell_a.y.item(), 
def advect(cells_id, labels, V1, output_splits_merges=False):
    """
    Advect all identified cells with the estimated flow field.

    Parameters
    ----------
    cells_id: pandas.DataFrame
        Cells detected at the previous timestep. Rows whose ID is 0 or NaN
        are left as empty rows in the output.
    labels: array_like
        2-D array of gridded cell IDs. Only its shape is used here, for
        clipping the advected coordinates and for rasterizing the contours.
    V1: array_like
        Motion field of shape (2, m, n): V1[0] holds the x-components and
        V1[1] the y-components of the flow.
    output_splits_merges: bool, optional
        If True, include the split/merge bookkeeping columns in the output.

    Returns
    -------
    cells_ad: pandas.DataFrame
        One row per input row, containing the advected pixel coordinates,
        centroid, applied displacement (flowx/flowy) and recomputed contours.
        The t_ID/frac (and split) columns are left unset; they are filled
        later by the matching step.
    """
    columns = [
        "ID",
        "x",
        "y",
        "cen_x",
        "cen_y",
        "max_ref",
        "cont",
        "t_ID",
        "frac",
        "flowx",
        "flowy",
    ]
    if output_splits_merges:
        columns.extend(["splitted", "split_IDs", "split_fracs"])
    cells_ad = pd.DataFrame(
        data=None,
        index=range(len(cells_id)),
        columns=columns,
    )
    for ID, cell in cells_id.iterrows():
        # Skip placeholder rows (ID 0 or NaN marks an invalid/absent cell).
        if cell.ID == 0 or np.isnan(cell.ID):
            continue
        # Mean flow over the cell footprint, rounded to whole grid cells;
        # NaNs in the motion field are ignored.
        ad_x = np.round(np.nanmean(V1[0, cell.y, cell.x])).astype(int)
        ad_y = np.round(np.nanmean(V1[1, cell.y, cell.x])).astype(int)
        new_x = cell.x + ad_x
        new_y = cell.y + ad_y
        # Clip the advected pixel coordinates to the grid extent.
        new_x[new_x > labels.shape[1] - 1] = labels.shape[1] - 1
        new_y[new_y > labels.shape[0] - 1] = labels.shape[0] - 1
        new_x[new_x < 0] = 0
        new_y[new_y < 0] = 0
        # The centroid is shifted by the same displacement (not clipped).
        new_cen_x = cell.cen_x + ad_x
        new_cen_y = cell.cen_y + ad_y
        # Use scalar cell assignment (.at) to store array-like payloads in object columns.
        cells_ad.at[ID, "x"] = new_x
        cells_ad.at[ID, "y"] = new_y
        cells_ad.at[ID, "flowx"] = ad_x
        cells_ad.at[ID, "flowy"] = ad_y
        cells_ad.at[ID, "cen_x"] = new_cen_x
        cells_ad.at[ID, "cen_y"] = new_cen_y
        cells_ad.at[ID, "ID"] = cell.ID
        # Rasterize the advected cell on a binary grid and extract its
        # contour(s); the 0.8 level lies between the 0/1 values of the mask.
        cell_unique = np.zeros(labels.shape)
        cell_unique[new_y, new_x] = 1
        cells_ad.at[ID, "cont"] = skime.find_contours(cell_unique, 0.8)
    return cells_ad
In case of split of merge, the larger cell supersedes the smaller one in naming. """ cells_ov = cells_ad.copy() possible_merge_ids = {i: [] for i in np.unique(labels)} for ID_a, cell_a in cells_ov.iterrows(): if cell_a.ID == 0 or np.isnan(cell_a.ID): continue ID_vec = labels[cell_a.y, cell_a.x] IDs = np.unique(ID_vec) n_IDs = len(IDs) if n_IDs == 1 and IDs[0] == 0: cells_ov.at[ID_a, "t_ID"] = 0 continue IDs = IDs[IDs != 0] n_IDs = len(IDs) for i in IDs: possible_merge_ids[i].append(cell_a.ID) N = np.zeros(n_IDs) for n in range(n_IDs): N[n] = len(np.where(ID_vec == IDs[n])[0]) if output_splits_merges: # Only consider possible split if overlap is large enough valid_split_ids = (N / len(ID_vec)) > split_frac # splits here if sum(valid_split_ids) > 1: # Save split information cells_ov.at[ID_a, "splitted"] = True cells_ov.at[ID_a, "split_IDs"] = IDs[valid_split_ids].tolist() cells_ov.at[ID_a, "split_fracs"] = (N / len(ID_vec)).tolist() m = np.argmax(N) ID_match = IDs[m] ID_coverage = N[m] / len(ID_vec) if ID_coverage >= match_frac: cells_ov.at[ID_a, "t_ID"] = ID_match else: cells_ov.at[ID_a, "t_ID"] = 0 cells_ov.at[ID_a, "frac"] = ID_coverage return cells_ov, labels, possible_merge_ids def couple_track(cell_list, max_ID, mintrack): """ The coupled cell tracks are re-arranged from the list of cells sorted by time, to a list of tracks sorted by ID. Tracks shorter than mintrack are rejected. 
""" track_list = [] for n in range(1, max_ID): cell_track = pd.DataFrame( data=None, index=None, columns=["ID", "time", "x", "y", "cen_x", "cen_y", "max_ref", "cont"], ) cell_track = [] for t in range(len(cell_list)): mytime = cell_list[t] cell_track.append(mytime[mytime.ID == n]) cell_track = pd.concat(cell_track, axis=0) if len(cell_track) < mintrack: continue track_list.append(cell_track) return track_list ================================================ FILE: pysteps/utils/__init__.py ================================================ """Miscellaneous utility functions.""" from .arrays import * from .cleansing import * from .conversion import * from .dimension import * from .images import * from .interface import get_method from .interpolate import * from .fft import * from .pca import * from .reprojection import * from .spectral import * from .tapering import * from .transformation import * ================================================ FILE: pysteps/utils/arrays.py ================================================ """ pysteps.utils.arrays ==================== Utility methods for creating and processing arrays. .. autosummary:: :toctree: ../generated/ compute_centred_coord_array """ import numpy as np def compute_centred_coord_array(M, N): """ Compute a 2D coordinate array, where the origin is at the center. Parameters ---------- M : int The height of the array. N : int The width of the array. Returns ------- out : ndarray The coordinate array. 
Examples -------- >>> compute_centred_coord_array(2, 2) (array([[-2],\n [-1],\n [ 0],\n [ 1],\n [ 2]]), array([[-2, -1, 0, 1, 2]])) """ if M % 2 == 1: s1 = np.s_[-int(M / 2) : int(M / 2) + 1] else: s1 = np.s_[-int(M / 2) : int(M / 2)] if N % 2 == 1: s2 = np.s_[-int(N / 2) : int(N / 2) + 1] else: s2 = np.s_[-int(N / 2) : int(N / 2)] YC, XC = np.ogrid[s1, s2] return YC, XC ================================================ FILE: pysteps/utils/check_norain.py ================================================ import numpy as np from pysteps import utils def check_norain(precip_arr, precip_thr=None, norain_thr=0.0, win_fun=None): """ Parameters ---------- precip_arr: array-like An at least 2 dimensional array containing the input precipitation field precip_thr: float, optional Specifies the threshold value for minimum observable precipitation intensity. If None, the minimum value over the domain is taken. norain_thr: float, optional Specifies the threshold value for the fraction of rainy pixels in precip_arr below which we consider there to be no rain. Standard set to 0.0 win_fun: {'hann', 'tukey', None} Optional tapering function to be applied to the input field, generated with :py:func:`pysteps.utils.tapering.compute_window_function` (default None). This parameter needs to match the window function you use in later noise generation, or else this method will say that there is rain, while after the tapering function is applied there is no rain left, so you will run into a ValueError. Returns ------- norain: bool Returns whether the fraction of rainy pixels is below the norain_thr threshold. 
""" if win_fun is not None: tapering = utils.tapering.compute_window_function( precip_arr.shape[-2], precip_arr.shape[-1], win_fun ) else: tapering = np.ones((precip_arr.shape[-2], precip_arr.shape[-1])) tapering_mask = tapering == 0.0 masked_precip = precip_arr.copy() masked_precip[..., tapering_mask] = np.nanmin(precip_arr) if precip_thr is None: precip_thr = np.nanmin(masked_precip) rain_pixels = masked_precip[masked_precip > precip_thr] norain = rain_pixels.size / masked_precip.size <= norain_thr print( f"Rain fraction is: {str(rain_pixels.size / masked_precip.size)}, while minimum fraction is {str(norain_thr)}" ) return norain ================================================ FILE: pysteps/utils/cleansing.py ================================================ # -*- coding: utf-8 -*- """ pysteps.utils.cleansing ======================= Data cleansing routines for pysteps. .. autosummary:: :toctree: ../generated/ decluster detect_outliers """ import warnings import numpy as np import scipy.spatial def decluster(coord, input_array, scale, min_samples=1, verbose=False): """ Decluster a set of sparse data points by aggregating, that is, taking the median value of all values lying within a certain distance (i.e., a cluster). Parameters ---------- coord: array_like Array of shape (n, d) containing the coordinates of the input data into a space of *d* dimensions. input_array: array_like Array of shape (n) or (n, m), where *n* is the number of samples and *m* the number of variables. All values in ``input_array`` are required to have finite values. scale: float or array_like The ``scale`` parameter in the same units of ``coord``. It can be a scalar or an array_like of shape (d). Data points within the declustering ``scale`` are aggregated. min_samples: int, optional The minimum number of samples for computing the median within a given cluster. verbose: bool, optional Print out information. 
Returns ------- out: tuple of ndarrays A two-element tuple (``out_coord``, ``output_array``) containing the declustered coordinates (l, d) and input array (l, m), where *l* is the new number of samples with *l* <= *n*. """ coord = np.copy(coord) input_array = np.copy(input_array) # check inputs if np.any(~np.isfinite(input_array)): raise ValueError("input_array contains non-finite values") if input_array.ndim == 1: nvar = 1 input_array = input_array[:, None] elif input_array.ndim == 2: nvar = input_array.shape[1] else: raise ValueError( "input_array must have 1 (n) or 2 dimensions (n, m), but it has %i" % input_array.ndim ) if coord.ndim != 2: raise ValueError( "coord must have 2 dimensions (n, d), but it has %i" % coord.ndim ) if coord.shape[0] != input_array.shape[0]: raise ValueError( "the number of samples in the input_array does not match the " + "number of coordinates %i!=%i" % (input_array.shape[0], coord.shape[0]) ) if np.isscalar(scale): scale = float(scale) else: scale = np.copy(scale) if scale.ndim != 1: raise ValueError( "scale must have 1 dimension (d), but it has %i" % scale.ndim ) if scale.shape[0] != coord.shape[1]: raise ValueError( "scale must have %i elements, but it has %i" % (coord.shape[1], scale.shape[0]) ) scale = scale[None, :] # reduce original coordinates coord_ = np.floor(coord / scale) # keep only unique pairs of the reduced coordinates ucoord_ = np.unique(coord_, axis=0) # loop through these unique values and average data points which belong to # the same cluster dinput = np.empty(shape=(0, nvar)) dcoord = np.empty(shape=(0, coord.shape[1])) for i in range(ucoord_.shape[0]): idx = np.all(coord_ == ucoord_[i, :], axis=1) npoints = np.sum(idx) if npoints >= min_samples: dinput = np.append( dinput, np.median(input_array[idx, :], axis=0)[None, :], axis=0 ) dcoord = np.append( dcoord, np.median(coord[idx, :], axis=0)[None, :], axis=0 ) if verbose: print("--- %i samples left after declustering ---" % dinput.shape[0]) return dcoord, dinput 
def detect_outliers(input_array, thr, coord=None, k=None, verbose=False):
    """
    Detect outliers in a (multivariate and georeferenced) dataset.

    Assume a (multivariate) Gaussian distribution and detect outliers based on
    the number of standard deviations from the mean.

    If spatial information is provided through coordinates, the outlier
    detection can be localized by considering only the k-nearest neighbours
    when computing the local mean and standard deviation.

    Parameters
    ----------
    input_array: array_like
        Array of shape (n) or (n, m), where *n* is the number of samples and
        *m* the number of variables. If *m* > 1, the Mahalanobis distance
        is used.
        All values in ``input_array`` are required to have finite values.
    thr: float
        The number of standard deviations from the mean used to define an
        outlier.
    coord: array_like or None, optional
        Array of shape (n, d) containing the coordinates of the input data
        into a space of *d* dimensions.
        Passing ``coord`` requires that ``k`` is not None.
    k: int or None, optional
        The number of nearest neighbours used to localize the outlier
        detection. If set to None (the default), it employs all the data
        points (global detection). Setting ``k`` requires that ``coord`` is
        not None.
    verbose: bool, optional
        Print out information.

    Returns
    -------
    out: array_like
        A 1-D boolean array of shape (n) with True values indicating the
        outliers detected in ``input_array``.
    """
    # Copy so the caller's array is never modified.
    input_array = np.copy(input_array)

    if np.any(~np.isfinite(input_array)):
        raise ValueError("input_array contains non-finite values")

    if input_array.ndim == 1:
        nsamples = input_array.size
        nvar = 1
    elif input_array.ndim == 2:
        nsamples = input_array.shape[0]
        nvar = input_array.shape[1]
    else:
        raise ValueError(
            f"input_array must have 1 (n) or 2 dimensions (n, m), "
            f"but it has {input_array.ndim}"
        )

    # With fewer than two samples no deviation can be estimated:
    # report no outliers.
    if nsamples < 2:
        return np.zeros(nsamples, dtype=bool)

    if coord is not None and k is not None:
        coord = np.copy(coord)
        if coord.ndim == 1:
            coord = coord[:, None]

        elif coord.ndim > 2:
            raise ValueError(
                "coord must have 2 dimensions (n, d)," f"but it has {coord.ndim}"
            )

        if coord.shape[0] != nsamples:
            raise ValueError(
                "the number of samples in input_array does not match the "
                f"number of coordinates {nsamples}!={coord.shape[0]}"
            )

        # +1 because the KD-tree query below returns each point itself as its
        # own first neighbour; cap at the total number of samples.
        k = np.min((nsamples, k + 1))

    # global
    if k is None or coord is None:
        if nvar == 1:
            # univariate
            # NOTE(review): divides by np.std(input_array); a constant input
            # yields std == 0 and a divide-by-zero — confirm callers
            # guarantee non-degenerate data.
            zdata = np.abs(input_array - np.mean(input_array)) / np.std(input_array)
            outliers = zdata > thr

        else:
            # multivariate (mahalanobis distance)
            zdata = input_array - np.mean(input_array, axis=0)
            V = np.cov(zdata.T)
            try:
                VI = np.linalg.inv(V)
                MD = np.sqrt(np.dot(np.dot(zdata, VI), zdata.T).diagonal())
            except np.linalg.LinAlgError as err:
                # Singular covariance: warn and report no outliers rather
                # than failing the whole pipeline.
                warnings.warn(f"{err} during outlier detection")
                MD = np.zeros(nsamples)
            outliers = MD > thr

    # local
    else:
        tree = scipy.spatial.cKDTree(coord)
        __, inds = tree.query(coord, k=k)
        # NOTE(review): np.append inside the loop grows the result one
        # element at a time (O(n^2)); a preallocated boolean array would
        # be cheaper for large n.
        outliers = np.empty(shape=0, dtype=bool)
        for i in range(inds.shape[0]):
            if nvar == 1:
                # univariate
                # inds[i, 0] is the point itself, hence the [1:] slice
                # to use only the true neighbours.
                thisdata = input_array[i]
                neighbours = input_array[inds[i, 1:]]
                thiszdata = np.abs(thisdata - np.mean(neighbours)) / np.std(neighbours)
                outliers = np.append(outliers, thiszdata > thr)

            else:
                # multivariate (mahalanobis distance)
                thisdata = input_array[i, :]
                neighbours = input_array[inds[i, 1:], :].copy()
                thiszdata = thisdata - np.mean(neighbours, axis=0)
                neighbours = neighbours - np.mean(neighbours, axis=0)
                V = np.cov(neighbours.T)
                try:
                    VI = np.linalg.inv(V)
                    MD = np.sqrt(np.dot(np.dot(thiszdata, VI), thiszdata.T))
                except np.linalg.LinAlgError as err:
                    warnings.warn(f"{err} during outlier detection")
                    MD = 0
                outliers = np.append(outliers, MD > thr)

    if verbose:
        print(f"--- {np.sum(outliers)} outliers detected ---")

    return outliers


================================================
FILE: pysteps/utils/conversion.py
================================================
# -*- coding: utf-8 -*-
"""
pysteps.utils.conversion
========================

Methods for converting physical units.

.. autosummary::
    :toctree: ../generated/

    to_rainrate
    to_raindepth
    to_reflectivity
"""

import warnings

from . import transformation

# TODO: This should not be done. Instead fix the code so that it doesn't
# produce the warnings.
# to deactivate warnings for comparison operators with NaNs
warnings.filterwarnings("ignore", category=RuntimeWarning)


def to_rainrate(R, metadata, zr_a=None, zr_b=None):
    """
    Convert to rain rate [mm/h].

    Parameters
    ----------
    R: array-like
        Array of any shape to be (back-)transformed.
    metadata: dict
        Metadata dictionary containing the accutime, transform, unit,
        threshold and zerovalue attributes as described in the documentation
        of :py:mod:`pysteps.io.importers`.

        Additionally, in case of conversion to/from reflectivity units, the
        zr_a and zr_b attributes are also required, but only if zr_a = zr_b =
        None. If missing, it defaults to Marshall–Palmer relation, that is,
        zr_a = 200.0 and zr_b = 1.6.
    zr_a, zr_b: float, optional
        The a and b coefficients of the Z-R relationship (Z = a*R^b).

    Returns
    -------
    R: array-like
        Array of any shape containing the converted units.
    metadata: dict
        The metadata with updated attributes.
    """
    R = R.copy()
    metadata = metadata.copy()

    # First undo any statistical transformation so the data is back in
    # linear physical units before the unit conversion.
    if metadata["transform"] is not None:
        if metadata["transform"] == "dB":
            R, metadata = transformation.dB_transform(R, metadata, inverse=True)

        elif metadata["transform"] in ["BoxCox", "log"]:
            R, metadata = transformation.boxcox_transform(R, metadata, inverse=True)

        elif metadata["transform"] == "NQT":
            R, metadata = transformation.NQ_transform(R, metadata, inverse=True)

        elif metadata["transform"] == "sqrt":
            R, metadata = transformation.sqrt_transform(R, metadata, inverse=True)

        else:
            raise ValueError("Unknown transformation %s" % metadata["transform"])

    if metadata["unit"] == "mm/h":
        # already in the target unit
        pass

    elif metadata["unit"] == "mm":
        threshold = metadata["threshold"]  # convert the threshold, too
        zerovalue = metadata["zerovalue"]  # convert the zerovalue, too

        # depth accumulated over 'accutime' minutes -> rate per hour
        R = R / float(metadata["accutime"]) * 60.0
        threshold = threshold / float(metadata["accutime"]) * 60.0
        zerovalue = zerovalue / float(metadata["accutime"]) * 60.0

        metadata["threshold"] = threshold
        metadata["zerovalue"] = zerovalue

    elif metadata["unit"] == "dBZ":
        threshold = metadata["threshold"]  # convert the threshold, too
        zerovalue = metadata["zerovalue"]  # convert the zerovalue, too

        # Z to R
        if zr_a is None:
            zr_a = metadata.get("zr_a", 200.0)  # default to Marshall–Palmer
        if zr_b is None:
            zr_b = metadata.get("zr_b", 1.6)  # default to Marshall–Palmer
        # invert Z = a * R^b
        R = (R / zr_a) ** (1.0 / zr_b)
        threshold = (threshold / zr_a) ** (1.0 / zr_b)
        zerovalue = (zerovalue / zr_a) ** (1.0 / zr_b)

        metadata["zr_a"] = zr_a
        metadata["zr_b"] = zr_b
        metadata["threshold"] = threshold
        metadata["zerovalue"] = zerovalue

    else:
        raise ValueError(
            "Cannot convert unit %s and transform %s to mm/h"
            % (metadata["unit"], metadata["transform"])
        )

    metadata["unit"] = "mm/h"

    return R, metadata


def to_raindepth(R, metadata, zr_a=None, zr_b=None):
    """
    Convert to rain depth [mm].

    Parameters
    ----------
    R: array-like
        Array of any shape to be (back-)transformed.
    metadata: dict
        Metadata dictionary containing the accutime, transform, unit,
        threshold and zerovalue attributes as described in the documentation
        of :py:mod:`pysteps.io.importers`.

        Additionally, in case of conversion to/from reflectivity units, the
        zr_a and zr_b attributes are also required, but only if zr_a = zr_b =
        None. If missing, it defaults to Marshall–Palmer relation, that is,
        zr_a = 200.0 and zr_b = 1.6.
    zr_a, zr_b: float, optional
        The a and b coefficients of the Z-R relationship (Z = a*R^b).

    Returns
    -------
    R: array-like
        Array of any shape containing the converted units.
    metadata: dict
        The metadata with updated attributes.
    """
    R = R.copy()
    metadata = metadata.copy()

    # Undo any statistical transformation first (see to_rainrate).
    if metadata["transform"] is not None:
        if metadata["transform"] == "dB":
            R, metadata = transformation.dB_transform(R, metadata, inverse=True)

        elif metadata["transform"] in ["BoxCox", "log"]:
            R, metadata = transformation.boxcox_transform(R, metadata, inverse=True)

        elif metadata["transform"] == "NQT":
            R, metadata = transformation.NQ_transform(R, metadata, inverse=True)

        elif metadata["transform"] == "sqrt":
            R, metadata = transformation.sqrt_transform(R, metadata, inverse=True)

        else:
            raise ValueError("Unknown transformation %s" % metadata["transform"])

    if metadata["unit"] == "mm" and metadata["transform"] is None:
        # already in the target unit and untransformed
        pass

    elif metadata["unit"] == "mm/h":
        threshold = metadata["threshold"]  # convert the threshold, too
        zerovalue = metadata["zerovalue"]  # convert the zerovalue, too

        # rate per hour -> depth accumulated over 'accutime' minutes
        R = R / 60.0 * metadata["accutime"]
        threshold = threshold / 60.0 * metadata["accutime"]
        zerovalue = zerovalue / 60.0 * metadata["accutime"]

        metadata["threshold"] = threshold
        metadata["zerovalue"] = zerovalue

    elif metadata["unit"] == "dBZ":
        threshold = metadata["threshold"]  # convert the threshold, too
        zerovalue = metadata["zerovalue"]  # convert the zerovalue, too

        # Z to R
        if zr_a is None:
            zr_a = metadata.get("zr_a", 200.0)  # Default to Marshall–Palmer
        if zr_b is None:
            zr_b = metadata.get("zr_b", 1.6)  # Default to Marshall–Palmer
        # invert Z = a * R^b, then rate -> depth in one expression
        R = (R / zr_a) ** (1.0 / zr_b) / 60.0 * metadata["accutime"]
        threshold = (threshold / zr_a) ** (1.0 / zr_b) / 60.0 * metadata["accutime"]
        zerovalue = (zerovalue / zr_a) ** (1.0 / zr_b) / 60.0 * metadata["accutime"]

        metadata["zr_a"] = zr_a
        metadata["zr_b"] = zr_b
        metadata["threshold"] = threshold
        metadata["zerovalue"] = zerovalue

    else:
        raise ValueError(
            "Cannot convert unit %s and transform %s to mm"
            % (metadata["unit"], metadata["transform"])
        )

    metadata["unit"] = "mm"

    return R, metadata


def to_reflectivity(R, metadata, zr_a=None, zr_b=None):
    """
    Convert to reflectivity [dBZ].

    Parameters
    ----------
    R: array-like
        Array of any shape to be (back-)transformed.
    metadata: dict
        Metadata dictionary containing the accutime, transform, unit,
        threshold and zerovalue attributes as described in the documentation
        of :py:mod:`pysteps.io.importers`.

        Additionally, in case of conversion to/from reflectivity units, the
        zr_a and zr_b attributes are also required, but only if zr_a = zr_b =
        None. If missing, it defaults to Marshall–Palmer relation, that is,
        zr_a = 200.0 and zr_b = 1.6.
    zr_a, zr_b: float, optional
        The a and b coefficients of the Z-R relationship (Z = a*R^b).

    Returns
    -------
    R: array-like
        Array of any shape containing the converted units.
    metadata: dict
        The metadata with updated attributes.
    """
    R = R.copy()
    metadata = metadata.copy()

    # Undo any statistical transformation first (see to_rainrate).
    if metadata["transform"] is not None:
        if metadata["transform"] == "dB":
            R, metadata = transformation.dB_transform(R, metadata, inverse=True)

        elif metadata["transform"] in ["BoxCox", "log"]:
            R, metadata = transformation.boxcox_transform(R, metadata, inverse=True)

        elif metadata["transform"] == "NQT":
            R, metadata = transformation.NQ_transform(R, metadata, inverse=True)

        elif metadata["transform"] == "sqrt":
            R, metadata = transformation.sqrt_transform(R, metadata, inverse=True)

        else:
            raise ValueError("Unknown transformation %s" % metadata["transform"])

    if metadata["unit"] == "mm/h":
        # Z to R
        if zr_a is None:
            zr_a = metadata.get("zr_a", 200.0)  # Default to Marshall–Palmer
        if zr_b is None:
            zr_b = metadata.get("zr_b", 1.6)  # Default to Marshall–Palmer
        # apply Z = a * R^b
        R = zr_a * R**zr_b
        metadata["threshold"] = zr_a * metadata["threshold"] ** zr_b
        metadata["zerovalue"] = zr_a * metadata["zerovalue"] ** zr_b
        metadata["zr_a"] = zr_a
        metadata["zr_b"] = zr_b

        # Z to dBZ
        R, metadata = transformation.dB_transform(R, metadata)

    elif metadata["unit"] == "mm":
        # depth to rate
        R, metadata = to_rainrate(R, metadata)

        # Z to R
        if zr_a is None:
            zr_a = metadata.get("zr_a", 200.0)  # Default to Marshall-Palmer
        if zr_b is None:
            zr_b = metadata.get("zr_b", 1.6)  # Default to Marshall-Palmer
        R = zr_a * R**zr_b
        metadata["threshold"] = zr_a * metadata["threshold"] ** zr_b
        metadata["zerovalue"] = zr_a * metadata["zerovalue"] ** zr_b
        metadata["zr_a"] = zr_a
        metadata["zr_b"] = zr_b

        # Z to dBZ
        R, metadata = transformation.dB_transform(R, metadata)

    elif metadata["unit"] == "dBZ":
        # Z to dBZ
        # (the data was back-transformed to linear Z above, so only the dB
        # transform is needed here)
        R, metadata = transformation.dB_transform(R, metadata)

    else:
        raise ValueError(
            "Cannot convert unit %s and transform %s to mm/h"
            % (metadata["unit"], metadata["transform"])
        )

    metadata["unit"] = "dBZ"

    return R, metadata


================================================
FILE: pysteps/utils/dimension.py
================================================
# -*- coding: utf-8 -*-
"""
pysteps.utils.dimension
=======================

Functions to manipulate array dimensions.

.. autosummary::
    :toctree: ../generated/

    aggregate_fields
    aggregate_fields_time
    aggregate_fields_space
    clip_domain
    square_domain
"""

import numpy as np

# Mapping from method name to the numpy reducer used by aggregate_fields.
_aggregation_methods = dict(
    sum=np.sum, mean=np.mean, nanmean=np.nanmean, nansum=np.nansum
)


def aggregate_fields_time(R, metadata, time_window_min, ignore_nan=False):
    """Aggregate fields in time.

    Parameters
    ----------
    R: array-like
        Array of shape (t,m,n) or (l,t,m,n) containing
        a time series of (ensemble) input fields.
        They must be evenly spaced in time.
    metadata: dict
        Metadata dictionary containing the timestamps and unit attributes as
        described in the documentation of :py:mod:`pysteps.io.importers`.
    time_window_min: float or None
        The length in minutes of the time window that is used to
        aggregate the fields.
        The time spanned by the t dimension of R must be a multiple of
        time_window_min.
        If set to None, it returns a copy of the original R and metadata.
    ignore_nan: bool, optional
        If True, ignore nan values.

    Returns
    -------
    outputarray: array-like
        The new array of aggregated fields of shape (k,m,n) or (l,k,m,n),
        where k = t*delta/time_window_min and delta is the time interval
        between two successive timestamps.
    metadata: dict
        The metadata with updated attributes.

    See also
    --------
    pysteps.utils.dimension.aggregate_fields_space,
    pysteps.utils.dimension.aggregate_fields
    """
    R = R.copy()
    metadata = metadata.copy()

    if time_window_min is None:
        return R, metadata

    unit = metadata["unit"]
    timestamps = metadata["timestamps"]
    if "leadtimes" in metadata:
        leadtimes = metadata["leadtimes"]

    # The time axis is axis 0 for (t,m,n) input and axis 1 for (l,t,m,n).
    if len(R.shape) < 3:
        raise ValueError("The number of dimension must be > 2")
    if len(R.shape) == 3:
        axis = 0
    if len(R.shape) == 4:
        axis = 1
    if len(R.shape) > 4:
        raise ValueError("The number of dimension must be <= 4")

    if R.shape[axis] != len(timestamps):
        raise ValueError(
            "The list of timestamps has length %i, " % len(timestamps)
            + "but R contains %i frames" % R.shape[axis]
        )

    # assumes that frames are evenly spaced
    # NOTE(review): timedelta.seconds ignores the days component; for frame
    # spacings of one day or more this silently wraps. total_seconds() would
    # be safer — confirm frames never span >= 1 day.
    delta = (timestamps[1] - timestamps[0]).seconds / 60
    if delta == time_window_min:
        return R, metadata
    if (R.shape[axis] * delta) % time_window_min:
        raise ValueError("time_window_size does not equally split R")

    nframes = int(time_window_min / delta)

    # specify the operator to be used to aggregate
    # the values within the time window
    # (rates are averaged, depths are summed)
    if unit == "mm/h":
        method = "mean"
    elif unit == "mm":
        method = "sum"
    else:
        raise ValueError(
            "can only aggregate units of 'mm/h' or 'mm'" + " not %s" % unit
        )

    if ignore_nan:
        method = "".join(("nan", method))

    R = aggregate_fields(R, nframes, axis=axis, method=method)

    metadata["accutime"] = time_window_min
    # keep the timestamp at the end of each aggregation window
    metadata["timestamps"] = timestamps[nframes - 1 :: nframes]
    if "leadtimes" in metadata:
        metadata["leadtimes"] = leadtimes[nframes - 1 :: nframes]

    return R, metadata


def aggregate_fields_space(R, metadata, space_window, ignore_nan=False):
    """
    Upscale fields in space.

    Parameters
    ----------
    R: array-like
        Array of shape (m,n), (t,m,n) or (l,t,m,n) containing a single field
        or a time series of (ensemble) input fields.
    metadata: dict
        Metadata dictionary containing the xpixelsize, ypixelsize and unit
        attributes as described in the documentation of
        :py:mod:`pysteps.io.importers`.
    space_window: float, tuple or None
        The length of the space window that is used to upscale the fields.
        If a float is given, the same window size is used for the x- and
        y-directions. Separate window sizes are used for x- and y-directions
        if a two-element tuple is given. The space_window unit is the same
        used in the geographical projection of R
        and hence the same as for the xpixelsize and ypixelsize attributes.
        The space spanned by the n- and m-dimensions of R must be a multiple
        of space_window. If set to None, the function returns a copy of the
        original R and metadata.
    ignore_nan: bool, optional
        If True, ignore nan values.

    Returns
    -------
    outputarray: array-like
        The new array of aggregated fields of shape (k,j), (t,k,j)
        or (l,t,k,j), where k = m*ypixelsize/space_window[1]
        and j = n*xpixelsize/space_window[0].
    metadata: dict
        The metadata with updated attributes.

    See also
    --------
    pysteps.utils.dimension.aggregate_fields_time,
    pysteps.utils.dimension.aggregate_fields
    """
    R = R.copy()
    metadata = metadata.copy()

    if space_window is None:
        return R, metadata

    unit = metadata["unit"]
    ypixelsize = metadata["ypixelsize"]
    xpixelsize = metadata["xpixelsize"]

    # The two spatial axes are always the last two dimensions.
    if len(R.shape) < 2:
        raise ValueError("The number of dimensions must be >= 2")
    if len(R.shape) == 2:
        axes = [0, 1]
    if len(R.shape) == 3:
        axes = [1, 2]
    if len(R.shape) == 4:
        axes = [2, 3]
    if len(R.shape) > 4:
        raise ValueError("The number of dimensions must be <= 4")

    if np.isscalar(space_window):
        space_window = (space_window, space_window)

    # assumes that frames are evenly spaced
    if ypixelsize == space_window[1] and xpixelsize == space_window[0]:
        return R, metadata

    # The total domain extent must be an integer multiple of the window
    # (within floating-point tolerance).
    ysize = R.shape[axes[0]] * ypixelsize
    xsize = R.shape[axes[1]] * xpixelsize

    if (
        abs(ysize / space_window[1] - round(ysize / space_window[1])) > 1e-10
        or abs(xsize / space_window[0] - round(xsize / space_window[0])) > 1e-10
    ):
        raise ValueError("space_window does not equally split R")

    nframes = [int(space_window[1] / ypixelsize), int(space_window[0] / xpixelsize)]

    # specify the operator to be used to aggregate the values
    # within the space window
    if unit == "mm/h" or unit == "mm":
        method = "mean"
    else:
        raise ValueError(
            "can only aggregate units of 'mm/h' or 'mm' " + "not %s" % unit
        )

    if ignore_nan:
        method = "".join(("nan", method))

    R = aggregate_fields(R, nframes[0], axis=axes[0], method=method)
    R = aggregate_fields(R, nframes[1], axis=axes[1], method=method)

    metadata["ypixelsize"] = space_window[1]
    metadata["xpixelsize"] = space_window[0]

    return R, metadata


def aggregate_fields(data, window_size, axis=0, method="mean", trim=False):
    """Aggregate fields along a given direction.

    It attempts to aggregate the given R axis in an integer number of sections
    of length = ``window_size``.
    If such a aggregation is not possible, an error is raised unless ``trim``
    set to True, in which case the axis is trimmed (from the end)
    to make it perfectly divisible".

    Parameters
    ----------
    data: array-like
        Array of any shape containing the input fields.
    window_size: int or tuple of ints
        The length of the window that is used to aggregate the fields.
        If a single integer value is given, the same window is used for
        all the selected axis.

        If ``window_size`` is a 1D array-like,
        each element indicates the length of the window that is used
        to aggregate the fields along each axis. In this case,
        the number of elements of 'window_size' must be the same as the
        elements in the ``axis`` argument.
    axis: int or array-like of ints
        Axis or axes where to perform the aggregation.
        If this is a tuple of ints, the aggregation is performed over multiple
        axes, instead of a single axis
    method: string, optional
        Optional argument that specifies the operation to use
        to aggregate the values within the window.
        Default to mean operator.
    trim: bool
        In case that the ``data`` is not perfectly divisible by
        ``window_size`` along the selected axis:

        - trim=True: the data will be trimmed (from the end) along that axis
          to make it perfectly divisible.
        - trim=False: a ValueError exception is raised.

    Returns
    -------
    new_array: array-like
        The new aggregated array with shape[axis] = k,
        where k = R.shape[axis] / window_size.

    See also
    --------
    pysteps.utils.dimension.aggregate_fields_time,
    pysteps.utils.dimension.aggregate_fields_space
    """
    if np.ndim(axis) > 1:
        raise TypeError(
            "Only integers or integer 1D arrays can be used for the " "'axis' argument."
        )

    # Multiple axes: recurse once per axis, pairing each axis with its
    # window size.
    if np.ndim(axis) == 1:
        axis = np.asarray(axis)
        if np.ndim(window_size) == 0:
            window_size = (window_size,) * axis.size

        window_size = np.asarray(window_size, dtype="int")

        if window_size.shape != axis.shape:
            # NOTE(review): message lacks a space after the period, rendering
            # as "...incompatible.window_size.shape: ..." — cosmetic only.
            raise ValueError(
                "The 'window_size' and 'axis' shapes are incompatible."
                f"window_size.shape: {str(window_size.shape)}, "
                f"axis.shape: {str(axis.shape)}, "
            )

        new_data = data.copy()
        for i in range(axis.size):
            # Recursively call the aggregate_fields function
            new_data = aggregate_fields(
                new_data, window_size[i], axis=axis[i], method=method, trim=trim
            )

        return new_data

    if np.ndim(window_size) != 0:
        # NOTE(review): missing space between "several" and "of" in this
        # message — cosmetic only.
        raise TypeError(
            "A single axis was selected for the aggregation but several"
            f"of window_sizes were given: {str(window_size)}."
        )

    data = np.asarray(data).copy()
    orig_shape = data.shape

    if method not in _aggregation_methods:
        raise ValueError(
            "Aggregation method not recognized. "
            f"Available methods: {str(list(_aggregation_methods.keys()))}"
        )

    if window_size <= 0:
        raise ValueError("'window_size' must be strictly positive")

    if (orig_shape[axis] % window_size) and (not trim):
        raise ValueError(
            f"Since 'trim' argument was set to False,"
            f"the 'window_size' {window_size} must exactly divide"
            f"the dimension along the selected axis:"
            f"data.shape[axis]={orig_shape[axis]}"
        )

    # Move the target axis to the front so the reshape below groups
    # consecutive windows along axis 1.
    new_data = data.swapaxes(axis, 0)
    if trim:
        trim_size = data.shape[axis] % window_size
        if trim_size > 0:
            new_data = new_data[:-trim_size]

    new_data_shape = list(new_data.shape)
    new_data_shape[0] //= window_size  # Final shape

    new_data = new_data.reshape(new_data_shape[0], window_size, -1)

    new_data = _aggregation_methods[method](new_data, axis=1)

    new_data = new_data.reshape(new_data_shape).swapaxes(axis, 0)

    return new_data


def clip_domain(R, metadata, extent=None):
    """
    Clip the field domain by geographical coordinates.

    Parameters
    ----------
    R: array-like
        Array of shape (m,n) or (t,m,n) containing the input fields.
    metadata: dict
        Metadata dictionary containing the x1, x2, y1, y2, xpixelsize,
        ypixelsize, zerovalue and yorigin attributes as described in the
        documentation of :py:mod:`pysteps.io.importers`.
    extent: scalars (left, right, bottom, top), optional
        The extent of the bounding box in data coordinates to be used to clip
        the data.
        Note that the direction of the vertical axis and thus the default
        values for top and bottom depend on origin. We follow the same
        convention as in the imshow method of matplotlib:
        https://matplotlib.org/tutorials/intermediate/imshow_extent.html

    Returns
    -------
    R: array-like
        the clipped array
    metadata: dict
        the metadata with updated attributes.
    """
    R = R.copy()
    R_shape = np.array(R.shape)
    metadata = metadata.copy()

    if extent is None:
        return R, metadata

    # Normalize to a 4-D (l,t,m,n) view so the same indexing works for all
    # supported input ranks.
    if len(R.shape) < 2:
        raise ValueError("The number of dimension must be > 1")
    if len(R.shape) == 2:
        R = R[None, None, :, :]
    if len(R.shape) == 3:
        R = R[None, :, :, :]
    if len(R.shape) > 4:
        raise ValueError("The number of dimension must be <= 4")

    # extract original domain coordinates
    left = metadata["x1"]
    right = metadata["x2"]
    bottom = metadata["y1"]
    top = metadata["y2"]

    # extract bounding box coordinates
    left_ = extent[0]
    right_ = extent[1]
    bottom_ = extent[2]
    top_ = extent[3]

    # compute its extent in pixels
    dim_x_ = int((right_ - left_) / metadata["xpixelsize"])
    dim_y_ = int((top_ - bottom_) / metadata["ypixelsize"])

    # Output is pre-filled with the zerovalue; regions of the requested
    # extent outside the original domain keep that fill value.
    R_ = np.ones((R.shape[0], R.shape[1], dim_y_, dim_x_)) * metadata["zerovalue"]

    # build set of coordinates for the original domain (pixel centers)
    y_coord = (
        np.linspace(bottom, top - metadata["ypixelsize"], R.shape[2])
        + metadata["ypixelsize"] / 2.0
    )
    x_coord = (
        np.linspace(left, right - metadata["xpixelsize"], R.shape[3])
        + metadata["xpixelsize"] / 2.0
    )

    # build set of coordinates for the new domain (pixel centers)
    y_coord_ = (
        np.linspace(bottom_, top_ - metadata["ypixelsize"], R_.shape[2])
        + metadata["ypixelsize"] / 2.0
    )
    x_coord_ = (
        np.linspace(left_, right_ - metadata["xpixelsize"], R_.shape[3])
        + metadata["xpixelsize"] / 2.0
    )

    # origin='upper' reverses the vertical axes direction
    if metadata["yorigin"] == "upper":
        y_coord = y_coord[::-1]
        y_coord_ = y_coord_[::-1]

    # extract original domain
    idx_y = np.where(np.logical_and(y_coord < top_, y_coord > bottom_))[0]
    idx_x = np.where(np.logical_and(x_coord < right_, x_coord > left_))[0]

    # extract new domain
    idx_y_ = np.where(np.logical_and(y_coord_ < top, y_coord_ > bottom))[0]
    idx_x_ = np.where(np.logical_and(x_coord_ < right, x_coord_ > left))[0]

    # compose the new array
    R_[:, :, idx_y_[0] : (idx_y_[-1] + 1), idx_x_[0] : (idx_x_[-1] + 1)] = R[
        :, :, idx_y[0] : (idx_y[-1] + 1), idx_x[0] : (idx_x[-1] + 1)
    ]

    # update coordinates
    metadata["y1"] = bottom_
    metadata["y2"] = top_
    metadata["x1"] = left_
    metadata["x2"] = right_

    # Restore the caller's original rank before returning.
    R_shape[-2] = R_.shape[-2]
    R_shape[-1] = R_.shape[-1]

    return R_.reshape(R_shape), metadata


def square_domain(R, metadata, method="pad", inverse=False):
    """
    Either pad or crop a field to obtain a square domain.

    Parameters
    ----------
    R: array-like
        Array of shape (m,n) or (t,m,n) containing the input fields.
    metadata: dict
        Metadata dictionary containing the x1, x2, y1, y2, xpixelsize,
        ypixelsize, attributes as described in the documentation of
        :py:mod:`pysteps.io.importers`.
    method: {'pad', 'crop'}, optional
        Either pad or crop.
        If pad, an equal number of zeros is added to both ends of its shortest
        side in order to produce a square domain.
        If crop, an equal number of pixels is removed
        to both ends of its longest side in order to produce a square domain.
        Note that the crop method involves an irreversible loss of data.
    inverse: bool, optional
        Perform the inverse method to recover the original domain shape.
        After a crop, the inverse is performed by padding the field with
        zeros.

    Returns
    -------
    R: array-like
        the reshape dataset
    metadata: dict
        the metadata with updated attributes.
    """
    R = R.copy()
    R_shape = np.array(R.shape)
    metadata = metadata.copy()

    if not inverse:
        # Normalize to a 4-D (l,t,m,n) view.
        if len(R.shape) < 2:
            raise ValueError("The number of dimension must be > 1")
        if len(R.shape) == 2:
            R = R[None, None, :]
        if len(R.shape) == 3:
            R = R[None, :]
        if len(R.shape) > 4:
            raise ValueError("The number of dimension must be <= 4")

        if R.shape[2] == R.shape[3]:
            # NOTE(review): this early return yields only the array, unlike
            # every other path which returns (array, metadata) — confirm
            # callers handle this asymmetry.
            return R.squeeze()

        orig_dim = R.shape
        orig_dim_n = orig_dim[0]
        orig_dim_t = orig_dim[1]
        orig_dim_y = orig_dim[2]
        orig_dim_x = orig_dim[3]

        if method == "pad":
            # Pad the shorter side with the field minimum, centred.
            new_dim = np.max(orig_dim[2:])
            R_ = np.ones((orig_dim_n, orig_dim_t, new_dim, new_dim)) * R.min()

            if orig_dim_x < new_dim:
                idx_buffer = int((new_dim - orig_dim_x) / 2.0)
                R_[:, :, :, idx_buffer : (idx_buffer + orig_dim_x)] = R
                metadata["x1"] -= idx_buffer * metadata["xpixelsize"]
                metadata["x2"] += idx_buffer * metadata["xpixelsize"]

            elif orig_dim_y < new_dim:
                idx_buffer = int((new_dim - orig_dim_y) / 2.0)
                R_[:, :, idx_buffer : (idx_buffer + orig_dim_y), :] = R
                metadata["y1"] -= idx_buffer * metadata["ypixelsize"]
                metadata["y2"] += idx_buffer * metadata["ypixelsize"]

        elif method == "crop":
            # Crop the longer side symmetrically (irreversible data loss).
            new_dim = np.min(orig_dim[2:])
            R_ = np.zeros((orig_dim_n, orig_dim_t, new_dim, new_dim))

            if orig_dim_x > new_dim:
                idx_buffer = int((orig_dim_x - new_dim) / 2.0)
                R_ = R[:, :, :, idx_buffer : (idx_buffer + new_dim)]
                metadata["x1"] += idx_buffer * metadata["xpixelsize"]
                metadata["x2"] -= idx_buffer * metadata["xpixelsize"]

            elif orig_dim_y > new_dim:
                idx_buffer = int((orig_dim_y - new_dim) / 2.0)
                R_ = R[:, :, idx_buffer : (idx_buffer + new_dim), :]
                metadata["y1"] += idx_buffer * metadata["ypixelsize"]
                metadata["y2"] -= idx_buffer * metadata["ypixelsize"]

        else:
            raise ValueError("Unknown type")

        # Record how to undo this operation for inverse=True.
        metadata["orig_domain"] = (orig_dim_y, orig_dim_x)
        metadata["square_method"] = method

        R_shape[-2] = R_.shape[-2]
        R_shape[-1] = R_.shape[-1]

        return R_.reshape(R_shape), metadata

    elif inverse:
        if len(R.shape) < 2:
            raise ValueError("The number of dimension must be > 2")
        if len(R.shape) == 2:
            R = R[None, None, :]
        if len(R.shape) == 3:
            R = R[None, :]
        if len(R.shape) > 4:
            raise ValueError("The number of dimension must be <= 4")

        # These keys were stored by the forward call; pop so the metadata no
        # longer claims to be squared.
        method = metadata.pop("square_method")
        shape = metadata.pop("orig_domain")

        if R.shape[2] == shape[0] and R.shape[3] == shape[1]:
            return R.squeeze(), metadata

        R_ = np.zeros((R.shape[0], R.shape[1], shape[0], shape[1]))

        if method == "pad":
            # Undo padding by slicing the centre back out.
            if R.shape[2] == shape[0]:
                idx_buffer = int((R.shape[3] - shape[1]) / 2.0)
                R_ = R[:, :, :, idx_buffer : (idx_buffer + shape[1])]
                metadata["x1"] += idx_buffer * metadata["xpixelsize"]
                metadata["x2"] -= idx_buffer * metadata["xpixelsize"]

            elif R.shape[3] == shape[1]:
                idx_buffer = int((R.shape[2] - shape[0]) / 2.0)
                R_ = R[:, :, idx_buffer : (idx_buffer + shape[0]), :]
                metadata["y1"] += idx_buffer * metadata["ypixelsize"]
                metadata["y2"] -= idx_buffer * metadata["ypixelsize"]

        elif method == "crop":
            # Undo cropping by padding with zeros (lost data is gone).
            if R.shape[2] == shape[0]:
                idx_buffer = int((shape[1] - R.shape[3]) / 2.0)
                R_[:, :, :, idx_buffer : (idx_buffer + R.shape[3])] = R
                metadata["x1"] -= idx_buffer * metadata["xpixelsize"]
                metadata["x2"] += idx_buffer * metadata["xpixelsize"]

            elif R.shape[3] == shape[1]:
                idx_buffer = int((shape[0] - R.shape[2]) / 2.0)
                R_[:, :, idx_buffer : (idx_buffer + R.shape[2]), :] = R
                metadata["y1"] -= idx_buffer * metadata["ypixelsize"]
                metadata["y2"] += idx_buffer * metadata["ypixelsize"]

        R_shape[-2] = R_.shape[-2]
        R_shape[-1] = R_.shape[-1]

        return R_.reshape(R_shape), metadata


================================================
FILE: pysteps/utils/fft.py
================================================
# -*- coding: utf-8 -*-
"""
pysteps.utils.fft
=================

Interface module for different FFT methods.

..
autosummary::
    :toctree: ../generated/

    get_numpy
    get_scipy
    get_pyfftw
"""

from pysteps.exceptions import MissingOptionalDependency
from types import SimpleNamespace


def get_numpy(shape, fftn_shape=None, **kwargs):
    # Return a namespace exposing a uniform FFT interface backed by numpy.fft.
    # 'shape' is the 2-D field shape; irfft2 is bound to it so the inverse
    # recovers the original (possibly odd-sized) dimensions.
    import numpy.fft as numpy_fft

    f = {
        "fft2": numpy_fft.fft2,
        "ifft2": numpy_fft.ifft2,
        "rfft2": numpy_fft.rfft2,
        "irfft2": lambda X: numpy_fft.irfft2(X, s=shape),
        "fftshift": numpy_fft.fftshift,
        "ifftshift": numpy_fft.ifftshift,
        "fftfreq": numpy_fft.fftfreq,
    }
    if fftn_shape is not None:
        f["fftn"] = numpy_fft.fftn
    fft = SimpleNamespace(**f)

    return fft


def get_scipy(shape, fftn_shape=None, **kwargs):
    # Same interface as get_numpy but backed by scipy.fftpack where possible.
    import numpy.fft as numpy_fft
    import scipy.fftpack as scipy_fft

    # use numpy implementation of rfft2/irfft2 because they have not been
    # implemented in scipy.fftpack
    f = {
        "fft2": scipy_fft.fft2,
        "ifft2": scipy_fft.ifft2,
        "rfft2": numpy_fft.rfft2,
        "irfft2": lambda X: numpy_fft.irfft2(X, s=shape),
        "fftshift": scipy_fft.fftshift,
        "ifftshift": scipy_fft.ifftshift,
        "fftfreq": scipy_fft.fftfreq,
    }
    if fftn_shape is not None:
        f["fftn"] = scipy_fft.fftn
    fft = SimpleNamespace(**f)

    return fft


def get_pyfftw(shape, fftn_shape=None, n_threads=1, **kwargs):
    # Same interface backed by pyfftw with precomputed FFTW plans.
    # pyfftw is an optional dependency, hence the guarded import.
    try:
        import pyfftw.interfaces.numpy_fft as pyfftw_fft
        import pyfftw

        pyfftw.interfaces.cache.enable()
    except ImportError:
        raise MissingOptionalDependency("pyfftw is required but not installed")

    # Plans for the complex 2-D transforms.
    X = pyfftw.empty_aligned(shape, dtype="complex128")
    F = pyfftw.empty_aligned(shape, dtype="complex128")

    fft_obj = pyfftw.FFTW(
        X,
        F,
        flags=["FFTW_ESTIMATE"],
        direction="FFTW_FORWARD",
        axes=(0, 1),
        threads=n_threads,
    )
    ifft_obj = pyfftw.FFTW(
        F,
        X,
        flags=["FFTW_ESTIMATE"],
        direction="FFTW_BACKWARD",
        axes=(0, 1),
        threads=n_threads,
    )

    if fftn_shape is not None:
        X = pyfftw.empty_aligned(fftn_shape, dtype="complex128")
        F = pyfftw.empty_aligned(fftn_shape, dtype="complex128")
        fftn_obj = pyfftw.FFTW(
            X,
            F,
            flags=["FFTW_ESTIMATE"],
            direction="FFTW_FORWARD",
            axes=list(range(len(fftn_shape))),
            threads=n_threads,
        )

    # Plans for the real-to-complex transforms; the half-spectrum output has
    # shape (..., n//2 + 1) along the last axis.
    X = pyfftw.empty_aligned(shape, dtype="float64")
    output_shape = list(shape[:-1])
    output_shape.append(int(shape[-1] / 2) + 1)
    output_shape = tuple(output_shape)
    F = pyfftw.empty_aligned(output_shape, dtype="complex128")
    rfft_obj = pyfftw.FFTW(
        X,
        F,
        flags=["FFTW_ESTIMATE"],
        direction="FFTW_FORWARD",
        axes=(0, 1),
        threads=n_threads,
    )
    irfft_obj = pyfftw.FFTW(
        F,
        X,
        flags=["FFTW_ESTIMATE"],
        direction="FFTW_BACKWARD",
        axes=(0, 1),
        threads=n_threads,
    )

    # Input and output are copied because FFTW plans operate in place on
    # their aligned buffers.
    f = {
        "fft2": lambda X: fft_obj(input_array=X.copy()).copy(),
        "ifft2": lambda X: ifft_obj(input_array=X.copy()).copy(),
        "rfft2": lambda X: rfft_obj(input_array=X.copy()).copy(),
        "irfft2": lambda X: irfft_obj(input_array=X.copy()).copy(),
        "fftshift": pyfftw_fft.fftshift,
        "ifftshift": pyfftw_fft.ifftshift,
        "fftfreq": pyfftw_fft.fftfreq,
    }
    if fftn_shape is not None:
        f["fftn"] = lambda X: fftn_obj(input_array=X).copy()
    fft = SimpleNamespace(**f)

    return fft


================================================
FILE: pysteps/utils/images.py
================================================
# -*- coding: utf-8 -*-
"""
pysteps.utils.images
====================

Image processing routines for pysteps.

.. autosummary::
    :toctree: ../generated/

    morph_opening
"""

import numpy as np
from numpy.ma.core import MaskedArray

from pysteps.exceptions import MissingOptionalDependency

try:
    import cv2

    CV2_IMPORTED = True
except ImportError:
    CV2_IMPORTED = False


def morph_opening(input_image, thr, n):
    """
    Filter out small scale noise on the image by applying a binary
    morphological opening, that is, erosion followed by dilation.

    .. _MaskedArray:\
        https://docs.scipy.org/doc/numpy/reference/maskedarray.baseclass.html#numpy.ma.MaskedArray

    .. _ndarray:\
        https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html

    Parameters
    ----------
    input_image: ndarray_ or MaskedArray_
        Array of shape (m, n) containing the input image.
    thr: float
        The threshold used to convert the image into a binary image.
    n: int
        The structuring element size [pixels].

    Returns
    -------
    input_image: ndarray_ or MaskedArray_
        Array of shape (m,n) containing the filtered image.
    """
    if not CV2_IMPORTED:
        raise MissingOptionalDependency(
            "opencv package is required for the morphologyEx "
            "routine but it is not installed"
        )

    input_image = input_image.copy()

    # Check if a MaskedArray is used. If not, mask the ndarray
    # (invalid values are masked and filled with the image minimum).
    to_ndarray = False
    if not isinstance(input_image, MaskedArray):
        to_ndarray = True
        input_image = np.ma.masked_invalid(input_image)

    np.ma.set_fill_value(input_image, input_image.min())

    # Convert to binary image
    field_bin = np.ndarray.astype(input_image.filled() > thr, "uint8")

    # Build a structuring element of size n
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (n, n))

    # Apply morphological opening (i.e. erosion then dilation)
    field_bin_out = cv2.morphologyEx(field_bin, cv2.MORPH_OPEN, kernel)

    # Build mask to be applied on the original image: pixels present before
    # the opening but removed by it are the small-scale noise.
    mask = (field_bin - field_bin_out) > 0

    # Filter out small isolated pixels based on mask
    input_image[mask] = np.nanmin(input_image)

    if to_ndarray:
        input_image = np.array(input_image)

    return input_image


================================================
FILE: pysteps/utils/interface.py
================================================
# -*- coding: utf-8 -*-
"""
pysteps.utils.interface
=======================

Interface for the utils module.

.. autosummary::
    :toctree: ../generated/

    get_method
"""

from . import arrays
from . import cleansing
from . import conversion
from . import dimension
from . import fft
from . import images
from . import interpolate
from . import pca
from . import reprojection
from . import spectral
from . import tapering
from . 
def get_method(name, **kwargs):
    """
    Return a callable function for the utility method corresponding to the
    given name.

    The lookup is case-insensitive. Passing ``None`` (or ``"none"``) returns
    a do-nothing method that copies its inputs unchanged.

    Available methods by category:

    - Arrays: 'centred_coord'
    - Cleansing: 'decluster', 'detect_outliers'
    - Conversion: 'mm/h' / 'rainrate', 'mm' / 'raindepth',
      'dbz' / 'reflectivity'
    - Dimension: 'accumulate', 'clip', 'square', 'upscale'
    - FFT wrappers: 'numpy', 'scipy', 'pyfftw' — these require the mandatory
      keyword argument ``shape``; additional keyword arguments are passed to
      the initializer of the FFT method, see utils.fft.
    - Image processing: 'morph_opening'
    - Interpolation: 'rbfinterp2d', 'idwinterp2d'
    - Principal component analysis: 'pca_transform', 'pca_backtransform'
    - Reprojection: 'reproject_grids'
    - Spectral: 'rapsd', 'rm_rdisc'
    - Tapering: 'compute_mask_window_function', 'compute_window_function'
    - Transformation: 'boxcox' / 'box-cox', 'db' / 'decibel', 'log', 'nqt',
      'sqrt'

    Raises
    ------
    KeyError
        If an FFT method is requested without the ``shape`` keyword argument.
    ValueError
        If the given name does not match any known method.
    """
    if name is None:
        name = "none"
    name = name.lower()

    def donothing(R, metadata=None, *args, **kwargs):
        # Identity method: return copies so callers may mutate the results.
        if metadata is None:
            return R.copy(), {}
        return R.copy(), metadata.copy()

    methods_objects = {
        "none": donothing,
        # arrays methods
        "centred_coord": arrays.compute_centred_coord_array,
        # cleansing methods
        "decluster": cleansing.decluster,
        "detect_outliers": cleansing.detect_outliers,
        # conversion methods
        "mm/h": conversion.to_rainrate,
        "rainrate": conversion.to_rainrate,
        "mm": conversion.to_raindepth,
        "raindepth": conversion.to_raindepth,
        "dbz": conversion.to_reflectivity,
        "reflectivity": conversion.to_reflectivity,
        # dimension methods
        "accumulate": dimension.aggregate_fields_time,
        "clip": dimension.clip_domain,
        "square": dimension.square_domain,
        "upscale": dimension.aggregate_fields_space,
        # image processing methods
        "morph_opening": images.morph_opening,
        # interpolation methods
        "rbfinterp2d": interpolate.rbfinterp2d,
        "idwinterp2d": interpolate.idwinterp2d,
        # pca methods
        "pca_transform": pca.pca_transform,
        "pca_backtransform": pca.pca_backtransform,
        # reprojection methods
        "reproject_grids": reprojection.reproject_grids,
        # spectral methods
        "rapsd": spectral.rapsd,
        "rm_rdisc": spectral.remove_rain_norain_discontinuity,
        # tapering methods
        "compute_mask_window_function": tapering.compute_mask_window_function,
        "compute_window_function": tapering.compute_window_function,
        # transformation methods (note: 'log' is Box-Cox with Lambda=0)
        "boxcox": transformation.boxcox_transform,
        "box-cox": transformation.boxcox_transform,
        "db": transformation.dB_transform,
        "decibel": transformation.dB_transform,
        "log": transformation.boxcox_transform,
        "nqt": transformation.NQ_transform,
        "sqrt": transformation.sqrt_transform,
    }

    # The FFT wrappers are constructed on demand for a given grid shape.
    if name in ("numpy", "pyfftw", "scipy"):
        if "shape" not in kwargs:
            raise KeyError("mandatory keyword argument shape not given")
        return _get_fft_method(name, **kwargs)

    try:
        return methods_objects[name]
    except KeyError as e:
        raise ValueError(
            "Unknown method %s\n" % e
            + "Supported methods:%s" % str(methods_objects.keys())
        )


def _get_fft_method(name, **kwargs):
    """Construct the FFT wrapper for the given backend and grid shape."""
    kwargs = kwargs.copy()
    shape = kwargs.pop("shape")

    constructors = {
        "numpy": fft.get_numpy,
        "scipy": fft.get_scipy,
        "pyfftw": fft.get_pyfftw,
    }
    try:
        make = constructors[name]
    except KeyError:
        raise ValueError(
            "Unknown method {}\n".format(name)
            + "The available methods are:"
            + str(["numpy", "pyfftw", "scipy"])
        ) from None

    return make(shape, **kwargs)
@prepare_interpolator()
def idwinterp2d(
    xy_coord, values, xgrid, ygrid, power=0.5, k=20, dist_offset=0.5, **kwargs
):
    """
    Inverse distance weighting interpolation of a sparse (multivariate) array.

    Parameters
    ----------
    xy_coord: ndarray
        Array of shape (n, 2) containing the coordinates of the data points
        in a 2-dimensional space.
    values: ndarray
        Array of shape (n) or (n, m) containing the values of the data
        points, where *n* is the number of data points and *m* the number of
        co-located variables. All elements in ``values`` are required to be
        finite.
    xgrid, ygrid: ndarray
        1-D arrays representing the coordinates of the 2-D output grid.
    power: positive float, optional
        The power parameter used to compute the distance weights as
        ``weight = distance ** (-power)``.
    k: positive int or None, optional
        The number of nearest neighbours used for each target location. If
        set to None, it interpolates using all the data points at once.
    dist_offset: float, optional
        A small, positive constant added to the distances to avoid zero
        values. It has units of pixels.

    Other Parameters
    ----------------
    {extra_kwargs_doc}

    Returns
    -------
    output_array: ndarray
        The interpolated field(s) having shape (``ygrid.size``,
        ``xgrid.size``) or (*m*, ``ygrid.size``, ``xgrid.size``).
    """
    if values.ndim == 1:
        n_vars = 1
        values = values[:, None]
    elif values.ndim == 2:
        n_vars = values.shape[1]
    n_points = values.shape[0]

    # flatten the target grid into a list of (x, y) pairs
    grid_x, grid_y = np.meshgrid(xgrid, ygrid)
    targets = np.column_stack((grid_x.ravel(), grid_y.ravel()))

    if k is not None:
        # query only the k nearest data points for each target location
        k = int(np.min((k, n_points)))
        tree = _cKDTree_cached(xy_coord, hkey=kwargs.get("hkey", None))
        dist, inds = tree.query(targets, k=k)
        if dist.ndim == 1:
            # k == 1 yields flat arrays; add the neighbour axis back
            dist = dist[..., None]
            inds = inds[..., None]
    else:
        # use every data point for every target location
        dist = cdist(xy_coord, targets, "euclidean").transpose()
        inds = np.arange(n_points)[None, :] * np.ones(
            (targets.shape[0], n_points)
        ).astype(int)

    # convert geographical distances to number of pixels
    x_res = np.gradient(xgrid)
    y_res = np.gradient(ygrid)
    mean_res = np.mean(np.abs([x_res.mean(), y_res.mean()]))
    dist /= mean_res

    # inverse-distance weights, normalized to sum to one per target point
    dist += dist_offset  # avoid zero distances
    weights = 1 / np.power(dist, power)
    weights = weights / np.sum(weights, axis=1, keepdims=True)

    # weighted sum over the selected neighbours
    interpolated = np.sum(values[inds, :] * weights[..., None], axis=1)

    # reshape to the final grid size
    interpolated = interpolated.reshape(ygrid.size, xgrid.size, n_vars)
    return np.moveaxis(interpolated, -1, 0).squeeze()
@prepare_interpolator()
def rbfinterp2d(xy_coord, values, xgrid, ygrid, **kwargs):
    """
    Radial basis function interpolation of a sparse (multivariate) array.

    This method wraps the ``scipy.interpolate.Rbf`` class.

    Parameters
    ----------
    xy_coord: ndarray
        Array of shape (n, 2) containing the coordinates of the data points
        in a 2-dimensional space.
    values: ndarray
        Array of shape (n) or (n, m) containing the values of the data
        points, where *n* is the number of data points and *m* the number of
        co-located variables. All values in ``values`` are required to be
        finite.
    xgrid, ygrid: ndarray
        1-D arrays representing the coordinates of the 2-D output grid.

    Other Parameters
    ----------------
    Any of the parameters from the original ``scipy.interpolate.Rbf`` class.

    {extra_kwargs_doc}

    Returns
    -------
    output_array: ndarray
        The interpolated field(s) having shape (``ygrid.size``,
        ``xgrid.size``) or (*m*, ``ygrid.size``, ``xgrid.size``).
    """
    # 'rbfunction' and 'k' belonged to the old in-house implementation and
    # are no longer honoured.
    found_deprecated = [arg for arg in ("rbfunction", "k") if arg in list(kwargs.keys())]
    if found_deprecated:
        warnings.warn(
            "rbfinterp2d: The following keyword arguments are deprecated:\n"
            + str(found_deprecated),
            DeprecationWarning,
        )

    # Rbf distinguishes scalar and multivariate target values via 'mode'
    kwargs["mode"] = "1-D" if values.ndim == 1 else "N-D"

    grid_x, grid_y = np.meshgrid(xgrid, ygrid)

    # Rbf expects one positional argument per coordinate dimension
    coord_columns = np.split(xy_coord, xy_coord.shape[1], 1)
    interpolator = _Rbf_cached(*coord_columns, values, **kwargs)
    interpolated = interpolator(grid_x, grid_y)

    return np.moveaxis(interpolated, -1, 0).squeeze()


@memoize()
def _cKDTree_cached(*args, **kwargs):
    """Memoized cKDTree construction (kwargs only contribute to the cache key)."""
    return cKDTree(*args)


@memoize()
def _Rbf_cached(*args, **kwargs):
    """Memoized Rbf interpolator construction."""
    return Rbf(*args, **kwargs)
def pca_transform(
    forecast_ens: np.ndarray,
    mask: np.ndarray | None = None,
    pca_params: dict | None = None,
    get_params: bool = False,
    **kwargs,
):
    """
    Transform ensemble forecasts from physical space into principal component
    (PC) space.

    Parameters
    ----------
    forecast_ens : np.ndarray
        Array of shape (n_ens, n_features) containing the ensemble forecasts
        in physical space.
    mask : np.ndarray, optional
        Boolean feature mask to transform only grid points at which at least
        10 ensemble members have forecast precipitation, to fulfill the Lien
        criterion (Lien et al., 2013) mentioned in Nerini et al., 2019.
        The default is None.
    pca_params : dict, optional
        Preconstructed PCA parameters (see the return value below). If given,
        these are used instead of fitting a new PCA. The default is None.
    get_params : bool, optional
        If True, return the PCA parameters in addition to the transformed
        data. The default is False.
    n_components : int, optional keyword argument
        Number of principal components to retain. Defaults to the ensemble
        size ``forecast_ens.shape[0]``.
    svd_solver : {'auto', 'full', 'covariance_eigh', 'arpack', 'randomized'}
        Solver to use for the singular value decomposition; defaults to
        'full'. For details, see the documentation of
        ``sklearn.decomposition.PCA``.

    Returns
    -------
    forecast_ens_pc : np.ndarray
        Array of shape (n_ens, n_components) containing the ensemble
        forecasts transformed into PC space (the projection is applied
        row-wise, so ensemble members stay on the first axis). If no mask is
        given, the full dataset is transformed; otherwise only the
        mask-filtered features enter the projection.
    pca_params : dict, optional
        Dictionary containing the PCA parameters, returned if
        ``get_params=True``. The dictionary has the following keys:

        principal_components : np.ndarray
            Array of shape (n_components, n_features) containing the
            principal component vectors in feature space.
        mean : np.ndarray
            Array of shape (n_features,) containing the per-feature empirical
            mean estimated from the input data.
        explained_variance : np.ndarray
            Array of shape (n_components,) containing the explained variance
            ratio of each retained component.

    Raises
    ------
    MissingOptionalDependency
        If scikit-learn is not installed.
    ValueError
        If the input array is not two-dimensional or is inconsistent with
        the given ``pca_params``.
    KeyError
        If ``pca_params`` is given but lacks 'principal_components' or
        'mean'.
    """
    # Test import of sklearn
    if not SKLEARN_IMPORTED:
        raise MissingOptionalDependency(
            "scikit-learn package is required for principal component analysis "
            "but it is not installed"
        )

    # Input data have to be two-dimensional
    if forecast_ens.ndim != 2:
        raise ValueError("Input array should be two-dimensional!")

    if pca_params is None:
        # Check whether n_components and svd_solver are given as keyword arguments
        n_components = kwargs.get("n_components", forecast_ens.shape[0])
        svd_solver = kwargs.get("svd_solver", "full")

        # Initialize PCA and fit it to the input data
        pca = decomposition.PCA(n_components=n_components, svd_solver=svd_solver)
        pca.fit(forecast_ens)

        # Save the fitted parameters in the output dictionary
        pca_params = {
            "principal_components": pca.components_,
            "mean": pca.mean_,
            "explained_variance": pca.explained_variance_ratio_,
        }
    else:
        # A preconstructed PCA must provide the components and the mean
        if "principal_components" not in pca_params:
            raise KeyError("Output is not None but has no key 'principal_components'!")
        if "mean" not in pca_params:
            raise KeyError("Output is not None but has no key 'mean'!")
        # Check whether PC and mean have the correct shape
        if forecast_ens.shape[1] != len(pca_params["mean"]):
            raise ValueError("pca mean has not the same length as the input array!")
        if forecast_ens.shape[1] != pca_params["principal_components"].shape[1]:
            raise ValueError(
                "principal components have not the same length as the input array"
            )

    # Project the (centered) data onto the principal components. If a mask
    # is given, restrict the projection to the masked features.
    if mask is None:
        forecast_ens_pc = np.dot(
            (forecast_ens - pca_params["mean"]), pca_params["principal_components"].T
        )
    else:
        forecast_ens_pc = np.dot(
            (forecast_ens[:, mask] - pca_params["mean"][mask]),
            pca_params["principal_components"][:, mask].T,
        )

    if get_params:
        return forecast_ens_pc, pca_params
    return forecast_ens_pc
def pca_backtransform(forecast_ens_pc: np.ndarray, pca_params: dict):
    """
    Reconstruct ensemble forecasts from principal component (PC) space back
    into physical space.

    Parameters
    ----------
    forecast_ens_pc : np.ndarray
        Array of shape (n_ens, n_components) containing the ensemble
        forecasts represented in PC space (ensemble members on the first
        axis, as produced by :py:func:`pca_transform`).
    pca_params : dict
        Parameters of the PCA transformation. The dictionary contains the
        following keys:

        principal_components : np.ndarray
            Array of shape (n_components, n_features) containing the
            principal component vectors in feature space.
        mean : np.ndarray
            Array of shape (n_features,) containing the per-feature empirical
            mean estimated from the training data.

    Returns
    -------
    forecast_ens : np.ndarray
        Array of shape (n_ens, n_features) containing the ensemble forecasts
        reconstructed in physical space.

    Raises
    ------
    KeyError
        If ``pca_params`` lacks 'principal_components' or 'mean'.
    ValueError
        If the number of components in ``forecast_ens_pc`` does not match
        ``pca_params``.
    """
    # The PCA parameters must provide the components and the mean
    if "principal_components" not in pca_params:
        raise KeyError("Output is not None but has no key 'principal_components'!")
    if "mean" not in pca_params:
        raise KeyError("Output is not None but has no key 'mean'!")

    # Check whether forecast_ens_pc and the principal components are
    # compatible. (The previous error message wrongly referred to the PCA
    # mean; this check compares against the number of components.)
    if forecast_ens_pc.shape[1] != pca_params["principal_components"].shape[0]:
        raise ValueError(
            "the number of principal components in forecast_ens_pc does not "
            "match pca_params['principal_components']!"
        )

    # Transform forecast_ens_pc back into physical space.
    return (
        np.dot(forecast_ens_pc, pca_params["principal_components"])
        + pca_params["mean"]
    )
def reproject_grids(src_array, dst_array, metadata_src, metadata_dst):
    """
    Reproject precipitation fields to the domain of another precipitation
    field.

    Parameters
    ----------
    src_array: array-like
        Three-dimensional array of shape (t, x, y) containing a time series of
        precipitation fields. These precipitation fields will be reprojected.
    dst_array: array-like
        Array containing a precipitation field or a time series of precipitation
        fields. The src_array will be reprojected to the domain of dst_array.
    metadata_src: dict
        Metadata dictionary containing the projection, x- and ypixelsize, x1 and
        y2 attributes of the src_array as described in the documentation of
        :py:mod:`pysteps.io.importers`.
    metadata_dst: dict
        Metadata dictionary containing the projection, x- and ypixelsize, x1 and
        y2 attributes of the dst_array.

    Returns
    -------
    r_rprj: array-like
        Three-dimensional array of shape (t, x, y) containing the precipitation
        fields of src_array, but reprojected to the domain of dst_array.
    metadata: dict
        Metadata dictionary containing the projection, x- and ypixelsize, x1 and
        y2 attributes of the reprojected src_array.

    Raises
    ------
    MissingOptionalDependency
        If rasterio is not installed.
    """
    if not RASTERIO_IMPORTED:
        raise MissingOptionalDependency(
            "rasterio package is required for the reprojection module, but it is "
            "not installed"
        )

    # Extract the grid info from src_array
    src_crs = metadata_src["projection"]
    x1_src = metadata_src["x1"]
    y2_src = metadata_src["y2"]
    xpixelsize_src = metadata_src["xpixelsize"]
    ypixelsize_src = metadata_src["ypixelsize"]
    # Affine transform anchored at (x1, y2); the negative y scale means row
    # indices increase southward — NOTE(review): this presumes an 'upper'
    # yorigin layout at this point, confirm against the importers.
    src_transform = A.translation(float(x1_src), float(y2_src)) * A.scale(
        float(xpixelsize_src), float(-ypixelsize_src)
    )

    # Extract the grid info from dst_array
    dst_crs = metadata_dst["projection"]
    x1_dst = metadata_dst["x1"]
    y2_dst = metadata_dst["y2"]
    xpixelsize_dst = metadata_dst["xpixelsize"]
    ypixelsize_dst = metadata_dst["ypixelsize"]
    dst_transform = A.translation(float(x1_dst), float(y2_dst)) * A.scale(
        float(xpixelsize_dst), float(-ypixelsize_dst)
    )

    # Initialise the reprojected array
    r_rprj = np.zeros((src_array.shape[0], dst_array.shape[-2], dst_array.shape[-1]))

    # For every timestep, reproject the precipitation field of src_array to
    # the domain of dst_array
    if metadata_src["yorigin"] != metadata_dst["yorigin"]:
        # Flip the row order so both grids use the same vertical orientation
        src_array = src_array[:, ::-1, :]

    for i in range(src_array.shape[0]):
        # Nearest-neighbour resampling; pixels outside the source domain are
        # filled with NaN via dst_nodata.
        reproject(
            src_array[i, :, :],
            r_rprj[i, :, :],
            src_transform=src_transform,
            src_crs=src_crs,
            dst_transform=dst_transform,
            dst_crs=dst_crs,
            resampling=Resampling.nearest,
            dst_nodata=np.nan,
        )

    # Update the metadata: the reprojected fields live on the destination
    # grid, so take over its grid description wholesale.
    metadata = metadata_src.copy()

    for key in [
        "projection",
        "yorigin",
        "xpixelsize",
        "ypixelsize",
        "x1",
        "x2",
        "y1",
        "y2",
        "cartesian_unit",
    ]:
        metadata[key] = metadata_dst[key]

    return r_rprj, metadata
def unstructured2regular(src_array, metadata_src, metadata_dst):
    """
    Reproject unstructured data onto a regular grid on the assumption that
    both src data and dst grid have the same projection.

    Parameters
    ----------
    src_array: np.ndarray
        Three-dimensional array of shape (t, n_ens, n_gridcells) containing a
        time series of precipitation ensemble forecasts. These precipitation
        fields will be reprojected.
    metadata_src: dict
        Metadata dictionary containing the projection, clon, clat, and
        ngridcells attributes of the src_array as described in the
        documentation of :py:mod:`pysteps.io.importers`.
    metadata_dst: dict
        Metadata dictionary containing the projection, x- and ypixelsize, x1
        and y2 attributes of the dst_array.

    Returns
    -------
    tuple
        A tuple containing:

        - r_rprj: np.ndarray
            Four-dimensional array of shape (t, n_ens, x, y) containing the
            precipitation fields of src_array, but reprojected to the grid of
            dst_array.
        - metadata: dict
            Dictionary containing geospatial metadata such as:

            - 'projection': PROJ.4 string defining the projection.
            - 'xpixelsize', 'ypixelsize': Pixel size in meters.
            - 'x1', 'y1': Cartesian coordinates of the lower-left corner.
            - 'x2', 'y2': Cartesian coordinates of the upper-right corner.
            - 'cartesian_unit': Unit of the coordinate system (meters).

    Raises
    ------
    MissingOptionalDependency
        If pyproj is not installed.
    KeyError
        If the cell-center coordinates (clon/clat) are missing from
        ``metadata_src``.
    """
    if not PYPROJ_IMPORTED:
        raise MissingOptionalDependency(
            # note the trailing space: without it the adjacent literals were
            # concatenated into "...NWP databut it is not installed"
            "pyproj package is required to reproject DWD's NWP data "
            "but it is not installed"
        )

    if "clon" not in metadata_src:
        raise KeyError("Center longitude (clon) is missing in metadata_src")
    if "clat" not in metadata_src:
        raise KeyError("Center latitude (clat) is missing in metadata_src")

    # Indices of the unstructured source cells; interpolating the indices
    # (rather than the data) yields a reusable nearest-neighbour lookup map.
    n_cells = metadata_src["clon"].shape[0]
    ic_in = np.arange(n_cells)

    # Get cartesian coordinates of destination grid
    x_dst = np.arange(
        np.float32(metadata_dst["x1"]),
        np.float32(metadata_dst["x2"]),
        metadata_dst["xpixelsize"],
    )
    y_dst = np.arange(
        np.float32(metadata_dst["y1"]),
        np.float32(metadata_dst["y2"]),
        metadata_dst["ypixelsize"],
    )

    # Create destination grid
    if metadata_dst["yorigin"] == "upper":
        y_dst = y_dst[::-1]
    xx_dst, yy_dst = np.meshgrid(x_dst, y_dst)
    s_out = yy_dst.shape

    # Extract the grid info of src_array assuming the same projection of
    # src and dst
    pr = pyproj.Proj(metadata_dst["projection"])
    x_src, y_src = pr(metadata_src["clon"], metadata_src["clat"])

    # Create arrays of x-y pairs for interpolation
    P_in = np.stack((x_src, y_src)).T
    P_out = np.array((xx_dst.flatten(), yy_dst.flatten())).T

    # Nearest-neighbour interpolation of the cell indices: for each target
    # pixel, the index of the closest source cell
    ic_out = (
        griddata(P_in, ic_in, P_out, method="nearest").reshape(s_out).astype(int)
    )

    # Apply the index map on all time steps and ensemble members
    r_rprj = np.array(
        [
            [src_array[i, j][ic_out] for j in range(src_array.shape[1])]
            for i in range(src_array.shape[0])
        ]
    )

    # Update the src metadata with the destination grid description
    metadata = metadata_src.copy()
    for key in [
        "projection",
        "yorigin",
        "xpixelsize",
        "ypixelsize",
        "x1",
        "x2",
        "y1",
        "y2",
        "cartesian_unit",
    ]:
        metadata[key] = metadata_dst[key]

    return r_rprj, metadata
def corrcoef(X, Y, shape, use_full_fft=False):
    """
    Compute the correlation coefficient between two-dimensional arrays in
    the spectral domain.

    Parameters
    ----------
    X: array_like
        A complex array representing the Fourier transform of a
        two-dimensional array.
    Y: array_like
        A complex array representing the Fourier transform of a
        two-dimensional array.
    shape: tuple
        A two-element tuple specifying the shape of the original input
        arrays in the spatial domain.
    use_full_fft: bool
        If True, X and Y represent the full FFTs of the original arrays.
        Otherwise, they are assumed to contain only the symmetric part, i.e.
        in the format returned by numpy.fft.rfft2.

    Returns
    -------
    out: float
        The correlation coefficient. Gives the same result as
        numpy.corrcoef(X.flatten(), Y.flatten())[0, 1].
    """
    if X.ndim != 2:
        raise ValueError("X is not a two-dimensional array")
    if Y.ndim != 2:
        raise ValueError("Y is not a two-dimensional array")
    if X.shape != Y.shape:
        raise ValueError(
            "dimension mismatch between X and Y: "
            + "X.shape=%d,%d , " % (X.shape[0], X.shape[1])
            + "Y.shape=%d,%d" % (Y.shape[0], Y.shape[1])
        )

    # Cross- and auto-spectral sums; subtracting the DC term removes the
    # contribution of the mean values.
    numer = np.real(np.sum(X * np.conj(Y))) - np.real(X[0, 0] * Y[0, 0])
    var_x = np.sum(np.abs(X) ** 2) - np.real(X[0, 0]) ** 2
    var_y = np.sum(np.abs(Y) ** 2) - np.real(Y[0, 0]) ** 2

    if not use_full_fft:
        # Half-spectrum input: count the conjugate-symmetric columns twice.
        # For even width the last column is the (unduplicated) Nyquist one.
        cols = slice(1, None) if shape[1] % 2 == 1 else slice(1, -1)
        numer += np.real(np.sum(X[:, cols] * np.conj(Y[:, cols])))
        var_x += np.sum(np.abs(X[:, cols]) ** 2)
        var_y += np.sum(np.abs(Y[:, cols]) ** 2)

    return numer / np.sqrt(var_x * var_y)


def mean(X, shape):
    """
    Compute the mean value of a two-dimensional array in the spectral domain.

    Parameters
    ----------
    X: array_like
        A complex array representing the Fourier transform of a
        two-dimensional array.
    shape: tuple
        A two-element tuple specifying the shape of the original input array
        in the spatial domain.

    Returns
    -------
    out: float
        The mean value.
    """
    # The DC component equals the sum of the field; divide by the cell count.
    return np.real(X[0, 0]) / (shape[0] * shape[1])
def rapsd(
    field, fft_method=None, return_freq=False, d=1.0, normalize=False, **fft_kwargs
):
    """
    Compute radially averaged power spectral density (RAPSD) from the given
    2D input field.

    Parameters
    ----------
    field: array_like
        A 2d array of shape (m, n) containing the input field.
    fft_method: object
        A module or object implementing the same methods as numpy.fft and
        scipy.fftpack. If set to None, field is assumed to represent the
        shifted discrete Fourier transform of the input field, where the
        origin is at the center of the array (see numpy.fft.fftshift or
        scipy.fftpack.fftshift).
    return_freq: bool
        Whether to also return the Fourier frequencies.
    d: scalar
        Sample spacing (inverse of the sampling rate). Defaults to 1.
        Applicable if return_freq is 'True'.
    normalize: bool
        If True, normalize the power spectrum so that it sums to one.

    Returns
    -------
    out: ndarray
        One-dimensional array containing the RAPSD. The length of the array
        is int(l/2) (if l is even) or int(l/2)+1 (if l is odd), where
        l=max(m,n).
    freq: ndarray
        One-dimensional array containing the Fourier frequencies.

    References
    ----------
    :cite:`RC2011`
    """
    if field.ndim != 2:
        raise ValueError(
            f"{len(field.shape)} dimensions are found, but the number "
            "of dimensions should be 2"
        )
    if np.any(np.isnan(field)):
        raise ValueError("input field should not contain nans")

    m, n = field.shape

    # integer radius of every frequency bin relative to the array center
    yc, xc = arrays.compute_centred_coord_array(m, n)
    radii = np.sqrt(xc * xc + yc * yc).round()

    max_dim = max(m, n)
    n_bins = max_dim // 2 + 1 if max_dim % 2 == 1 else max_dim // 2
    r_range = np.arange(0, n_bins)

    if fft_method is not None:
        spectrum = fft_method.fftshift(fft_method.fft2(field, **fft_kwargs))
        spectrum = np.abs(spectrum) ** 2 / spectrum.size
    else:
        # the input is already a shifted power spectrum
        spectrum = field

    # average the power over annuli of constant integer radius
    result = np.array([np.mean(spectrum[radii == r]) for r in r_range])

    if normalize:
        result /= np.sum(result)

    if return_freq:
        freq = np.fft.fftfreq(max_dim, d=d)
        return result, freq[r_range]
    return result


def remove_rain_norain_discontinuity(R):
    """Function to remove the rain/no-rain discontinuity.

    It can be used before computing Fourier filters to reduce the artificial
    increase of power at high frequencies caused by the discontinuity.

    Parameters
    ----------
    R: array-like
        Array of any shape to be transformed.

    Returns
    -------
    R: array-like
        Array of any shape containing the transformed data.
    """
    field = R.copy()
    base = np.nanmin(field)

    # shift the rainy values down so the smallest rain value meets the
    # no-rain value, then re-anchor the field at zero offset
    rainy = field > base
    smallest_rain = np.nanmin(field[rainy])
    field[rainy] -= smallest_rain - base
    field -= np.nanmin(field)

    return field
def std(X, shape, use_full_fft=False):
    """
    Compute the standard deviation of a two-dimensional array in the
    spectral domain.

    Parameters
    ----------
    X: array_like
        A complex array representing the Fourier transform of a
        two-dimensional array.
    shape: tuple
        A two-element tuple specifying the shape of the original input array
        in the spatial domain.
    use_full_fft: bool
        If True, X represents the full FFT of the original array. Otherwise,
        it is assumed to contain only the symmetric part, i.e. in the format
        returned by numpy.fft.rfft2.

    Returns
    -------
    out: float
        The standard deviation.
    """
    # Parseval: the total spectral power minus the DC term gives the
    # (unnormalized) variance of the spatial field.
    total = np.sum(np.abs(X) ** 2) - np.real(X[0, 0]) ** 2

    if not use_full_fft:
        # Half-spectrum input: count the conjugate-symmetric columns twice.
        # For even width the last column is the (unduplicated) Nyquist one.
        cols = slice(1, None) if shape[1] % 2 == 1 else slice(1, -1)
        total += np.sum(np.abs(X[:, cols]) ** 2)

    return np.sqrt(total / (shape[0] * shape[1]) ** 2)
""" R = _compute_mask_distances(mask) if func == "hann": raise NotImplementedError("Hann function has not been implemented") elif func == "tukey": r_max = kwargs.get("r_max", 10.0) return _tukey_masked(R, r_max, np.isfinite(R)) else: raise ValueError("invalid window function '%s'" % func) def compute_window_function(m, n, func, **kwargs): """ Compute window function for a two-dimensional rectangular region. Window function-specific parameters are given as keyword arguments. Parameters ---------- m: int Height of the array. n: int Width of the array. func: str The name of the window function. The currently implemented functions are 'hann' and 'tukey'. Other Parameters ---------------- alpha: float Applicable if func is 'tukey'. Notes ----- Two-dimensional tapering weights are computed from one-dimensional window functions using w(r), where r is the distance from the center of the region. Returns ------- out: array Array of shape (m, n) containing the tapering weights. """ X, Y = np.meshgrid(np.arange(n), np.arange(m)) R = np.sqrt(((X / n) - 0.5) ** 2 + ((Y / m) - 0.5) ** 2) if func == "hann": return _hann(R) elif func == "tukey": alpha = kwargs.get("alpha", 0.2) return _tukey(R, alpha) else: raise ValueError("invalid window function '%s'" % func) def _compute_mask_distances(mask): X, Y = np.meshgrid(np.arange(mask.shape[1]), np.arange(mask.shape[0])) tree = cKDTree(np.vstack([X[~mask], Y[~mask]]).T) r, i = tree.query(np.vstack([X[mask], Y[mask]]).T, k=1) R = np.ones(mask.shape) * np.nan R[Y[mask], X[mask]] = r return R def _hann(R): W = np.ones_like(R) mask = R > 0.5 W[mask] = 0.0 W[~mask] = 0.5 * (1.0 - np.cos(2.0 * np.pi * (R[~mask] + 0.5))) return W def _tukey(R, alpha): W = np.ones_like(R) mask1 = R < 0.5 mask2 = R > 0.5 * (1.0 - alpha) mask = np.logical_and(mask1, mask2) W[mask] = 0.5 * ( 1.0 + np.cos(np.pi * (R[mask] / (alpha * 0.5) - 1.0 / alpha + 1.0)) ) mask = R >= 0.5 W[mask] = 0.0 return W def _tukey_masked(R, r_max, mask): W = np.ones_like(R) mask_r = R 
< r_max mask_ = np.logical_and(mask, mask_r) W[mask_] = 0.5 * (1.0 + np.cos(np.pi * (R[mask_] / r_max - 1.0))) W[~mask] = np.nan return W ================================================ FILE: pysteps/utils/transformation.py ================================================ # -*- coding: utf-8 -*- """ pysteps.utils.transformation ============================ Methods for transforming data values. .. autosummary:: :toctree: ../generated/ boxcox_transform dB_transform NQ_transform sqrt_transform """ import numpy as np import scipy.stats as scipy_stats import warnings from scipy.interpolate import interp1d warnings.filterwarnings( "ignore", category=RuntimeWarning ) # To deactivate warnings for comparison operators with NaNs def boxcox_transform( R, metadata=None, Lambda=None, threshold=None, zerovalue=None, inverse=False ): """ The one-parameter Box-Cox transformation. The Box-Cox transform is a well-known power transformation introduced by Box and Cox (1964). In its one-parameter version, the Box-Cox transform takes the form T(x) = ln(x) for Lambda = 0, or T(x) = (x**Lambda - 1)/Lambda otherwise. Default parameters will produce a log transform (i.e. Lambda=0). Parameters ---------- R: array-like Array of any shape to be transformed. metadata: dict, optional Metadata dictionary containing the transform, zerovalue and threshold attributes as described in the documentation of :py:mod:`pysteps.io.importers`. Lambda: float, optional Parameter Lambda of the Box-Cox transformation. It is 0 by default, which produces the log transformation. Choose Lambda < 1 for positively skewed data, Lambda > 1 for negatively skewed data. threshold: float, optional The value that is used for thresholding with the same units as R. If None, the threshold contained in metadata is used. If no threshold is found in the metadata, a value of 0.1 is used as default. zerovalue: float, optional The value to be assigned to no rain pixels as defined by the threshold. 
def boxcox_transform(
    R, metadata=None, Lambda=None, threshold=None, zerovalue=None, inverse=False
):
    """
    The one-parameter Box-Cox transformation.

    The Box-Cox transform is a well-known power transformation introduced by
    Box and Cox (1964). In its one-parameter version, the Box-Cox transform
    takes the form T(x) = ln(x) for Lambda = 0,
    or T(x) = (x**Lambda - 1)/Lambda otherwise.

    Default parameters will produce a log transform (i.e. Lambda=0).

    Parameters
    ----------
    R: array-like
        Array of any shape to be transformed.
    metadata: dict, optional
        Metadata dictionary containing the transform, zerovalue and threshold
        attributes as described in the documentation of
        :py:mod:`pysteps.io.importers`.
    Lambda: float, optional
        Parameter Lambda of the Box-Cox transformation.
        It is 0 by default, which produces the log transformation.
        Choose Lambda < 1 for positively skewed data, Lambda > 1 for
        negatively skewed data.
    threshold: float, optional
        The value that is used for thresholding with the same units as R.
        If None, the threshold contained in metadata is used.
        If no threshold is found in the metadata, a value of 0.1 is used as
        default.
    zerovalue: float, optional
        The value to be assigned to no rain pixels as defined by the
        threshold. It is equal to the threshold - 1 by default.
    inverse: bool, optional
        If set to True, it performs the inverse transform. False by default.

    Returns
    -------
    R: array-like
        Array of any shape containing the (back-)transformed units.
    metadata: dict
        The metadata with updated attributes.

    References
    ----------
    Box, G. E. and Cox, D. R. (1964), An Analysis of Transformations. Journal
    of the Royal Statistical Society: Series B (Methodological), 26: 211-243.
    doi:10.1111/j.2517-6161.1964.tb00553.x
    """
    R = R.copy()

    if metadata is None:
        metadata = {"transform": "BoxCox" if inverse else None}
    else:
        metadata = metadata.copy()

    if not inverse:
        # forward transform
        if metadata["transform"] == "BoxCox":
            # already transformed: nothing to do
            return R, metadata

        if Lambda is None:
            Lambda = metadata.get("BoxCox_lambda", 0.0)
        if threshold is None:
            threshold = metadata.get("threshold", 0.1)

        # note: NaNs compare False here and are therefore transformed,
        # not replaced by the zerovalue
        dry = R < threshold

        if Lambda == 0.0:
            # the Lambda -> 0 limit of the Box-Cox transform is the log
            R[~dry] = np.log(R[~dry])
            threshold = np.log(threshold)
        else:
            R[~dry] = (R[~dry] ** Lambda - 1) / Lambda
            threshold = (threshold**Lambda - 1) / Lambda

        if zerovalue is None:
            zerovalue = threshold - 1  # TODO: set to a more meaningful value
        R[dry] = zerovalue

        metadata["transform"] = "BoxCox"
        metadata["BoxCox_lambda"] = Lambda
        metadata["zerovalue"] = zerovalue
        metadata["threshold"] = threshold

    else:
        # inverse transform
        if metadata["transform"] not in ["BoxCox", "log"]:
            return R, metadata

        if Lambda is None:
            Lambda = metadata.pop("BoxCox_lambda", 0.0)
        if threshold is None:
            threshold = metadata.get("threshold", -10.0)
        if zerovalue is None:
            zerovalue = 0.0

        if Lambda == 0.0:
            R = np.exp(R)
            threshold = np.exp(threshold)
        else:
            # x = (Lambda*y + 1)**(1/Lambda), computed through exp/log
            R = np.exp(np.log(Lambda * R + 1) / Lambda)
            threshold = np.exp(np.log(Lambda * threshold + 1) / Lambda)

        R[R < threshold] = zerovalue

        metadata["transform"] = None
        metadata["zerovalue"] = zerovalue
        metadata["threshold"] = threshold

    return R, metadata
def dB_transform(R, metadata=None, threshold=None, zerovalue=None, inverse=False):
    """Methods to transform precipitation intensities to/from dB units.

    Parameters
    ----------
    R: array-like
        Array of any shape to be (back-)transformed.
    metadata: dict, optional
        Metadata dictionary containing the transform, zerovalue and threshold
        attributes as described in the documentation of
        :py:mod:`pysteps.io.importers`.
    threshold: float, optional
        Optional value that is used for thresholding with the same units as R.
        If None, the threshold contained in metadata is used.
        If no threshold is found in the metadata, a value of 0.1 is used as
        default.
    zerovalue: float, optional
        The value to be assigned to no rain pixels as defined by the
        threshold. It is equal to the threshold - 1 by default.
    inverse: bool, optional
        If set to True, it performs the inverse transform. False by default.

    Returns
    -------
    R: array-like
        Array of any shape containing the (back-)transformed units.
    metadata: dict
        The metadata with updated attributes.
    """
    R = R.copy()

    if metadata is None:
        metadata = {"transform": "dB" if inverse else None}
    else:
        metadata = metadata.copy()

    if not inverse:
        # forward transform: intensities -> dB
        if metadata["transform"] == "dB":
            # already in dB: nothing to do
            return R, metadata

        if threshold is None:
            threshold = metadata.get("threshold", 0.1)

        dry = R < threshold
        R[~dry] = 10.0 * np.log10(R[~dry])
        threshold = 10.0 * np.log10(threshold)

        if zerovalue is None:
            zerovalue = threshold - 5  # TODO: set to a more meaningful value
        R[dry] = zerovalue

        metadata["transform"] = "dB"
        metadata["zerovalue"] = zerovalue
        metadata["threshold"] = threshold
        return R, metadata

    # inverse transform: dB -> intensities
    if metadata["transform"] != "dB":
        return R, metadata

    if threshold is None:
        threshold = metadata.get("threshold", -10.0)
    if zerovalue is None:
        zerovalue = 0.0

    R = 10.0 ** (R / 10.0)
    threshold = 10.0 ** (threshold / 10.0)
    R[R < threshold] = zerovalue

    metadata["transform"] = None
    metadata["threshold"] = threshold
    metadata["zerovalue"] = zerovalue
    return R, metadata
def NQ_transform(R, metadata=None, inverse=False, **kwargs):
    """
    The normal quantile transformation as in Bogner et al (2012).
    Zero rain values are set to zero in norm space.

    Parameters
    ----------
    R: array-like
        Array of any shape to be transformed.
    metadata: dict, optional
        Metadata dictionary containing the transform, zerovalue and threshold
        attributes as described in the documentation of
        :py:mod:`pysteps.io.importers`.
    inverse: bool, optional
        If set to True, it performs the inverse transform. False by default.
        NOTE(review): the inverse path pops the "inqt" interpolator stored in
        the metadata by a previous forward transform; calling the inverse
        without it raises a KeyError.

    Other Parameters
    ----------------
    a: float, optional
        The offset fraction to be used for plotting positions;
        typically in (0,1).
        The default is 0., that is, it spaces the points evenly in the
        uniform distribution.

    Returns
    -------
    R: array-like
        Array of any shape containing the (back-)transformed units.
    metadata: dict
        The metadata with updated attributes.

    References
    ----------
    Bogner, K., Pappenberger, F., and Cloke, H. L.: Technical Note: The normal
    quantile transformation and its application in a flood forecasting system,
    Hydrol. Earth Syst. Sci., 16, 1085-1094,
    https://doi.org/10.5194/hess-16-1085-2012, 2012.
    """
    # defaults
    a = kwargs.get("a", 0.0)

    # work on a flattened float copy, excluding NaNs; the original shape is
    # restored on return and NaN positions are left untouched
    R = R.copy()
    shape0 = R.shape
    R = R.ravel().astype(float)
    idxNan = np.isnan(R)
    R_ = R[~idxNan]

    if metadata is None:
        if inverse:
            metadata = {"transform": "NQT"}
        else:
            metadata = {"transform": None}
        metadata["zerovalue"] = np.min(R_)
    else:
        metadata = metadata.copy()

    if not inverse:
        # Plotting positions
        # https://en.wikipedia.org/wiki/Q%E2%80%93Q_plot#Plotting_position
        n = R_.size
        Rpp = ((np.arange(n) + 1 - a) / (n + 1 - 2 * a)).reshape(R_.shape)

        # NQ transform: map the empirical distribution of R_ onto the
        # corresponding standard-normal quantiles
        Rqn = scipy_stats.norm.ppf(Rpp)
        R__ = np.interp(R_, R_[np.argsort(R_)], Rqn)

        # set zero rain to 0 in norm space
        R__[R[~idxNan] == metadata["zerovalue"]] = 0

        # build inverse transform; values outside the fitted normal range are
        # clamped to the observed data range via fill_value
        metadata["inqt"] = interp1d(
            Rqn, R_[np.argsort(R_)], bounds_error=False, fill_value=(R_.min(), R_.max())
        )

        metadata["transform"] = "NQT"
        metadata["zerovalue"] = 0
        metadata["threshold"] = R__[R__ > 0].min()

    else:
        # invert through the interpolator built by the forward transform
        f = metadata.pop("inqt")
        R__ = f(R_)
        metadata["transform"] = None
        metadata["zerovalue"] = R__.min()
        metadata["threshold"] = R__[R__ > R__.min()].min()

    R[~idxNan] = R__

    return R.reshape(shape0), metadata
""" R = R.copy() if metadata is None: if inverse: metadata = {"transform": "sqrt"} else: metadata = {"transform": None} metadata["zerovalue"] = np.nan metadata["threshold"] = np.nan else: metadata = metadata.copy() if not inverse: # sqrt transform R = np.sqrt(R) metadata["transform"] = "sqrt" metadata["zerovalue"] = np.sqrt(metadata["zerovalue"]) metadata["threshold"] = np.sqrt(metadata["threshold"]) else: # inverse sqrt transform R = R**2 metadata["transform"] = None metadata["zerovalue"] = metadata["zerovalue"] ** 2 metadata["threshold"] = metadata["threshold"] ** 2 return R, metadata ================================================ FILE: pysteps/verification/__init__.py ================================================ # -- coding: utf-8 -- """Methods for verification of deterministic, probabilistic and ensemble forecasts.""" from .interface import get_method from .detcatscores import * from .detcontscores import * from .ensscores import * from .plots import * from .probscores import * from .spatialscores import * from .salscores import * ================================================ FILE: pysteps/verification/detcatscores.py ================================================ # -- coding: utf-8 -- """ pysteps.verification.detcatscores ================================= Forecast evaluation and skill scores for deterministic categorial (dichotomous) forecasts. .. autosummary:: :toctree: ../generated/ det_cat_fct det_cat_fct_init det_cat_fct_accum det_cat_fct_merge det_cat_fct_compute """ import collections import numpy as np def det_cat_fct(pred, obs, thr, scores="", axis=None): """ Calculate simple and skill scores for deterministic categorical (dichotomous) forecasts. Parameters ---------- pred: array_like Array of predictions. NaNs are ignored. obs: array_like Array of verifying observations. NaNs are ignored. thr: float The threshold that is applied to predictions and observations in order to define events vs no events (yes/no). 
def det_cat_fct(pred, obs, thr, scores="", axis=None):
    """
    Calculate simple and skill scores for deterministic categorical
    (dichotomous) forecasts.

    Parameters
    ----------
    pred: array_like
        Array of predictions. NaNs are ignored.
    obs: array_like
        Array of verifying observations. NaNs are ignored.
    thr: float
        The threshold that is applied to predictions and observations in
        order to define events vs no events (yes/no).
    scores: {string, list of strings}, optional
        The name(s) of the scores. The default, scores="", will compute all
        available scores: ACC (accuracy), BIAS (frequency bias), CSI
        (critical success index), ETS (equitable threat score), F1 (harmonic
        mean of precision and sensitivity), FA (false alarm rate), FAR (false
        alarm ratio), GSS (Gilbert skill score), HK (Hanssen-Kuipers
        discriminant), HSS (Heidke skill score), MCC (Matthews correlation
        coefficient), POD (probability of detection) and SEDI (symmetric
        extremal dependency index).
    axis: None or int or tuple of ints, optional
        Axis or axes along which a score is integrated. The default,
        axis=None, will integrate all of the elements of the input arrays.
        If axis is -1 (or any negative integer), the integration is not
        performed and scores are computed on all of the elements in the
        input arrays. If axis is a tuple of ints, the integration is
        performed on all of the axes specified in the tuple.

    Returns
    -------
    result: dict
        Dictionary containing the verification results.

    See also
    --------
    pysteps.verification.detcontscores.det_cont_fct
    """
    # convenience wrapper around the init/accum/compute cycle: build a fresh
    # contingency table, fill it with this single pred/obs pair, then reduce
    # it to the requested scores
    table = det_cat_fct_init(thr, axis)
    det_cat_fct_accum(table, pred, obs)
    return det_cat_fct_compute(table, scores)
def det_cat_fct_init(thr, axis=None):
    """
    Initialize a contingency table object.

    Parameters
    ----------
    thr: float
        Threshold that is applied to predictions and observations in order
        to define events vs no events (yes/no).
    axis: None or int or tuple of ints, optional
        Axis or axes along which a score is integrated. The default,
        axis=None, will integrate all of the elements of the input arrays.
        If axis is -1 (or any negative integer), the integration is not
        performed and scores are computed on all of the elements in the
        input arrays. If axis is a tuple of ints, the integration is
        performed on all of the axes specified in the tuple.

    Returns
    -------
    out: dict
        The contingency table object.
    """

    def _as_axes(value):
        # a plain integer axis is wrapped in a 1-tuple; None and iterables
        # pass through unchanged
        if value is None or (
            isinstance(value, collections.abc.Iterable) and not isinstance(value, int)
        ):
            return value
        return (value,)

    # the four count arrays are allocated lazily on the first accumulation
    return {
        "thr": thr,
        "axis": _as_axes(axis),
        "hits": None,
        "false_alarms": None,
        "misses": None,
        "correct_negatives": None,
    }
def det_cat_fct_accum(contab, pred, obs):
    """Accumulate the frequency of "yes" and "no" forecasts and observations
    in the contingency table.

    Pairs where either pred or obs is NaN are excluded from all four
    categories, honoring the documented "NaNs are ignored" contract.

    Parameters
    ----------
    contab: dict
        A contingency table object initialized with
        pysteps.verification.detcatscores.det_cat_fct_init.
    pred: array_like
        Array of predictions. NaNs are ignored.
    obs: array_like
        Array of verifying observations. NaNs are ignored.
    """

    pred = np.asarray(pred.copy())
    obs = np.asarray(obs.copy())
    axis = tuple(range(pred.ndim)) if contab["axis"] is None else contab["axis"]

    # checks
    if pred.shape != obs.shape:
        raise ValueError(
            "the shape of pred does not match the shape of obs %s!=%s"
            % (pred.shape, obs.shape)
        )

    if pred.ndim <= np.max(axis):
        raise ValueError(
            "axis %d is out of bounds for array of dimension %d"
            % (np.max(axis), len(pred.shape))
        )

    idims = [dim not in axis for dim in range(pred.ndim)]
    nshape = tuple(np.array(pred.shape)[np.array(idims)])
    if contab["hits"] is None:
        # initialize the count arrays in the contingency table
        contab["hits"] = np.zeros(nshape, dtype=int)
        contab["false_alarms"] = np.zeros(nshape, dtype=int)
        contab["misses"] = np.zeros(nshape, dtype=int)
        contab["correct_negatives"] = np.zeros(nshape, dtype=int)
    else:
        # check dimensions
        if contab["hits"].shape != nshape:
            raise ValueError(
                "the shape of the input arrays does not match "
                + "the shape of the "
                + "contingency table %s!=%s" % (nshape, contab["hits"].shape)
            )

    # add dummy axis in case integration is not required
    if np.max(axis) < 0:
        pred = pred[None, :]
        obs = obs[None, :]
        axis = (0,)
    axis = tuple([a for a in axis if a >= 0])

    # pairs with missing data must not contribute to any category: a plain
    # "NaN > thr" comparison evaluates to False and would silently count
    # such pairs as misses or correct negatives
    valid = np.logical_and(~np.isnan(pred), ~np.isnan(obs))

    # apply threshold
    predb = pred > contab["thr"]
    obsb = obs > contab["thr"]

    # calculate hits, misses, false positives, correct rejects,
    # restricted to pairs where both pred and obs are available
    H_idx = np.logical_and(valid, np.logical_and(predb, obsb))
    F_idx = np.logical_and(valid, np.logical_and(predb, ~obsb))
    M_idx = np.logical_and(valid, np.logical_and(~predb, obsb))
    R_idx = np.logical_and(valid, np.logical_and(~predb, ~obsb))

    # accumulate in the contingency table
    contab["hits"] += np.sum(H_idx.astype(int), axis=axis)
    contab["misses"] += np.sum(M_idx.astype(int), axis=axis)
    contab["false_alarms"] += np.sum(F_idx.astype(int), axis=axis)
    contab["correct_negatives"] += np.sum(R_idx.astype(int), axis=axis)
def det_cat_fct_merge(contab_1, contab_2):
    """
    Merge two contingency table objects.

    The inputs are left unmodified; the returned object holds newly
    allocated count arrays.

    Parameters
    ----------
    contab_1: dict
        A contingency table object initialized with
        :py:func:`pysteps.verification.detcatscores.det_cat_fct_init`
        and populated with
        :py:func:`pysteps.verification.detcatscores.det_cat_fct_accum`.
    contab_2: dict
        Another contingency table object initialized with
        :py:func:`pysteps.verification.detcatscores.det_cat_fct_init`
        and populated with
        :py:func:`pysteps.verification.detcatscores.det_cat_fct_accum`.

    Returns
    -------
    out: dict
        The merged contingency table object.

    Raises
    ------
    ValueError
        If the thresholds or axes of the two tables differ, or if either
        table has not been populated yet.
    """

    # checks
    if contab_1["thr"] != contab_2["thr"]:
        raise ValueError(
            "cannot merge: the thresholds are not same %s!=%s"
            % (contab_1["thr"], contab_2["thr"])
        )
    if contab_1["axis"] != contab_2["axis"]:
        raise ValueError(
            "cannot merge: the axis are not same %s!=%s"
            % (contab_1["axis"], contab_2["axis"])
        )
    if contab_1["hits"] is None or contab_2["hits"] is None:
        raise ValueError("cannot merge: no data found")

    # merge the contingency tables. dict.copy() is shallow, so the counts
    # must be rebound to fresh arrays instead of being updated in place;
    # an in-place "+=" here would corrupt contab_1's arrays.
    contab = contab_1.copy()
    for key in ("hits", "misses", "false_alarms", "correct_negatives"):
        contab[key] = contab_1[key] + contab_2[key]

    return contab
def det_cat_fct_compute(contab, scores=""):
    """
    Compute simple and skill scores for deterministic categorical
    (dichotomous) forecasts from a contingency table object.

    Parameters
    ----------
    contab: dict
        A contingency table object initialized with
        pysteps.verification.detcatscores.det_cat_fct_init and populated with
        pysteps.verification.detcatscores.det_cat_fct_accum.
    scores: {string, list of strings}, optional
        The name(s) of the scores. The default, scores="", will compute all
        available scores: ACC (accuracy), BIAS (frequency bias), CSI
        (critical success index), ETS (equitable threat score), F1 (harmonic
        mean of precision and sensitivity), FA (false alarm rate), FAR (false
        alarm ratio), GSS (Gilbert skill score), HK (Hanssen-Kuipers
        discriminant), HSS (Heidke skill score), MCC (Matthews correlation
        coefficient), POD (probability of detection) and SEDI (symmetric
        extremal dependency index).

    Returns
    -------
    result: dict
        Dictionary containing the verification results.
    """

    # catch case of single score passed as string
    def get_iterable(x):
        if isinstance(x, collections.abc.Iterable) and not isinstance(x, str):
            return x
        else:
            return (x,)

    scores = get_iterable(scores)

    # promote the counts to float so the ratios below use true division
    H = 1.0 * contab["hits"]  # true positives
    M = 1.0 * contab["misses"]  # false negatives
    F = 1.0 * contab["false_alarms"]  # false positives
    R = 1.0 * contab["correct_negatives"]  # true negatives

    result = {}
    for score in scores:
        # catch None passed as score
        if score is None:
            continue

        score_ = score.lower()

        # simple scores, used by several of the skill scores below
        POD = H / (H + M)  # hit rate
        FAR = F / (H + F)  # false discovery rate
        FA = F / (F + R)  # false positive rate
        s = (H + M) / (H + M + F + R)  # base rate (event frequency)

        if score_ in ["pod", ""]:
            # probability of detection
            result["POD"] = POD
        if score_ in ["far", ""]:
            # false alarm ratio
            result["FAR"] = FAR
        if score_ in ["fa", ""]:
            # false alarm rate (prob of false detection)
            result["FA"] = FA
        if score_ in ["acc", ""]:
            # accuracy (fraction correct)
            ACC = (H + R) / (H + M + F + R)
            result["ACC"] = ACC
        if score_ in ["csi", ""]:
            # critical success index
            CSI = H / (H + M + F)
            result["CSI"] = CSI
        if score_ in ["bias", ""]:
            # frequency bias
            B = (H + F) / (H + M)
            result["BIAS"] = B

        # skill scores
        if score_ in ["hss", ""]:
            # Heidke Skill Score (-1 < HSS < 1) < 0 implies no skill
            HSS = 2 * (H * R - F * M) / ((H + M) * (M + R) + (H + F) * (F + R))
            result["HSS"] = HSS
        if score_ in ["hk", ""]:
            # Hanssen-Kuipers Discriminant
            HK = POD - FA
            result["HK"] = HK
        if score_ in ["gss", "ets", ""]:
            # Gilbert Skill Score; stored under "ETS" only when explicitly
            # requested by that name (the default "" stores "GSS")
            GSS = (POD - FA) / ((1 - s * POD) / (1 - s) + FA * (1 - s) / s)
            if score_ == "ets":
                result["ETS"] = GSS
            else:
                result["GSS"] = GSS
        if score_ in ["sedi", ""]:
            # Symmetric extremal dependence index
            SEDI = (np.log(FA) - np.log(POD) + np.log(1 - POD) - np.log(1 - FA)) / (
                np.log(FA) + np.log(POD) + np.log(1 - POD) + np.log(1 - FA)
            )
            result["SEDI"] = SEDI
        if score_ in ["mcc", ""]:
            # Matthews correlation coefficient
            MCC = (H * R - F * M) / np.sqrt((H + F) * (H + M) * (R + F) * (R + M))
            result["MCC"] = MCC
        if score_ in ["f1", ""]:
            # F1 score
            F1 = 2 * H / (2 * H + F + M)
            result["F1"] = F1

    return result
def det_cont_fct(pred, obs, scores="", axis=None, conditioning=None, thr=0.0):
    """
    Calculate simple and skill scores for deterministic continuous forecasts.

    Parameters
    ----------
    pred: array_like
        Array of predictions. NaNs are ignored.
    obs: array_like
        Array of verifying observations. NaNs are ignored.
    scores: {string, list of strings}, optional
        The name(s) of the scores. The default, scores="", will compute all
        available scores: beta1 (type 1 conditional bias slope), beta2
        (type 2 conditional bias slope), corr_p (Pearson correlation),
        corr_s* (Spearman rank correlation), DRMSE (debiased RMSE), MAE,
        ME (mean error/bias), MSE, NMSE (normalized MSE), RMSE, RV
        (reduction of variance) and scatter* (as in Germann et al., 2006).
        Score names denoted by * can only be computed offline, i.e. not
        through the _init/_accum/_compute cycle of this module.
    axis: {int, tuple of int, None}, optional
        Axis or axes along which a score is integrated. The default,
        axis=None, will integrate all of the elements of the input arrays.
        If axis is -1 (or any negative integer), the integration is not
        performed and scores are computed on all of the elements in the
        input arrays. If axis is a tuple of ints, the integration is
        performed on all of the axes specified in the tuple.
    conditioning: {None, "single", "double"}, optional
        The type of conditioning used for the verification. The default,
        conditioning=None, includes all pairs. With conditioning="single",
        only pairs with either pred or obs > thr are included. With
        conditioning="double", only pairs with both pred and obs > thr are
        included.
    thr: float
        Optional threshold value for conditioning. Defaults to 0.

    Returns
    -------
    result: dict
        Dictionary containing the verification results.

    Notes
    -----
    Multiplicative scores can be computed by passing log-transformed values;
    "scatter" is the only score computed in dB units of the multiplicative
    error, i.e. 10*log10(pred/obs).

    References
    ----------
    Germann, U., Galli, G., Boscacci, M. and Bolliger, M. (2006), Radar
    precipitation measurement in a mountainous region. Q.J.R. Meteorol.
    Soc., 132: 1669-1692. doi:10.1256/qj.05.190

    See also
    --------
    pysteps.verification.detcatscores.det_cat_fct
    """

    # catch case of single score passed as string
    def get_iterable(x):
        if isinstance(x, collections.abc.Iterable) and not isinstance(x, str):
            return x
        else:
            return (x,)

    scores = get_iterable(scores)

    # split between online and offline scores; the offline ones (scatter,
    # corr_s) need all samples at once and cannot be streamed through the
    # init/accum/compute cycle. An empty string ("" = all scores) is kept in
    # both groups.
    loffline = ["scatter", "corr_s"]
    onscores = [
        score for score in scores if str(score).lower() not in loffline or score == ""
    ]
    offscores = [
        score for score in scores if str(score).lower() in loffline or score == ""
    ]

    # unique lists (_uniquelist is a module-level helper defined elsewhere
    # in this file)
    onscores = _uniquelist(onscores)
    offscores = _uniquelist(offscores)

    # online scores: delegate to the streaming init/accum/compute cycle
    onresult = {}
    if onscores:
        err = det_cont_fct_init(axis=axis, conditioning=conditioning, thr=thr)
        det_cont_fct_accum(err, pred, obs)
        onresult = det_cont_fct_compute(err, onscores)

    # offline scores
    offresult = {}
    if offscores:
        pred = np.asarray(pred.copy())
        obs = np.asarray(obs.copy())

        if pred.shape != obs.shape:
            raise ValueError(
                "the shape of pred does not match the shape of obs %s!=%s"
                % (pred.shape, obs.shape)
            )

        # conditioning: excluded pairs are replaced by NaN so the
        # NaN-aware helpers below skip them
        if conditioning is not None:
            if conditioning == "single":
                idx = np.logical_or(obs > thr, pred > thr)
            elif conditioning == "double":
                idx = np.logical_and(obs > thr, pred > thr)
            else:
                raise ValueError("unkown conditioning %s" % conditioning)
            obs[~idx] = np.nan
            pred[~idx] = np.nan

        for score in offscores:
            # catch None passed as score
            if score is None:
                continue

            score_ = score.lower()

            # spearman corr (rank correlation); _spearmanr is a module-level
            # helper defined elsewhere in this file
            if score_ in ["corr_s", "spearmanr", ""]:
                corr_s = _spearmanr(pred, obs, axis=axis)
                offresult["corr_s"] = corr_s

            # scatter; _scatter is a module-level helper defined elsewhere
            # in this file
            if score_ in ["scatter", ""]:
                scatter = _scatter(pred, obs, axis=axis)
                offresult["scatter"] = scatter

    # pull all results together
    result = onresult
    result.update(offresult)

    return result
def det_cont_fct_init(axis=None, conditioning=None, thr=0.0):
    """
    Initialize a verification error object.

    Parameters
    ----------
    axis: {int, tuple of int, None}, optional
        Axis or axes along which a score is integrated. The default,
        axis=None, will integrate all of the elements of the input arrays.
        If axis is -1 (or any negative integer), the integration is not
        performed and scores are computed on all of the elements in the
        input arrays. If axis is a tuple of ints, the integration is
        performed on all of the axes specified in the tuple.
    conditioning: {None, "single", "double"}, optional
        The type of conditioning used for the verification. The default,
        conditioning=None, includes all pairs. With conditioning="single",
        only pairs with either pred or obs > thr are included. With
        conditioning="double", only pairs with both pred and obs > thr are
        included.
    thr: float
        Optional threshold value for conditioning. Defaults to 0.

    Returns
    -------
    out: dict
        The verification error object.
    """

    def _as_axes(value):
        # a plain integer axis is wrapped in a 1-tuple; None and iterables
        # pass through unchanged
        if value is None or (
            isinstance(value, collections.abc.Iterable) and not isinstance(value, int)
        ):
            return value
        return (value,)

    err = {
        "axis": _as_axes(axis),
        "conditioning": conditioning,
        "thr": thr,
    }
    # running accumulators, allocated lazily on the first accumulation;
    # "mss" is the mean square sum, i.e. E[(pred + obs)^2]
    for key in (
        "cov",
        "vobs",
        "vpred",
        "mobs",
        "mpred",
        "me",
        "mse",
        "mss",
        "mae",
        "n",
    ):
        err[key] = None

    return err
(1979), "Updating Formulae and a Pairwise Algorithm for Computing Sample Variances.", Technical Report STAN-CS-79-773, Department of Computer Science, Stanford University. Schubert, Erich; Gertz, Michael (2018-07-09). "Numerically stable parallel computation of (co-)variance". ACM: 10. doi:10.1145/3221269.3223036. """ pred = np.asarray(pred.copy()) obs = np.asarray(obs.copy()) axis = tuple(range(pred.ndim)) if err["axis"] is None else err["axis"] # checks if pred.shape != obs.shape: raise ValueError( "the shape of pred does not match the shape of obs %s!=%s" % (pred.shape, obs.shape) ) if pred.ndim <= np.max(axis): raise ValueError( "axis %d is out of bounds for array of dimension %d" % (np.max(axis), len(pred.shape)) ) idims = [dim not in axis for dim in range(pred.ndim)] nshape = tuple(np.array(pred.shape)[np.array(idims)]) if err["cov"] is None: # initialize the error arrays in the verification object err["cov"] = np.zeros(nshape) err["vobs"] = np.zeros(nshape) err["vpred"] = np.zeros(nshape) err["mobs"] = np.zeros(nshape) err["mpred"] = np.zeros(nshape) err["me"] = np.zeros(nshape) err["mse"] = np.zeros(nshape) err["mss"] = np.zeros(nshape) err["mae"] = np.zeros(nshape) err["n"] = np.zeros(nshape) else: # check dimensions if err["cov"].shape != nshape: raise ValueError( "the shape of the input arrays does not match " + "the shape of the " + "verification object %s!=%s" % (nshape, err["cov"].shape) ) # conditioning if err["conditioning"] is not None: if err["conditioning"] == "single": idx = np.logical_or(obs > err["thr"], pred > err["thr"]) elif err["conditioning"] == "double": idx = np.logical_and(obs > err["thr"], pred > err["thr"]) else: raise ValueError("unkown conditioning %s" % err["conditioning"]) obs[~idx] = np.nan pred[~idx] = np.nan # add dummy axis in case integration is not required if np.max(axis) < 0: pred = pred[None, :] obs = obs[None, :] axis = (0,) axis = tuple([a for a in axis if a >= 0]) # compute residuals res = pred - obs sum = pred + obs 
n = np.sum(np.isfinite(res), axis=axis) # new means mobs = np.nanmean(obs, axis=axis) mpred = np.nanmean(pred, axis=axis) me = np.nanmean(res, axis=axis) mse = np.nanmean(res**2, axis=axis) mss = np.nanmean(sum**2, axis=axis) mae = np.nanmean(np.abs(res), axis=axis) # expand axes for broadcasting for ax in sorted(axis): mobs = np.expand_dims(mobs, ax) mpred = np.expand_dims(mpred, ax) # new cov matrix cov = np.nanmean((obs - mobs) * (pred - mpred), axis=axis) vobs = np.nanmean(np.abs(obs - mobs) ** 2, axis=axis) vpred = np.nanmean(np.abs(pred - mpred) ** 2, axis=axis) mobs = mobs.squeeze() mpred = mpred.squeeze() # update variances _parallel_var(err["mobs"], err["n"], err["vobs"], mobs, n, vobs) _parallel_var(err["mpred"], err["n"], err["vpred"], mpred, n, vpred) # update covariance _parallel_cov(err["cov"], err["mobs"], err["mpred"], err["n"], cov, mobs, mpred, n) # update means _parallel_mean(err["mobs"], err["n"], mobs, n) _parallel_mean(err["mpred"], err["n"], mpred, n) _parallel_mean(err["me"], err["n"], me, n) _parallel_mean(err["mse"], err["n"], mse, n) _parallel_mean(err["mss"], err["n"], mss, n) _parallel_mean(err["mae"], err["n"], mae, n) # update number of samples err["n"] += n def det_cont_fct_merge(err_1, err_2): """ Merge two verification error objects. Parameters ---------- err_1: dict A verification error object initialized with :py:func:`pysteps.verification.detcontscores.det_cont_fct_init` and populated with :py:func:`pysteps.verification.detcontscores.det_cont_fct_accum`. err_2: dict Another verification error object initialized with :py:func:`pysteps.verification.detcontscores.det_cont_fct_init` and populated with :py:func:`pysteps.verification.detcontscores.det_cont_fct_accum`. Returns ------- out: dict The merged verification error object. 
""" # checks if err_1["axis"] != err_2["axis"]: raise ValueError( "cannot merge: the axis are not same %s!=%s" % (err_1["axis"], err_2["axis"]) ) if err_1["conditioning"] != err_2["conditioning"]: raise ValueError( "cannot merge: the conditioning is not same %s!=%s" % (err_1["conditioning"], err_2["conditioning"]) ) if err_1["thr"] != err_2["thr"]: raise ValueError( "cannot merge: the threshold is not same %s!=%s" % (err_1["thr"], err_2["thr"]) ) if err_1["cov"] is None or err_2["cov"] is None: raise ValueError("cannot merge: no data found") # merge the two verification error objects err = err_1.copy() # update variances _parallel_var( err["mobs"], err["n"], err["vobs"], err_2["mobs"], err_2["n"], err_2["vobs"] ) _parallel_var( err["mpred"], err["n"], err["vpred"], err_2["mpred"], err_2["n"], err_2["vpred"], ) # update covariance _parallel_cov( err["cov"], err["mobs"], err["mpred"], err["n"], err_2["cov"], err_2["mobs"], err_2["mpred"], err_2["n"], ) # update means _parallel_mean(err["mobs"], err["n"], err_2["mobs"], err_2["n"]) _parallel_mean(err["mpred"], err["n"], err_2["mpred"], err_2["n"]) _parallel_mean(err["me"], err["n"], err_2["me"], err_2["n"]) _parallel_mean(err["mse"], err["n"], err_2["mse"], err_2["n"]) _parallel_mean(err["mss"], err["n"], err_2["mss"], err_2["n"]) _parallel_mean(err["mae"], err["n"], err_2["mae"], err_2["n"]) # update number of samples err["n"] += err_2["n"] return err def det_cont_fct_compute(err, scores=""): """ Compute simple and skill scores for deterministic continuous forecasts from a verification error object. Parameters ---------- err: dict A verification error object initialized with :py:func:`pysteps.verification.detcontscores.det_cont_fct_init` and populated with :py:func:`pysteps.verification.detcontscores.det_cont_fct_accum`. scores: {string, list of strings}, optional The name(s) of the scores. The default, scores="", will compute all available scores. The available score names are: .. 
tabularcolumns:: |p{2cm}|L| +------------+--------------------------------------------------------+ | Name | Description | +============+========================================================+ | beta1 | linear regression slope (type 1 conditional bias) | +------------+--------------------------------------------------------+ | beta2 | linear regression slope (type 2 conditional bias) | +------------+--------------------------------------------------------+ | corr_p | pearson's correleation coefficien (linear correlation) | +------------+--------------------------------------------------------+ | DRMSE | debiased root mean squared error, i.e. | | | :math:`DRMSE = \\sqrt{RMSE - ME^2}` | +------------+--------------------------------------------------------+ | MAE | mean absolute error | +------------+--------------------------------------------------------+ | ME | mean error or bias | +------------+--------------------------------------------------------+ | MSE | mean squared error | +------------+--------------------------------------------------------+ | NMSE | normalized mean squared error | +------------+--------------------------------------------------------+ | RMSE | root mean squared error | +------------+--------------------------------------------------------+ | RV | reduction of variance | | | (Brier Score, Nash-Sutcliffe Efficiency), i.e. | | | :math:`RV = 1 - \\frac{MSE}{s^2_o}` | +------------+--------------------------------------------------------+ Returns ------- result: dict Dictionary containing the verification results. 
""" # catch case of single score passed as string def get_iterable(x): if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): return x else: return (x,) scores = get_iterable(scores) result = {} for score in scores: # catch None passed as score if score is None: continue score_ = score.lower() # bias (mean error, systematic error) if score_ in ["bias", "me", ""]: bias = err["me"] result["ME"] = bias # mean absolute error if score_ in ["mae", ""]: MAE = err["mae"] result["MAE"] = MAE # mean squared error if score_ in ["mse", ""]: MSE = err["mse"] result["MSE"] = MSE # normalized mean squared error if score_ in ["nmse", ""]: NMSE = err["mse"] / err["mss"] result["NMSE"] = NMSE # root mean squared error if score_ in ["rmse", ""]: RMSE = np.sqrt(err["mse"]) result["RMSE"] = RMSE # linear correlation coeff (pearson corr) if score_ in ["corr_p", "pearsonr", ""]: corr_p = err["cov"] / np.sqrt(err["vobs"]) / np.sqrt(err["vpred"]) result["corr_p"] = corr_p # beta1 (linear regression slope) if score_ in ["beta", "beta1", ""]: beta1 = err["cov"] / err["vpred"] result["beta1"] = beta1 # beta2 (linear regression slope) if score_ in ["beta2", ""]: beta2 = err["cov"] / err["vobs"] result["beta2"] = beta2 # debiased RMSE if score_ in ["drmse", ""]: RMSE_d = np.sqrt(err["mse"] - err["me"] ** 2) result["DRMSE"] = RMSE_d # reduction of variance # (Brier Score, Nash-Sutcliffe efficiency coefficient, # MSE skill score) if score_ in ["rv", "brier_score", "nse", ""]: RV = 1.0 - err["mse"] / err["vobs"] result["RV"] = RV return result def _parallel_mean(avg_a, count_a, avg_b, count_b): """Update avg_a with avg_b.""" idx = count_b > 0 avg_a[idx] = (count_a[idx] * avg_a[idx] + count_b[idx] * avg_b[idx]) / ( count_a[idx] + count_b[idx] ) def _parallel_var(avg_a, count_a, var_a, avg_b, count_b, var_b): """ Update var_a with var_b. 
    source:
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    """
    # only update where the second chunk actually contributes samples
    idx = count_b > 0

    delta = avg_b - avg_a
    m_a = var_a * count_a
    m_b = var_b * count_b

    var_a[idx] = (
        m_a[idx]
        + m_b[idx]
        + delta[idx] ** 2 * count_a[idx] * count_b[idx] / (count_a[idx] + count_b[idx])
    )
    var_a[idx] = var_a[idx] / (count_a[idx] + count_b[idx])


def _parallel_cov(cov_a, avg_xa, avg_ya, count_a, cov_b, avg_xb, avg_yb, count_b):
    """Update cov_a with cov_b (pairwise in-place covariance update)."""
    idx = count_b > 0

    deltax = avg_xb - avg_xa
    deltay = avg_yb - avg_ya
    c_a = cov_a * count_a
    c_b = cov_b * count_b

    cov_a[idx] = (
        c_a[idx]
        + c_b[idx]
        + deltax[idx]
        * deltay[idx]
        * count_a[idx]
        * count_b[idx]
        / (count_a[idx] + count_b[idx])
    )
    cov_a[idx] = cov_a[idx] / (count_a[idx] + count_b[idx])


def _uniquelist(mylist):
    # remove duplicates while preserving the original order
    used = set()
    return [x for x in mylist if x not in used and (used.add(x) or True)]


def _scatter(pred, obs, axis=None):
    # scatter score of Germann et al. (2006): half the distance between the
    # 16% and 84% percentiles of the obs-weighted cumulative distribution of
    # the multiplicative error in dB
    pred = pred.copy()
    obs = obs.copy()

    # catch case of axis passed as integer
    def get_iterable(x):
        if x is None or (
            isinstance(x, collections.abc.Iterable) and not isinstance(x, int)
        ):
            return x
        else:
            return (x,)

    axis = get_iterable(axis)

    # reshape arrays as 2d matrices
    # rows: samples; columns: variables
    axis = tuple(range(pred.ndim)) if axis is None else axis
    axis = tuple(np.sort(axis))
    for ax in axis:
        pred = np.rollaxis(pred, ax, 0)
        obs = np.rollaxis(obs, ax, 0)
    shp_rows = pred.shape[: len(axis)]
    shp_cols = pred.shape[len(axis) :]
    pred = np.reshape(pred, (np.prod(shp_rows), -1))
    obs = np.reshape(obs, (np.prod(shp_rows), -1))

    # compute multiplicative errors in dB
    q = 10 * np.log10(pred / obs)

    # nans are given zero weight and are set equal to (min value - 1)
    idkeep = np.isfinite(q)
    q[~idkeep] = q[idkeep].min() - 1
    obs[~idkeep] = 0

    # compute scatter along rows (weighted empirical CDF of q, weights = obs)
    xs = np.sort(q, axis=0)
    xs = np.vstack((xs[0, :], xs))
    ixs = np.argsort(q, axis=0)
    ws = np.take_along_axis(obs, ixs, axis=0)
    ws = np.vstack((ws[0, :] * 0.0, ws))
    wsc = np.cumsum(ws, axis=0) / np.sum(ws, axis=0)
    xint = np.zeros((2, xs.shape[1]))
    for i in range(xint.shape[1]):
        xint[:, i] = np.interp([0.16, 0.84], wsc[:, i], xs[:, i])
    scatter = (xint[1, :] - xint[0, :]) / 2.0

    # reshape back
    scatter = scatter.reshape(shp_cols)

    return float(scatter) if scatter.size == 1 else scatter


def _spearmanr(pred, obs, axis=None):
    # column-wise Spearman rank correlation between pred and obs
    pred = pred.copy()
    obs = obs.copy()

    # catch case of axis passed as integer
    def get_iterable(x):
        if x is None or (
            isinstance(x, collections.abc.Iterable) and not isinstance(x, int)
        ):
            return x
        else:
            return (x,)

    axis = get_iterable(axis)

    # reshape arrays as 2d matrices
    # rows: samples; columns: variables
    axis = tuple(range(pred.ndim)) if axis is None else axis
    axis = tuple(np.sort(axis))
    for ax in axis:
        pred = np.rollaxis(pred, ax, 0)
        obs = np.rollaxis(obs, ax, 0)
    shp_rows = pred.shape[: len(axis)]
    shp_cols = pred.shape[len(axis) :]
    pred = np.reshape(pred, (np.prod(shp_rows), -1))
    obs = np.reshape(obs, (np.prod(shp_rows), -1))

    # apply only with more than 2 valid samples
    # although this does not seem to solve the error
    # "ValueError: The input must have at least 3 entries!" ...
    corr_s = np.zeros(pred.shape[1]) * np.nan
    nsamp = np.sum(np.logical_and(np.isfinite(pred), np.isfinite(obs)), axis=0)
    idx = nsamp > 2
    if np.any(idx):
        corr_s_ = spearmanr(pred[:, idx], obs[:, idx], axis=0, nan_policy="omit")[0]
        if corr_s_.size > 1:
            # scipy returns the full (pred, obs) correlation matrix; the
            # wanted pred-vs-obs entries sit on the idx.sum()-th diagonal
            corr_s[idx] = np.diag(corr_s_, idx.sum())
        else:
            corr_s = corr_s_

    return float(corr_s) if corr_s.size == 1 else corr_s.reshape(shp_cols)



================================================
FILE: pysteps/verification/ensscores.py
================================================
# -- coding: utf-8 --
"""
pysteps.verification.ensscores
==============================

Evaluation and skill scores for ensemble forecasts.

..
autosummary::
    :toctree: ../generated/

    ensemble_skill
    ensemble_spread
    rankhist
    rankhist_init
    rankhist_accum
    rankhist_compute
"""

import numpy as np

from .interface import get_method


def ensemble_skill(X_f, X_o, metric, **kwargs):
    """
    Compute mean ensemble skill for a given skill metric.

    Parameters
    ----------
    X_f: array-like
        Array of shape (l,m,n) containing the forecast fields of shape (m,n)
        from l ensemble members.
    X_o: array_like
        Array of shape (m,n) containing the observed field corresponding
        to the forecast.
    metric: str
        The deterministic skill metric to be used (list available in
        :func:`~pysteps.verification.interface.get_method`).

    Returns
    -------
    out: float
        The mean skill of all ensemble members that is used as definition of
        ensemble skill (as in Zacharov and Rezcova 2009 with the FSS).

    References
    ----------
    :cite:`ZR2009`
    """
    if len(X_f.shape) != 3:
        raise ValueError(
            "the number of dimensions of X_f must be equal to 3, "
            + "but %i dimensions were passed" % len(X_f.shape)
        )
    if X_f.shape[1:] != X_o.shape:
        raise ValueError(
            "the shape of X_f does not match the shape of "
            + "X_o (%d,%d)!=(%d,%d)"
            % (X_f.shape[1], X_f.shape[2], X_o.shape[0], X_o.shape[1])
        )

    compute_skill = get_method(metric, type="deterministic")

    lolo = X_f.shape[0]
    skill = []
    for member in range(lolo):
        skill_ = compute_skill(X_f[member, :, :], X_o, **kwargs)
        # some metrics return a dict keyed by the metric name
        if isinstance(skill_, dict):
            skill_ = skill_[metric]
        skill.append(skill_)

    return np.mean(skill)


def ensemble_spread(X_f, metric, **kwargs):
    """
    Compute mean ensemble spread for a given skill metric.

    Parameters
    ----------
    X_f: array-like
        Array of shape (l,m,n) containing the forecast fields of shape (m,n)
        from l ensemble members.
    metric: str
        The deterministic skill metric to be used (list available in
        :func:`~pysteps.verification.interface.get_method`).

    Returns
    -------
    out: float
        The mean skill computed between all possible pairs of the ensemble
        members, which can be used as definition of mean ensemble spread
        (as in Zacharov and Rezcova 2009 with the FSS).

    References
    ----------
    :cite:`ZR2009`
    """
    if len(X_f.shape) != 3:
        raise ValueError(
            "the number of dimensions of X_f must be equal to 3, "
            + "but %i dimensions were passed" % len(X_f.shape)
        )
    if X_f.shape[0] < 2:
        raise ValueError(
            "the number of members in X_f must be greater than 1,"
            + " but %i members were passed" % X_f.shape[0]
        )

    compute_spread = get_method(metric, type="deterministic")

    lolo = X_f.shape[0]
    spread = []
    # all unordered member pairs
    for member in range(lolo):
        for othermember in range(member + 1, lolo):
            spread_ = compute_spread(
                X_f[member, :, :], X_f[othermember, :, :], **kwargs
            )
            if isinstance(spread_, dict):
                spread_ = spread_[metric]
            spread.append(spread_)

    return np.mean(spread)


def rankhist(X_f, X_o, X_min=None, normalize=True):
    """
    Compute a rank histogram counts and optionally normalize the histogram.

    Parameters
    ----------
    X_f: array-like
        Array of shape (k,m,n,...) containing the values from an ensemble
        forecast of k members with shape (m,n,...).
    X_o: array_like
        Array of shape (m,n,...) containing the observed values corresponding
        to the forecast.
    X_min: {float,None}
        Threshold for minimum intensity. Forecast-observation pairs, where all
        ensemble members and verifying observations are below X_min, are not
        counted in the rank histogram.
        If set to None, thresholding is not used.
    normalize: {bool, True}
        If True, normalize the rank histogram so that
        the bin counts sum to one.
    """
    X_f = X_f.copy()
    X_o = X_o.copy()
    num_ens_members = X_f.shape[0]
    rhist = rankhist_init(num_ens_members, X_min)
    rankhist_accum(rhist, X_f, X_o)

    return rankhist_compute(rhist, normalize)


def rankhist_init(num_ens_members, X_min=None):
    """
    Initialize a rank histogram object.

    Parameters
    ----------
    num_ens_members: int
        Number of ensemble members in the forecasts to accumulate into the
        rank histogram.
    X_min: {float,None}
        Threshold for minimum intensity. Forecast-observation pairs, where all
        ensemble members and verifying observations are below X_min, are not
        counted in the rank histogram.
        If set to None, thresholding is not used.

    Returns
    -------
    out: dict
        The rank histogram object.
    """
    rankhist = {}

    rankhist["num_ens_members"] = num_ens_members
    # n+1 bins for n ensemble members
    rankhist["n"] = np.zeros(num_ens_members + 1, dtype=int)
    rankhist["X_min"] = X_min

    return rankhist


def rankhist_accum(rankhist, X_f, X_o):
    """Accumulate forecast-observation pairs to the given rank histogram.

    Parameters
    ----------
    rankhist: dict
        The rank histogram object.
    X_f: array-like
        Array of shape (k,m,n,...) containing the values from an ensemble
        forecast of k members with shape (m,n,...).
    X_o: array_like
        Array of shape (m,n,...) containing the observed values corresponding
        to the forecast.
    """
    if X_f.shape[0] != rankhist["num_ens_members"]:
        raise ValueError(
            "the number of ensemble members in X_f does not "
            + "match the number of members in the rank "
            + "histogram (%d!=%d)" % (X_f.shape[0], rankhist["num_ens_members"])
        )

    # rows: samples, columns: ensemble members
    X_f = np.vstack([X_f[i, :].flatten() for i in range(X_f.shape[0])]).T
    X_o = X_o.flatten()

    X_min = rankhist["X_min"]

    mask = np.logical_and(np.isfinite(X_o), np.all(np.isfinite(X_f), axis=1))
    # ignore pairs where the verifying observations and all ensemble members
    # are below the threshold X_min
    if X_min is not None:
        mask_nz = np.logical_or(X_o >= X_min, np.any(X_f >= X_min, axis=1))
        mask = np.logical_and(mask, mask_nz)

    X_f = X_f[mask, :].copy()
    X_o = X_o[mask].copy()
    if X_min is not None:
        # collapse all sub-threshold values to a common value below X_min
        X_f[X_f < X_min] = X_min - 1
        X_o[X_o < X_min] = X_min - 1

    X_o = np.reshape(X_o, (len(X_o), 1))
    X_c = np.hstack([X_f, X_o])
    X_c.sort(axis=1)

    # rank of the observation within the sorted combined sample
    idx1 = np.where(X_c == X_o)
    _, idx2, idx_counts = np.unique(idx1[0], return_index=True, return_counts=True)

    bin_idx_1 = idx1[1][idx2]

    bin_idx = list(bin_idx_1[np.where(idx_counts == 1)[0]])

    # handle ties, where the verifying observation lies between ensemble
    # members having the same value: assign a random rank within the tie
    idxdup = np.where(idx_counts > 1)[0]
    if len(idxdup) > 0:
        X_c_ = np.fliplr(X_c)
        idx1 = np.where(X_c_ == X_o)
        _, idx2 = np.unique(idx1[0], return_index=True)
        bin_idx_2 = X_f.shape[1] - idx1[1][idx2]

        idxr = np.random.uniform(low=0.0, high=1.0, size=len(idxdup))
        idxr = bin_idx_1[idxdup] + idxr * (bin_idx_2[idxdup] + 1 - bin_idx_1[idxdup])
        bin_idx.extend(idxr.astype(int))

    for bi in bin_idx:
        rankhist["n"][bi] += 1


def rankhist_compute(rankhist, normalize=True):
    """
    Return the rank histogram counts and optionally normalize the histogram.

    Parameters
    ----------
    rankhist: dict
        A rank histogram object created with rankhist_init.
    normalize: bool
        If True, normalize the rank histogram so that
        the bin counts sum to one.

    Returns
    -------
    out: array_like
        The counts for the n+1 bins in the rank histogram, where n is the
        number of ensemble members.
    """
    if normalize:
        return 1.0 * rankhist["n"] / sum(rankhist["n"])
    else:
        return rankhist["n"]



================================================
FILE: pysteps/verification/interface.py
================================================
# -- coding: utf-8 --
"""
pysteps.verification.interface
==============================

Interface for the verification module.

.. autosummary::
    :toctree: ../generated/

    get_method
"""


def get_method(name, type="deterministic"):
    """
    Return a callable function for the method corresponding to the
    given verification score.

    Parameters
    ----------
    name : str
        Name of the verification method. The available options are:\n\

        type: deterministic

        ..
tabularcolumns:: |p{2cm}|L| +------------+--------------------------------------------------------+ | Name | Description | +============+========================================================+ | ACC | accuracy (proportion correct) | +------------+--------------------------------------------------------+ | BIAS | frequency bias | +------------+--------------------------------------------------------+ | CSI | critical success index (threat score) | +------------+--------------------------------------------------------+ | F1 | the harmonic mean of precision and sensitivity | +------------+--------------------------------------------------------+ | FA | false alarm rate (prob. of false detection, fall-out, | | | false positive rate) | +------------+--------------------------------------------------------+ | FAR | false alarm ratio (false discovery rate) | +------------+--------------------------------------------------------+ | GSS | Gilbert skill score (equitable threat score) | +------------+--------------------------------------------------------+ | HK | Hanssen-Kuipers discriminant (Pierce skill score) | +------------+--------------------------------------------------------+ | HSS | Heidke skill score | +------------+--------------------------------------------------------+ | MCC | Matthews correlation coefficient | +------------+--------------------------------------------------------+ | POD | probability of detection (hit rate, sensitivity, | | | recall, true positive rate) | +------------+--------------------------------------------------------+ | SEDI | symmetric extremal dependency index | +------------+--------------------------------------------------------+ | beta1 | linear regression slope (type 1 conditional bias) | +------------+--------------------------------------------------------+ | beta2 | linear regression slope (type 2 conditional bias) | +------------+--------------------------------------------------------+ | corr_p | pearson's correleation 
coefficien (linear correlation) | +------------+--------------------------------------------------------+ | corr_s* | spearman's correlation coefficient (rank correlation) | +------------+--------------------------------------------------------+ | DRMSE | debiased root mean squared error | +------------+--------------------------------------------------------+ | MAE | mean absolute error of residuals | +------------+--------------------------------------------------------+ | ME | mean error or bias of residuals | +------------+--------------------------------------------------------+ | MSE | mean squared error | +------------+--------------------------------------------------------+ | NMSE | normalized mean squared error | +------------+--------------------------------------------------------+ | RMSE | root mean squared error | +------------+--------------------------------------------------------+ | RV | reduction of variance | | | (Brier Score, Nash-Sutcliffe Efficiency) | +------------+--------------------------------------------------------+ | scatter* | half the distance between the 16% and 84% percentiles | | | of the weighted cumulative error distribution, | | | where error = dB(pred/obs), | | | as in Germann et al. (2006) | +------------+--------------------------------------------------------+ | binary_mse| binary MSE | +------------+--------------------------------------------------------+ | FSS | fractions skill score | +------------+--------------------------------------------------------+ | SAL | Structure-Amplitude-Location score | +------------+--------------------------------------------------------+ type: ensemble .. 
tabularcolumns:: |p{2cm}|L| +------------+--------------------------------------------------------+ | Name | Description | +============+========================================================+ | ens_skill | mean ensemble skill | +------------+--------------------------------------------------------+ | ens_spread | mean ensemble spread | +------------+--------------------------------------------------------+ | rankhist | rank histogram | +------------+--------------------------------------------------------+ type: probabilistic .. tabularcolumns:: |p{2cm}|L| +------------+--------------------------------------------------------+ | Name | Description | +============+========================================================+ | CRPS | continuous ranked probability score | +------------+--------------------------------------------------------+ | reldiag | reliability diagram | +------------+--------------------------------------------------------+ | ROC | ROC curve | +------------+--------------------------------------------------------+ type : {'deterministic', 'ensemble', 'probabilistic'}, optional Type of the verification method. Notes ----- Multiplicative scores can be computed by passing log-tranformed values. Note that "scatter" is the only score that will be computed in dB units of the multiplicative error, i.e.: 10*log10(pred/obs). beta1 measures the degree of conditional bias of the observations given the forecasts (type 1). beta2 measures the degree of conditional bias of the forecasts given the observations (type 2). The normalized MSE is computed as NMSE = E[(pred - obs)^2]/E[(pred + obs)^2]. The debiased RMSE is computed as DRMSE = sqrt(RMSE - ME^2). The reduction of variance score is computed as RV = 1 - MSE/Var(obs). Score names denoted by * can only be computed offline, meaning that the these cannot be computed using _init, _accum and _compute methods of this module. References ---------- Germann, U. , Galli, G. , Boscacci, M. and Bolliger, M. 
(2006), Radar precipitation measurement in a mountainous region. Q.J.R. Meteorol. Soc., 132: 1669-1692. doi:10.1256/qj.05.190 Potts, J. (2012), Chapter 2 - Basic concepts. Forecast verification: a practitioner’s guide in atmospheric sciences, I. T. Jolliffe, and D. B. Stephenson, Eds., Wiley-Blackwell, 11–29. """ if name is None: name = "none" if type is None: type = "none" name = name.lower() type = type.lower() if type == "deterministic": from .detcatscores import det_cat_fct from .detcontscores import det_cont_fct from .spatialscores import binary_mse, fss from .salscores import sal # categorical if name in [ "acc", "bias", "csi", "f1", "fa", "far", "gss", "hk", "hss", "mcc", "pod", "sedi", ]: def f(fct, obs, **kwargs): return det_cat_fct(fct, obs, kwargs.pop("thr"), [name]) return f # continuous elif name in [ "beta", "beta1", "beta2", "corr_p", "corr_s", "drmse", "mae", "mse", "me", "nmse", "rmse", "rv", "scatter", ]: def f(fct, obs, **kwargs): return det_cont_fct(fct, obs, [name], **kwargs) return f # spatial elif name == "binary_mse": return binary_mse elif name == "fss": return fss elif name == "sal": return sal else: raise ValueError("unknown deterministic method %s" % name) elif type == "ensemble": from .ensscores import ensemble_skill, ensemble_spread, rankhist if name == "ens_skill": return ensemble_skill elif name == "ens_spread": return ensemble_spread elif name == "rankhist": return rankhist else: raise ValueError("unknown ensemble method %s" % name) elif type == "probabilistic": from .probscores import CRPS, reldiag, ROC_curve if name == "crps": return CRPS elif name == "reldiag": return reldiag elif name == "roc": return ROC_curve else: raise ValueError("unknown probabilistic method %s" % name) else: raise ValueError("unknown type %s" % name) ================================================ FILE: pysteps/verification/lifetime.py ================================================ # -- coding: utf-8 -- """ pysteps.verification.lifetime 
=============================

Estimation of precipitation lifetime from a decaying verification score
function (e.g. autocorrelation function).

.. autosummary::
    :toctree: ../generated/

    lifetime
    lifetime_init
    lifetime_accum
    lifetime_compute
"""

from math import exp

import numpy as np
from scipy.integrate import simpson


def lifetime(X_s, X_t, rule="1/e"):
    """
    Compute the average lifetime by integrating the correlation function
    as a function of lead time.
    When not using the 1/e rule, the correlation function must be long enough
    to converge to 0, otherwise the lifetime is underestimated.
    The correlation function can be either empirical or theoretical, e.g.
    derived using the function 'ar_acf' in timeseries/autoregression.py.

    Parameters
    ----------
    X_s: array-like
        Array with the correlation function.
        Works also with other decaying scores that are defined in the range
        [0,1]=[min_skill,max_skill].
    X_t: array-like
        Array with the forecast lead times in the desired unit,
        e.g. [min, hour].
    rule: str {'1/e', 'trapz', 'simpson'}, optional
        Name of the method to integrate the correlation curve. \n
        '1/e' uses the 1/e rule and assumes an exponential decay. It
        linearly interpolates the time when the correlation goes below the
        value 1/e. When all values are > 1/e it returns the max lead time.
        When all values are < 1/e it returns the min lead time. \n
        'trapz' uses the trapezoidal rule for integration.\n
        'simpson' uses the Simpson's rule for integration.

    Returns
    -------
    lf: float
        Estimated lifetime with same units of X_t.
    """
    X_s = X_s.copy()
    X_t = X_t.copy()
    life = lifetime_init(rule)
    lifetime_accum(life, X_s, X_t)

    return lifetime_compute(life)


def lifetime_init(rule="1/e"):
    """
    Initialize a lifetime object.

    Parameters
    ----------
    rule: str {'1/e', 'trapz', 'simpson'}, optional
        Name of the method to integrate the correlation curve. \n
        '1/e' uses the 1/e rule and assumes an exponential decay. It
        linearly interpolates the time when the correlation goes below the
        value 1/e. When all values are > 1/e it returns the max lead time.
        When all values are < 1/e it returns the min lead time.\n
        'trapz' uses the trapezoidal rule for integration.\n
        'simpson' uses the Simpson's rule for integration.

    Returns
    -------
    out: dict
        The lifetime object.
    """
    list_rules = ["trapz", "simpson", "1/e"]
    if rule not in list_rules:
        raise ValueError(
            "Unknown rule %s for integration.\n" % rule
            + "The available methods are: "
            + str(list_rules)
        )

    lifetime = {}

    lifetime["lifetime_sum"] = 0.0
    lifetime["n"] = 0.0
    lifetime["rule"] = rule

    return lifetime


def lifetime_accum(lifetime, X_s, X_t):
    """
    Compute the lifetime by integrating the correlation function
    and accumulate the result into the given lifetime object.

    Parameters
    ----------
    lifetime: dict
        A lifetime object created with lifetime_init.
    X_s: array-like
        Array with the correlation function.
        Works also with other decaying scores that are defined in the range
        [0,1]=[min_skill,max_skill].
    X_t: array-like
        Array with the forecast lead times in the desired unit,
        e.g. [min, hour].
    """
    if lifetime["rule"] == "trapz":
        # NOTE(review): np.trapz is deprecated in NumPy 2.x in favor of
        # np.trapezoid -- confirm the supported NumPy range before changing
        lf = np.trapz(X_s, x=X_t)
    elif lifetime["rule"] == "simpson":
        lf = simpson(X_s, x=X_t)
    elif lifetime["rule"] == "1/e":
        euler_number = 1.0 / exp(1.0)
        X_s_ = np.array(X_s)
        is_euler_reached = np.sum(X_s_ <= euler_number) > 0
        if is_euler_reached:
            # first index where the curve drops to or below 1/e
            idx_b = np.argmax(X_s_ <= euler_number)
            if idx_b > 0:
                # linear interpolation between the bracketing lead times
                idx_a = idx_b - 1
                fraction_score = (
                    (euler_number - X_s[idx_b])
                    * (X_t[idx_a] - X_t[idx_b])
                    / (X_s[idx_a] - X_s[idx_b])
                )
                lf = X_t[idx_b] + fraction_score
            else:
                # if all values are below the 1/e value, return min lead time
                lf = np.min(X_t)
        else:
            # if all values are above the 1/e value, return max lead time
            lf = np.max(X_t)

    lifetime["lifetime_sum"] += lf
    lifetime["n"] += 1


def lifetime_compute(lifetime):
    """
    Compute the average value from the lifetime object.

    Parameters
    ----------
    lifetime: dict
        A lifetime object created with lifetime_init.

    Returns
    -------
    out: float
        The computed lifetime.
    """
    return 1.0 * lifetime["lifetime_sum"] / lifetime["n"]



================================================
FILE: pysteps/verification/plots.py
================================================
# -- coding: utf-8 --
"""
pysteps.verification.plots
==========================

Methods for plotting verification results.

.. autosummary::
    :toctree: ../generated/

    plot_intensityscale
    plot_rankhist
    plot_reldiag
    plot_ROC
"""

from matplotlib import cm
import matplotlib.pylab as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import numpy as np

from pysteps.verification import ensscores, probscores, spatialscores


def plot_intensityscale(intscale, fig=None, vminmax=None, kmperpixel=None, unit=None):
    """
    Plot a intensity-scale verification table with a color bar and axis
    labels.

    Parameters
    ----------
    intscale: dict
        The intensity-scale object initialized with
        :py:func:`pysteps.verification.spatialscores.intensity_scale_init`
        and accumulated with
        :py:func:`pysteps.verification.spatialscores.intensity_scale_accum`.
    fig: matplotlib.figure.Figure, optional
        The figure object to use for plotting. If not supplied, a new figure
        is created.
    vminmax: tuple of floats, optional
        The minimum and maximum values for the intensity-scale skill score
        in the plot. Defaults to the data extent.
    kmperpixel: float, optional
        The conversion factor from pixels to kilometers. If supplied,
        the unit of the shown spatial scales is km instead of pixels.
    unit: string, optional
        The unit of the intensity thresholds.
""" if fig is None: fig = plt.figure() ax = fig.gca() SS = spatialscores.intensity_scale_compute(intscale) vmin = vmax = None if vminmax is not None: vmin = np.min(vminmax) vmax = np.max(vminmax) im = ax.imshow(SS, vmin=vmin, vmax=vmax, interpolation="nearest", cmap=cm.jet) cb = fig.colorbar(im) cb.set_label(intscale["label"]) if unit is None: ax.set_xlabel("Intensity threshold") else: ax.set_xlabel("Intensity threshold [%s]" % unit) if kmperpixel is None: ax.set_ylabel("Spatial scale [pixels]") else: ax.set_ylabel("Spatial scale [km]") ax.set_xticks(np.arange(SS.shape[1])) ax.set_xticklabels(intscale["thrs"]) ax.set_yticks(np.arange(SS.shape[0])) if kmperpixel is None: scales = intscale["scales"] else: scales = np.array(intscale["scales"]) * kmperpixel ax.set_yticklabels(scales) def plot_rankhist(rankhist, ax=None): """ Plot a rank histogram. Parameters ---------- rankhist: dict A rank histogram object created by ensscores.rankhist_init. ax: axis handle, optional Axis handle for the figure. If set to None, the handle is taken from the current figure (matplotlib.pylab.gca()). """ if ax is None: ax = plt.gca() r = ensscores.rankhist_compute(rankhist) x = np.linspace(0, 1, rankhist["num_ens_members"] + 1) ax.bar(x, r, width=1.0 / len(x), align="edge", color="gray", edgecolor="black") ax.set_xticks(x[::3] + (x[1] - x[0])) ax.set_xticklabels(np.arange(1, len(x) + 1)[::3]) ax.set_xlim(0, 1 + 1.0 / len(x)) ax.set_ylim(0, np.max(r) * 1.25) ax.set_xlabel("Rank of observation (among ensemble members)") ax.set_ylabel("Relative frequency") ax.grid(True, axis="y", ls=":") def plot_reldiag(reldiag, ax=None): """ Plot a reliability diagram. Parameters ---------- reldiag: dict A reldiag object created by probscores.reldiag_init. ax: axis handle, optional Axis handle for the figure. If set to None, the handle is taken from the current figure (matplotlib.pylab.gca()). """ if ax is None: ax = plt.gca() # Plot the reliability diagram. 
    # Observed relative frequency (f) vs. mean forecast probability (r) per bin.
    f = 1.0 * reldiag["Y_sum"] / reldiag["num_idx"]
    r = 1.0 * reldiag["X_sum"] / reldiag["num_idx"]

    # Bins with zero samples produce non-finite values above; skip them.
    mask = np.logical_and(np.isfinite(r), np.isfinite(f))

    ax.plot(r[mask], f[mask], "kD-")
    # diagonal = perfect reliability
    ax.plot([0, 1], [0, 1], "k--")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.grid(True, ls=":")
    ax.set_xlabel("Forecast probability")
    ax.set_ylabel("Observed relative frequency")

    # Plot sharpness diagram into an inset figure.
    iax = inset_axes(ax, width="35%", height="20%", loc=4, borderpad=3.5)
    # bin width taken from the (uniform) bin edges
    bw = reldiag["bin_edges"][2] - reldiag["bin_edges"][1]
    iax.bar(
        reldiag["bin_edges"][:-1],
        reldiag["sample_size"],
        width=bw,
        align="edge",
        color="gray",
        edgecolor="black",
    )
    iax.set_yscale("log")
    iax.set_xticks(reldiag["bin_edges"])
    iax.set_xticklabels(["%.1f" % max(v, 1e-6) for v in reldiag["bin_edges"]])
    # y-tick range spans the sample sizes (excluding the last bin), in decades
    yt_min = int(max(np.floor(np.log10(min(reldiag["sample_size"][:-1]))), 1))
    yt_max = int(np.ceil(np.log10(max(reldiag["sample_size"][:-1]))))
    t = [pow(10.0, k) for k in range(yt_min, yt_max)]
    iax.set_yticks([int(t_) for t_ in t])
    iax.set_xlim(0.0, 1.0)
    iax.set_ylim(t[0], 5 * t[-1])
    iax.set_ylabel("log10(samples)")
    iax.yaxis.tick_right()
    iax.yaxis.set_label_position("right")
    iax.tick_params(axis="both", which="major", labelsize=6)


def plot_ROC(ROC, ax=None, opt_prob_thr=False):
    """
    Plot a ROC curve.

    Parameters
    ----------
    ROC: dict
        A ROC curve object created by probscores.ROC_curve_init.
    ax: axis handle, optional
        Axis handle for the figure. If set to None, the handle is taken from
        the current figure (matplotlib.pylab.gca()).
    opt_prob_thr: bool, optional
        If set to True, plot the optimal probability threshold that maximizes
        the difference between the hit rate (POD) and false alarm rate (POFD).
""" if ax is None: ax = plt.gca() POFD, POD, area = probscores.ROC_curve_compute(ROC, compute_area=True) p_thr = ROC["prob_thrs"] ax.plot([0, 1], [0, 1], "k--") ax.set_xlim(0, 1) ax.set_ylim(0, 1) ax.set_xlabel("False alarm rate (POFD)") ax.set_ylabel("Probability of detection (POD)") ax.grid(True, ls=":") ax.plot(POFD, POD, "kD-") if opt_prob_thr: opt_prob_thr_idx = np.argmax(np.array(POD) - np.array(POFD)) ax.scatter( [POFD[opt_prob_thr_idx]], [POD[opt_prob_thr_idx]], c="r", s=150, facecolors=None, edgecolors="r", ) for p_thr_, x, y in zip(p_thr, POFD, POD): ax.text(x + 0.02, y - 0.02, "%.2f" % p_thr_, fontsize=7) ================================================ FILE: pysteps/verification/probscores.py ================================================ # -- coding: utf-8 -- """ pysteps.verification.probscores =============================== Evaluation and skill scores for probabilistic forecasts. .. autosummary:: :toctree: ../generated/ CRPS CRPS_init CRPS_accum CRPS_compute reldiag reldiag_init reldiag_accum reldiag_compute ROC_curve ROC_curve_init ROC_curve_accum ROC_curve_compute """ import numpy as np def CRPS(X_f, X_o): """ Compute the continuous ranked probability score (CRPS). Parameters ---------- X_f: array_like Array of shape (k,m,n,...) containing the values from an ensemble forecast of k members with shape (m,n,...). X_o: array_like Array of shape (m,n,...) containing the observed values corresponding to the forecast. Returns ------- out: float The computed CRPS. References ---------- :cite:`Her2000` """ X_f = X_f.copy() X_o = X_o.copy() crps = CRPS_init() CRPS_accum(crps, X_f, X_o) return CRPS_compute(crps) def CRPS_init(): """ Initialize a CRPS object. Returns ------- out: dict The CRPS object. """ return {"CRPS_sum": 0.0, "n": 0.0} def CRPS_accum(CRPS, X_f, X_o): """ Compute the average continuous ranked probability score (CRPS) for a set of forecast ensembles and the corresponding observations and accumulate the result to the given CRPS object. 
Parameters ---------- CRPS: dict The CRPS object. X_f: array_like Array of shape (k,m,n,...) containing the values from an ensemble forecast of k members with shape (m,n,...). X_o: array_like Array of shape (m,n,...) containing the observed values corresponding to the forecast. References ---------- :cite:`Her2000` """ X_f = np.vstack([X_f[i, :].flatten() for i in range(X_f.shape[0])]).T X_o = X_o.flatten() mask = np.logical_and(np.all(np.isfinite(X_f), axis=1), np.isfinite(X_o)) X_f = X_f[mask, :].copy() X_f.sort(axis=1) X_o = X_o[mask] n = X_f.shape[0] m = X_f.shape[1] alpha = np.zeros((n, m + 1)) beta = np.zeros((n, m + 1)) for i in range(1, m): mask = X_o > X_f[:, i] alpha[mask, i] = X_f[mask, i] - X_f[mask, i - 1] beta[mask, i] = 0.0 mask = np.logical_and(X_f[:, i] > X_o, X_o > X_f[:, i - 1]) alpha[mask, i] = X_o[mask] - X_f[mask, i - 1] beta[mask, i] = X_f[mask, i] - X_o[mask] mask = X_o < X_f[:, i - 1] alpha[mask, i] = 0.0 beta[mask, i] = X_f[mask, i] - X_f[mask, i - 1] mask = X_o < X_f[:, 0] alpha[mask, 0] = 0.0 beta[mask, 0] = X_f[mask, 0] - X_o[mask] mask = X_f[:, -1] < X_o alpha[mask, -1] = X_o[mask] - X_f[mask, -1] beta[mask, -1] = 0.0 p = 1.0 * np.arange(m + 1) / m res = np.sum(alpha * p**2.0 + beta * (1.0 - p) ** 2.0, axis=1) CRPS["CRPS_sum"] += np.sum(res) CRPS["n"] += len(res) def CRPS_compute(CRPS): """ Compute the averaged values from the given CRPS object. Parameters ---------- CRPS: dict A CRPS object created with CRPS_init. Returns ------- out: float The computed CRPS. """ return 1.0 * CRPS["CRPS_sum"] / CRPS["n"] def reldiag(P_f, X_o, X_min, n_bins=10, min_count=10): """ Compute the x- and y- coordinates of the points in the reliability diagram. Parameters ---------- P_f: array-like Forecast probabilities for exceeding the intensity threshold specified in the reliability diagram object. X_o: array-like Observed values. X_min: float Precipitation intensity threshold for yes/no prediction. 
n_bins: int Number of bins to use in the reliability diagram. min_count: int Minimum number of samples required for each bin. A zero value is assigned if the number of samples in a bin is smaller than bin_count. Returns ------- out: tuple Two-element tuple containing the x- and y-coordinates of the points in the reliability diagram. """ P_f = P_f.copy() X_o = X_o.copy() rdiag = reldiag_init(X_min, n_bins, min_count) reldiag_accum(rdiag, P_f, X_o) return reldiag_compute(rdiag) def reldiag_init(X_min, n_bins=10, min_count=10): """ Initialize a reliability diagram object. Parameters ---------- X_min: float Precipitation intensity threshold for yes/no prediction. n_bins: int Number of bins to use in the reliability diagram. min_count: int Minimum number of samples required for each bin. A zero value is assigned if the number of samples in a bin is smaller than bin_count. Returns ------- out: dict The reliability diagram object. References ---------- :cite:`BS2007` """ reldiag = {} reldiag["X_min"] = X_min reldiag["bin_edges"] = np.linspace(-1e-6, 1 + 1e-6, int(n_bins + 1)) reldiag["n_bins"] = n_bins reldiag["X_sum"] = np.zeros(n_bins) reldiag["Y_sum"] = np.zeros(n_bins, dtype=int) reldiag["num_idx"] = np.zeros(n_bins, dtype=int) reldiag["sample_size"] = np.zeros(n_bins, dtype=int) reldiag["min_count"] = min_count return reldiag def reldiag_accum(reldiag, P_f, X_o): """Accumulate the given probability-observation pairs into the reliability diagram. Parameters ---------- reldiag: dict A reliability diagram object created with reldiag_init. P_f: array-like Forecast probabilities for exceeding the intensity threshold specified in the reliability diagram object. X_o: array-like Observed values. 
""" mask = np.logical_and(np.isfinite(P_f), np.isfinite(X_o)) P_f = P_f[mask] X_o = X_o[mask] idx = np.digitize(P_f, reldiag["bin_edges"], right=True) x = [] y = [] num_idx = [] ss = [] for k in range(1, len(reldiag["bin_edges"])): I_k = np.where(idx == k)[0] if len(I_k) >= reldiag["min_count"]: X_o_above_thr = (X_o[I_k] >= reldiag["X_min"]).astype(int) y.append(np.sum(X_o_above_thr)) x.append(np.sum(P_f[I_k])) num_idx.append(len(I_k)) ss.append(len(I_k)) else: y.append(0.0) x.append(0.0) num_idx.append(0.0) ss.append(0) reldiag["X_sum"] += np.array(x) reldiag["Y_sum"] += np.array(y, dtype=int) reldiag["num_idx"] += np.array(num_idx, dtype=int) reldiag["sample_size"] += ss def reldiag_compute(reldiag): """ Compute the x- and y- coordinates of the points in the reliability diagram. Parameters ---------- reldiag: dict A reliability diagram object created with reldiag_init. Returns ------- out: tuple Two-element tuple containing the x- and y-coordinates of the points in the reliability diagram. """ f = 1.0 * reldiag["Y_sum"] / reldiag["num_idx"] r = 1.0 * reldiag["X_sum"] / reldiag["num_idx"] return r, f def ROC_curve(P_f, X_o, X_min, n_prob_thrs=10, compute_area=False): """ Compute the ROC curve and its area from the given ROC object. Parameters ---------- P_f: array_like Forecasted probabilities for exceeding the threshold specified in the ROC object. Non-finite values are ignored. X_o: array_like Observed values. Non-finite values are ignored. X_min: float Precipitation intensity threshold for yes/no prediction. n_prob_thrs: int The number of probability thresholds to use. The interval [0,1] is divided into n_prob_thrs evenly spaced values. compute_area: bool If True, compute the area under the ROC curve (between 0.5 and 1). Returns ------- out: tuple A two-element tuple containing the probability of detection (POD) and probability of false detection (POFD) for the probability thresholds specified in the ROC curve object. 
If compute_area is True, return the area under the ROC curve as the third element of the tuple. """ P_f = P_f.copy() X_o = X_o.copy() roc = ROC_curve_init(X_min, n_prob_thrs) ROC_curve_accum(roc, P_f, X_o) return ROC_curve_compute(roc, compute_area) def ROC_curve_init(X_min, n_prob_thrs=10): """ Initialize a ROC curve object. Parameters ---------- X_min: float Precipitation intensity threshold for yes/no prediction. n_prob_thrs: int The number of probability thresholds to use. The interval [0,1] is divided into n_prob_thrs evenly spaced values. Returns ------- out: dict The ROC curve object. """ ROC = {} ROC["X_min"] = X_min ROC["hits"] = np.zeros(n_prob_thrs, dtype=int) ROC["misses"] = np.zeros(n_prob_thrs, dtype=int) ROC["false_alarms"] = np.zeros(n_prob_thrs, dtype=int) ROC["corr_neg"] = np.zeros(n_prob_thrs, dtype=int) ROC["prob_thrs"] = np.linspace(0.0, 1.0, int(n_prob_thrs)) return ROC def ROC_curve_accum(ROC, P_f, X_o): """Accumulate the given probability-observation pairs into the given ROC object. Parameters ---------- ROC: dict A ROC curve object created with ROC_curve_init. P_f: array_like Forecasted probabilities for exceeding the threshold specified in the ROC object. Non-finite values are ignored. X_o: array_like Observed values. Non-finite values are ignored. """ mask = np.logical_and(np.isfinite(P_f), np.isfinite(X_o)) P_f = P_f[mask] X_o = X_o[mask] for i, p in enumerate(ROC["prob_thrs"]): mask = np.logical_and(P_f >= p, X_o >= ROC["X_min"]) ROC["hits"][i] += np.sum(mask.astype(int)) mask = np.logical_and(P_f < p, X_o >= ROC["X_min"]) ROC["misses"][i] += np.sum(mask.astype(int)) mask = np.logical_and(P_f >= p, X_o < ROC["X_min"]) ROC["false_alarms"][i] += np.sum(mask.astype(int)) mask = np.logical_and(P_f < p, X_o < ROC["X_min"]) ROC["corr_neg"][i] += np.sum(mask.astype(int)) def ROC_curve_compute(ROC, compute_area=False): """ Compute the ROC curve and its area from the given ROC object. 
    Parameters
    ----------
    ROC: dict
        A ROC curve object created with ROC_curve_init.
    compute_area: bool
        If True, compute the area under the ROC curve (between 0.5 and 1).

    Returns
    -------
    out: tuple
        A two-element tuple containing the probability of detection (POD) and
        probability of false detection (POFD) for the probability thresholds
        specified in the ROC curve object. If compute_area is True, return the
        area under the ROC curve as the third element of the tuple.
    """
    POD_vals = []
    POFD_vals = []

    # POD = hits / (hits + misses); POFD = false alarms / (false alarms +
    # correct negatives), one value per probability threshold.
    # NOTE(review): a threshold with no accumulated samples makes the
    # denominator zero — confirm callers always accumulate before computing.
    for i in range(len(ROC["prob_thrs"])):
        POD_vals.append(1.0 * ROC["hits"][i] / (ROC["hits"][i] + ROC["misses"][i]))
        POFD_vals.append(
            1.0 * ROC["false_alarms"][i] / (ROC["corr_neg"][i] + ROC["false_alarms"][i])
        )

    if compute_area:
        # Compute the total area of parallelepipeds under the ROC curve.
        # Trapezoids between consecutive curve points, plus the end segments
        # connecting the curve to (1,1) and (0,0).
        area = (1.0 - POFD_vals[0]) * (1.0 + POD_vals[0]) / 2.0
        for i in range(len(ROC["prob_thrs"]) - 1):
            area += (
                (POFD_vals[i] - POFD_vals[i + 1])
                * (POD_vals[i + 1] + POD_vals[i])
                / 2.0
            )
        area += POFD_vals[-1] * POD_vals[-1] / 2.0

        return POFD_vals, POD_vals, area
    else:
        return POFD_vals, POD_vals



================================================
FILE: pysteps/verification/salscores.py
================================================
# -- coding: utf-8 --
"""
pysteps.verification.salscores
==============================

The Spatial-Amplitude-Location (SAL) score by :cite:`WPHF2008`.

..
autosummary:: :toctree: ../generated/ sal sal_structure sal_amplitude sal_location """ from math import sqrt, hypot import numpy as np from scipy.ndimage import center_of_mass from pysteps.exceptions import MissingOptionalDependency from pysteps.feature import tstorm as tstorm_detect try: import pandas as pd PANDAS_IMPORTED = True except ImportError: PANDAS_IMPORTED = False try: from skimage.measure import regionprops_table SKIMAGE_IMPORTED = True except ImportError: SKIMAGE_IMPORTED = False # regionprops property names changed with scikit-image v0.19, buld old names # will continue to work for backwards compatibility # see https://github.com/scikit-image/scikit-image/releases/tag/v0.19.0 REGIONPROPS = [ "label", "weighted_centroid", "max_intensity", "intensity_image", ] def sal( prediction, observation, thr_factor=0.067, # default to 1/15 as in the reference paper thr_quantile=0.95, tstorm_kwargs=None, ): """ Compute the Structure-Amplitude-Location (SAL) spatial verification metric. Parameters ---------- prediction: array-like Array of shape (m,n) with prediction data. NaNs are ignored. observation: array-like Array of shape (m,n) with observation data. NaNs are ignored. thr_factor: float, optional Factor used to compute the detection threshold as in eq. 1 of :cite:`WHZ2009`. If not None, this is used to identify coherent objects enclosed by the threshold contour `thr_factor * thr_quantile(precip)`. thr_quantile: float, optional The wet quantile between 0 and 1 used to define the detection threshold. Required if `thr_factor` is not None. tstorm_kwargs: dict, optional Optional dictionary containing keyword arguments for the tstorm feature detection algorithm. If None, default values are used. See the documentation of :py:func:`pysteps.feature.tstorm.detection`. Returns ------- sal: tuple of floats A 3-element tuple containing the structure, amplitude, location components of the SAL score. 
    References
    ----------
    :cite:`WPHF2008`
    :cite:`WHZ2009`
    :cite:`Feldmann2021`

    Notes
    -----
    This implementation uses the thunderstorm detection algorithm by
    :cite:`Feldmann2021` for the identification of precipitation objects
    within the considered domain.

    See also
    --------
    :py:func:`pysteps.verification.salscores.sal_structure`,
    :py:func:`pysteps.verification.salscores.sal_amplitude`,
    :py:func:`pysteps.verification.salscores.sal_location`,
    :py:mod:`pysteps.feature.tstorm`
    """
    # Work on copies so the component computations cannot mutate the caller's
    # arrays.
    prediction = np.copy(prediction)
    observation = np.copy(observation)
    structure = sal_structure(
        prediction, observation, thr_factor, thr_quantile, tstorm_kwargs
    )
    amplitude = sal_amplitude(prediction, observation)
    location = sal_location(
        prediction, observation, thr_factor, thr_quantile, tstorm_kwargs
    )
    return structure, amplitude, location


def sal_structure(
    prediction, observation, thr_factor=None, thr_quantile=None, tstorm_kwargs=None
):
    """
    Compute the structure component for SAL based on :cite:`WPHF2008`.

    Parameters
    ----------
    prediction: array-like
        Array of shape (m,n) with prediction data. NaNs are ignored.
    observation: array-like
        Array of shape (m,n) with observation data. NaNs are ignored.
    thr_factor: float, optional
        Factor used to compute the detection threshold as in eq. 1 of
        :cite:`WHZ2009`. If not None, this is used to identify coherent objects
        enclosed by the threshold contour `thr_factor * thr_quantile(precip)`.
    thr_quantile: float, optional
        The wet quantile between 0 and 1 used to define the detection
        threshold. Required if `thr_factor` is not None.
    tstorm_kwargs: dict, optional
        Optional dictionary containing keyword arguments for the tstorm feature
        detection algorithm. If None, default values are used.
        See the documentation of :py:func:`pysteps.feature.tstorm.detection`.

    Returns
    -------
    structure: float
        The structure component with value between -2 to 2 and 0 denotes
        perfect forecast in terms of structure.
The returned value is NaN if no objects are detected in neither the prediction nor the observation. See also -------- :py:func:`pysteps.verification.salscores.sal`, :py:func:`pysteps.verification.salscores.sal_amplitude`, :py:func:`pysteps.verification.salscores.sal_location`, :py:mod:`pysteps.feature.tstorm` """ prediction_objects = _sal_detect_objects( prediction, thr_factor, thr_quantile, tstorm_kwargs ) observation_objects = _sal_detect_objects( observation, thr_factor, thr_quantile, tstorm_kwargs ) prediction_volume = _sal_scaled_volume(prediction_objects) observation_volume = _sal_scaled_volume(observation_objects) nom = prediction_volume - observation_volume denom = prediction_volume + observation_volume return np.divide(nom, (0.5 * denom)) def sal_amplitude(prediction, observation): """ Compute the amplitude component for SAL based on :cite:`WPHF2008`. This component is the normalized difference of the domain-averaged precipitation in observation and forecast. Parameters ---------- prediction: array-like Array of shape (m,n) with prediction data. NaNs are ignored. observation: array-like Array of shape (m,n) with observation data. NaNs are ignored. Returns ------- amplitude: float Amplitude parameter with value between -2 to 2 and 0 denotes perfect forecast in terms of amplitude. The returned value is NaN if no objects are detected in neither the prediction nor the observation. See also -------- :py:func:`pysteps.verification.salscores.sal`, :py:func:`pysteps.verification.salscores.sal_structure`, :py:func:`pysteps.verification.salscores.sal_location` """ mean_obs = np.nanmean(observation) mean_pred = np.nanmean(prediction) return (mean_pred - mean_obs) / (0.5 * (mean_pred + mean_obs)) def sal_location( prediction, observation, thr_factor=None, thr_quantile=None, tstorm_kwargs=None ): """ Compute the first parameter of location component for SAL based on :cite:`WPHF2008`. 
This parameter indicates the normalized distance between the center of mass in observation and forecast. Parameters ---------- prediction: array-like Array of shape (m,n) with prediction data. NaNs are ignored. observation: array-like Array of shape (m,n) with observation data. NaNs are ignored. thr_factor: float, optional Factor used to compute the detection threshold as in eq. 1 of :cite:`WHZ2009`. If not None, this is used to identify coherent objects enclosed by the threshold contour `thr_factor * thr_quantile(precip)`. thr_quantile: float, optional The wet quantile between 0 and 1 used to define the detection threshold. Required if `thr_factor` is not None. tstorm_kwargs: dict, optional Optional dictionary containing keyword arguments for the tstorm feature detection algorithm. If None, default values are used. See the documentation of :py:func:`pysteps.feature.tstorm.detection`. Returns ------- location: float The location component with value between 0 to 2 and 0 denotes perfect forecast in terms of location. The returned value is NaN if no objects are detected in either the prediction or the observation. See also -------- :py:func:`pysteps.verification.salscores.sal`, :py:func:`pysteps.verification.salscores.sal_structure`, :py:func:`pysteps.verification.salscores.sal_amplitude`, :py:mod:`pysteps.feature.tstorm` """ return _sal_l1_param(prediction, observation) + _sal_l2_param( prediction, observation, thr_factor, thr_quantile, tstorm_kwargs ) def _sal_l1_param(prediction, observation): """ Compute the first parameter of location component for SAL based on :cite:`WPHF2008`. This parameter indicates the normalized distance between the center of mass in observation and forecast. Parameters ---------- prediction: array-like Array of shape (m,n) with prediction data. NaNs are ignored. observation: array-like Array of shape (m,n) with observation data. NaNs are ignored. 
Returns ------- location_1: float The first parameter of location component which has a value between 0 to 1. """ maximum_distance = sqrt( ((observation.shape[0]) ** 2) + ((observation.shape[1]) ** 2) ) obi = center_of_mass(np.nan_to_num(observation)) fori = center_of_mass(np.nan_to_num(prediction)) dist = hypot(fori[1] - obi[1], fori[0] - obi[0]) return dist / maximum_distance def _sal_l2_param(prediction, observation, thr_factor, thr_quantile, tstorm_kwargs): """ Calculate the second parameter of location component for SAL based on :cite:`WPHF2008`. Parameters ---------- prediction: array-like Array of shape (m,n) with prediction data. NaNs are ignored. observation: array-like Array of shape (m,n) with observation data. NaNs are ignored. thr_factor: float Factor used to compute the detection threshold as in eq. 1 of :cite:`WHZ2009`. If not None, this is used to identify coherent objects enclosed by the threshold contour `thr_factor * thr_quantile(precip)`. thr_quantile: float The wet quantile between 0 and 1 used to define the detection threshold. Required if `thr_factor` is not None. tstorm_kwargs: dict Optional dictionary containing keyword arguments for the tstorm feature detection algorithm. If None, default values are used. See the documentation of :py:func:`pysteps.feature.tstorm.detection`. Returns ------- location_2: float The secibd parameter of location component with value between 0 to 1. """ maximum_distance = sqrt( ((observation.shape[0]) ** 2) + ((observation.shape[1]) ** 2) ) obs_r = _sal_weighted_distance(observation, thr_factor, thr_quantile, tstorm_kwargs) forc_r = _sal_weighted_distance(prediction, thr_factor, thr_quantile, tstorm_kwargs) location_2 = 2 * ((abs(obs_r - forc_r)) / maximum_distance) return float(location_2) def _sal_detect_objects(precip, thr_factor, thr_quantile, tstorm_kwargs): """ Detect coherent precipitation objects using a multi-threshold approach from :cite:`Feldmann2021`. 
    Parameters
    ----------
    precip: array-like
        Array of shape (m,n) containing input data. Nan values are ignored.
    thr_factor: float
        Factor used to compute the detection threshold as in eq. 1 of
        :cite:`WHZ2009`. If not None, this is used to identify coherent objects
        enclosed by the threshold contour `thr_factor * thr_quantile(precip)`.
    thr_quantile: float
        The wet quantile between 0 and 1 used to define the detection
        threshold. Required if `thr_factor` is not None.
    tstorm_kwargs: dict
        Optional dictionary containing keyword arguments for the tstorm feature
        detection algorithm. If None, default values are used.
        See the documentation of :py:func:`pysteps.feature.tstorm.detection`.

    Returns
    -------
    precip_objects: pd.DataFrame
        Dataframe containing all detected cells and their respective
        properties.
    """
    if not PANDAS_IMPORTED:
        raise MissingOptionalDependency(
            "The pandas package is required for the SAL "
            "verification method but it is not installed"
        )
    if not SKIMAGE_IMPORTED:
        raise MissingOptionalDependency(
            "The scikit-image package is required for the SAL "
            "verification method but it is not installed"
        )
    if thr_factor is not None and thr_quantile is None:
        raise ValueError("You must pass thr_quantile, too")
    if tstorm_kwargs is None:
        tstorm_kwargs = dict()
    if thr_factor is not None:
        # Detection threshold as in eq. 1 of WHZ2009: scaled wet-area quantile,
        # where "wet" means strictly above the field minimum.
        zero_value = np.nanmin(precip)
        threshold = thr_factor * np.nanquantile(
            precip[precip > zero_value], thr_quantile
        )
        # Explicitly passed tstorm kwargs take precedence over the derived
        # threshold values.
        tstorm_kwargs = {
            "minmax": tstorm_kwargs.get("minmax", threshold),
            "maxref": tstorm_kwargs.get("maxref", threshold + 1e-5),
            "mindiff": tstorm_kwargs.get("mindiff", 1e-5),
            "minref": tstorm_kwargs.get("minref", threshold),
        }
    _, labels = tstorm_detect.detection(precip, **tstorm_kwargs)
    labels = labels.astype(int)
    # Tabulate per-object properties (REGIONPROPS) from the label image.
    precip_objects = pd.DataFrame(
        regionprops_table(labels, intensity_image=precip, properties=REGIONPROPS)
    )
    return precip_objects


def _sal_scaled_volume(precip_objects):
    """
    Calculate the scaled volume based on :cite:`WPHF2008`.
Parameters ---------- precip_objects: pd.DataFrame Dataframe containing all detected cells and their respective properties as returned by the :py:func:`pysteps.verification.salsscores._sal_detect_objects` function. Returns ------- total_scaled_volum: float The total scaled volume of precipitation objects. """ if not PANDAS_IMPORTED: raise MissingOptionalDependency( "The pandas package is required for the SAL " "verification method but it is not installed" ) objects_volume_scaled = [] for _, precip_object in precip_objects.iterrows(): intensity_sum = np.nansum(precip_object.intensity_image) max_intensity = precip_object.max_intensity if intensity_sum == 0: intensity_vol = 0 else: volume_scaled = intensity_sum / max_intensity tot_vol = intensity_sum * volume_scaled intensity_vol = tot_vol objects_volume_scaled.append( {"intensity_vol": intensity_vol, "intensity_sum_obj": intensity_sum} ) df_vols = pd.DataFrame(objects_volume_scaled) if df_vols.empty or (df_vols["intensity_sum_obj"] == 0).all(): total_scaled_volum = 0 else: total_scaled_volum = np.nansum(df_vols.intensity_vol) / np.nansum( df_vols.intensity_sum_obj ) return total_scaled_volum def _sal_weighted_distance(precip, thr_factor, thr_quantile, tstorm_kwargs): """ Compute the weighted averaged distance between the centers of mass of the individual objects and the center of mass of the total precipitation field. Parameters ---------- precip: array-like Array of shape (m,n). NaNs are ignored. thr_factor: float Factor used to compute the detection threshold as in eq. 1 of :cite:`WHZ2009`. If not None, this is used to identify coherent objects enclosed by the threshold contour `thr_factor * thr_quantile(precip)`. thr_quantile: float The wet quantile between 0 and 1 used to define the detection threshold. Required if `thr_factor` is not None. tstorm_kwargs: dict Optional dictionary containing keyword arguments for the tstorm feature detection algorithm. If None, default values are used. 
        See the documentation of :py:func:`pysteps.feature.tstorm.detection`.

    Returns
    -------
    weighted_distance: float
        The weighted averaged distance between the centers of mass of the
        individual objects and the center of mass of the total precipitation
        field. The returned value is NaN if no objects are detected.
    """
    if not PANDAS_IMPORTED:
        raise MissingOptionalDependency(
            "The pandas package is required for the SAL "
            "verification method but it is not installed"
        )
    precip_objects = _sal_detect_objects(
        precip, thr_factor, thr_quantile, tstorm_kwargs
    )
    if len(precip_objects) == 0:
        return np.nan
    # Center of mass of the full field (NaNs treated as zero precipitation).
    centroid_total = center_of_mass(np.nan_to_num(precip))
    r = []
    for i in precip_objects.label - 1:
        # Distance between the object's intensity-weighted centroid and the
        # field centroid; weighted_centroid-0/-1 are the row/column components
        # produced by skimage's regionprops_table.
        xd = (precip_objects["weighted_centroid-1"][i] - centroid_total[1]) ** 2
        yd = (precip_objects["weighted_centroid-0"][i] - centroid_total[0]) ** 2
        dst = sqrt(xd + yd)
        # Weight the distance by the object's total intensity.
        sumr = (np.nansum(precip_objects.intensity_image[i])) * dst
        sump = np.nansum(precip_objects.intensity_image[i])
        r.append({"sum_dist": sumr, "sum_p": sump})
    rr = pd.DataFrame(r)
    # Intensity-weighted mean distance over all objects.
    return (np.nansum(rr.sum_dist)) / (np.nansum(rr.sum_p))



================================================
FILE: pysteps/verification/spatialscores.py
================================================
# -- coding: utf-8 --
"""
pysteps.verification.spatialscores
==================================

Skill scores for spatial forecasts.

..
autosummary:: :toctree: ../generated/ intensity_scale intensity_scale_init intensity_scale_accum intensity_scale_merge intensity_scale_compute binary_mse binary_mse_init binary_mse_accum binary_mse_merge binary_mse_compute fss fss_init fss_accum fss_merge fss_compute """ import collections import numpy as np from scipy.ndimage import uniform_filter from pysteps.exceptions import MissingOptionalDependency from pysteps.verification.salscores import sal # make SAL accessible from this module try: import pywt pywt_imported = True except ImportError: pywt_imported = False def intensity_scale(X_f, X_o, name, thrs, scales=None, wavelet="Haar"): """ Compute an intensity-scale verification score. Parameters ---------- X_f: array_like Array of shape (m, n) containing the forecast field. X_o: array_like Array of shape (m, n) containing the verification observation field. name: string A string indicating the name of the spatial verification score to be used: +------------+--------------------------------------------------------+ | Name | Description | +============+========================================================+ | FSS | Fractions skill score | +------------+--------------------------------------------------------+ | BMSE | Binary mean squared error | +------------+--------------------------------------------------------+ thrs: float or array_like Scalar or 1-D array of intensity thresholds for which to compute the verification. scales: float or array_like, optional Scalar or 1-D array of spatial scales in pixels, required if ``name="FSS"``. wavelet: str, optional The name of the wavelet function to use in the BMSE. Defaults to the Haar wavelet, as described in Casati et al. 2004. See the documentation of PyWavelets for a list of available options. Returns ------- out: array_like The two-dimensional array containing the intensity-scale skill scores for each spatial scale and intensity threshold. 
def intensity_scale_init(name, thrs, scales=None, wavelet="Haar"):
    """
    Initialize an intensity-scale verification object.

    Parameters
    ----------
    name: string
        Name of the spatial verification score: "FSS" (fractions skill
        score) or "BMSE" (binary mean squared error).
    thrs: float or array_like
        Scalar or 1-D array of intensity thresholds for which to compute
        the verification.
    scales: float or array_like, optional
        Scalar or 1-D array of spatial scales in pixels, required if
        ``name="FSS"``.
    wavelet: str, optional
        The name of the wavelet function, required if ``name="BMSE"``.
        Defaults to the Haar wavelet, as described in Casati et al. 2004.
        See the documentation of PyWavelets for a list of available options.

    Returns
    -------
    out: dict
        The intensity-scale object, holding one sub-object per threshold
        (and per scale for the FSS).

    Raises
    ------
    ValueError
        If the required ``scales``/``wavelet`` argument is missing, or if
        ``name`` is not a known method.
    """
    method = name.lower()

    if method == "fss" and scales is None:
        raise ValueError(
            "an array of spatial scales must be provided for the FSS,"
            " but %s was passed" % scales
        )
    if method == "bmse" and wavelet is None:
        raise ValueError(
            "the name of a wavelet must be provided for the BMSE,"
            " but %s was passed" % wavelet
        )

    def _as_array(value):
        # accept both scalars and iterables
        if isinstance(value, collections.abc.Iterable):
            return np.copy(value)
        return np.copy((value,))

    intscale = {
        "name": name,
        "thrs": np.sort(_as_array(thrs)),
        "wavelet": wavelet,
    }
    # scales are stored in decreasing order; for the BMSE they are derived
    # later from the wavelet decomposition
    if scales is not None:
        intscale["scales"] = np.sort(_as_array(scales))[::-1]
    else:
        intscale["scales"] = None

    for thr in intscale["thrs"]:
        if method == "bmse":
            intscale[thr] = binary_mse_init(thr, intscale["wavelet"])
        elif method == "fss":
            intscale[thr] = {
                scale: fss_init(thr, scale) for scale in intscale["scales"]
            }

    if method == "fss":
        intscale["label"] = "Fractions skill score"
        del intscale["wavelet"]
    elif method == "bmse":
        intscale["label"] = "Binary MSE skill score"
        intscale["scales"] = None
    else:
        raise ValueError("unknown method %s" % name)

    return intscale
""" name = intscale["name"] thrs = intscale["thrs"] scales = intscale["scales"] for i, thr in enumerate(thrs): if name.lower() == "bmse": binary_mse_accum(intscale[thr], X_f, X_o) elif name.lower() == "fss": for j, scale in enumerate(scales): fss_accum(intscale[thr][scale], X_f, X_o) if scales is None: intscale["scales"] = intscale[thrs[0]]["scales"] def intensity_scale_merge(intscale_1, intscale_2): """ Merge two intensity-scale verification objects. Parameters ---------- intscale_1: dict Am intensity-scale object initialized with :py:func:`pysteps.verification.spatialscores.intensity_scale_init` and populated with :py:func:`pysteps.verification.spatialscores.intensity_scale_accum`. intscale_2: dict Another intensity-scale object initialized with :py:func:`pysteps.verification.spatialscores.intensity_scale_init` and populated with :py:func:`pysteps.verification.spatialscores.intensity_scale_accum`. Returns ------- out: dict The merged intensity-scale object. """ # checks if intscale_1["name"] != intscale_2["name"]: raise ValueError( "cannot merge: the intensity scale methods are not same %s!=%s" % (intscale_1["name"], intscale_2["name"]) ) intscale = intscale_1.copy() name = intscale["name"] thrs = intscale["thrs"] scales = intscale["scales"] for i, thr in enumerate(thrs): if name.lower() == "bmse": intscale[thr] = binary_mse_merge(intscale[thr], intscale_2[thr]) elif name.lower() == "fss": for j, scale in enumerate(scales): intscale[thr][scale] = fss_merge( intscale[thr][scale], intscale_2[thr][scale] ) return intscale def intensity_scale_compute(intscale): """ Return the intensity scale matrix. Parameters ---------- intscale: dict The intensity-scale object initialized with :py:func:`pysteps.verification.spatialscores.intensity_scale_init` and accumulated with :py:func:`pysteps.verification.spatialscores.intensity_scale_accum`. 
def intensity_scale_compute(intscale):
    """
    Return the intensity-scale matrix.

    Parameters
    ----------
    intscale: dict
        The intensity-scale object initialized with
        :py:func:`pysteps.verification.spatialscores.intensity_scale_init`
        and accumulated with
        :py:func:`pysteps.verification.spatialscores.intensity_scale_accum`.

    Returns
    -------
    out: array_like
        The two-dimensional array of shape (j, k) containing the
        intensity-scale skill scores for **j** spatial scales and **k**
        intensity thresholds.
    """
    method = intscale["name"].lower()
    thrs = intscale["thrs"]
    scales = intscale["scales"]

    skill = np.zeros((scales.size, thrs.size))
    for col, thr in enumerate(thrs):
        if method == "bmse":
            # the BMSE returns all scales at once
            skill[:, col] = binary_mse_compute(intscale[thr], False)
        elif method == "fss":
            for row, scale in enumerate(scales):
                skill[row, col] = fss_compute(intscale[thr][scale])

    return skill


def binary_mse(X_f, X_o, thr, wavelet="haar", return_scales=True):
    """
    Compute the MSE of the binary error as a function of spatial scale.

    This method uses PyWavelets to decompose the error field between the
    forecast and the observation into multiple spatial scales.

    Parameters
    ----------
    X_f: array_like
        Array of shape (m, n) containing the forecast field.
    X_o: array_like
        Array of shape (m, n) containing the verification observation field.
    thr: float
        The intensity threshold for which to compute the verification.
    wavelet: str, optional
        The name of the wavelet function to use. Defaults to the Haar
        wavelet, as described in Casati et al. 2004. See the documentation
        of PyWavelets for a list of available options.
    return_scales: bool, optional
        Whether to return the spatial scales resulting from the wavelet
        decomposition.

    Returns
    -------
    SS: array
        One-dimensional array containing the binary MSE for each spatial
        scale.
    scales: list, optional
        If ``return_scales=True``, return the spatial scales in pixels
        resulting from the wavelet decomposition.

    References
    ----------
    :cite:`CRS2004`
    """
    verif = binary_mse_init(thr, wavelet)
    binary_mse_accum(verif, X_f, X_o)
    return binary_mse_compute(verif, return_scales)


def binary_mse_init(thr, wavelet="haar"):
    """
    Initialize a binary MSE (BMSE) verification object.

    Parameters
    ----------
    thr: float
        The intensity threshold.
    wavelet: str, optional
        The name of the wavelet function to use. Defaults to the Haar
        wavelet, as described in Casati et al. 2004. See the documentation
        of PyWavelets for a list of available options.

    Returns
    -------
    bmse: dict
        The initialized BMSE verification object; ``scales`` and ``mse``
        stay None until the first accumulation.
    """
    return {
        "thr": thr,
        "wavelet": wavelet,
        "scales": None,
        "mse": None,
        "eps": 0,
        "n": 0,
    }
def binary_mse_accum(bmse, X_f, X_o):
    """Accumulate a forecast-observation pair into a BMSE object.

    Parameters
    ----------
    bmse: dict
        The BMSE object initialized with
        :py:func:`pysteps.verification.spatialscores.binary_mse_init`.
    X_f: array_like
        Array of shape (m, n) containing the forecast field.
    X_o: array_like
        Array of shape (m, n) containing the observation field.

    Raises
    ------
    MissingOptionalDependency
        If PyWavelets is not installed.
    ValueError
        If the input fields are not two-dimensional arrays of equal shape.
    """
    if not pywt_imported:
        raise MissingOptionalDependency(
            "PyWavelets package is required for the binary MSE spatial "
            "verification method but it is not installed"
        )

    if len(X_f.shape) != 2 or len(X_o.shape) != 2 or X_f.shape != X_o.shape:
        raise ValueError(
            "X_f and X_o must be two-dimensional arrays having the same shape"
        )

    thr = bmse["thr"]

    # replace non-finite values by a value below the threshold so that they
    # count as "no event" in the binary fields
    fct = X_f.copy()
    fct[~np.isfinite(fct)] = thr - 1
    obs = X_o.copy()
    obs[~np.isfinite(obs)] = thr - 1

    wav = pywt.Wavelet(bmse["wavelet"])

    fct_mask = (fct >= thr).astype(float)
    obs_mask = (obs >= thr).astype(float)

    error_scales = _wavelet_decomp(fct_mask - obs_mask, wav)
    n_scales = len(error_scales)

    # the number of scales is only known after the first decomposition
    if bmse["scales"] is None:
        bmse["scales"] = pow(2, np.arange(n_scales))[::-1]
        bmse["mse"] = np.zeros(n_scales)

    # running mean of the observed base rate (fraction of pixels >= thr)
    base_rate = 1.0 * np.sum((obs >= thr).astype(int)) / obs.size
    if np.isfinite(base_rate):
        bmse["eps"] = (bmse["eps"] * bmse["n"] + base_rate) / (bmse["n"] + 1)

    # running mean of the MSE at each decomposition scale
    for idx, err in enumerate(error_scales):
        scale_mse = np.mean(err**2)
        if np.isfinite(scale_mse):
            bmse["mse"][idx] = (bmse["mse"][idx] * bmse["n"] + scale_mse) / (
                bmse["n"] + 1
            )

    bmse["n"] += 1
def binary_mse_merge(bmse_1, bmse_2):
    """
    Merge two BMSE objects.

    Parameters
    ----------
    bmse_1: dict
        A BMSE object initialized with
        :py:func:`pysteps.verification.spatialscores.binary_mse_init`
        and populated with
        :py:func:`pysteps.verification.spatialscores.binary_mse_accum`.
    bmse_2: dict
        Another BMSE object populated in the same way.

    Returns
    -------
    out: dict
        The merged BMSE object. The inputs are left unmodified.

    Raises
    ------
    ValueError
        If the two objects have different thresholds, wavelets or scales.
    """
    # consistency checks
    if bmse_1["thr"] != bmse_2["thr"]:
        raise ValueError(
            "cannot merge: the thresholds are not same %s!=%s"
            % (bmse_1["thr"], bmse_2["thr"])
        )
    if bmse_1["wavelet"] != bmse_2["wavelet"]:
        raise ValueError(
            "cannot merge: the wavelets are not same %s!=%s"
            % (bmse_1["wavelet"], bmse_2["wavelet"])
        )
    if list(bmse_1["scales"]) != list(bmse_2["scales"]):
        raise ValueError(
            "cannot merge: the scales are not same %s!=%s"
            % (bmse_1["scales"], bmse_2["scales"])
        )

    # merge the BMSE objects
    bmse = bmse_1.copy()
    # BUGFIX: dict.copy() is shallow, so the "mse" array was shared with
    # bmse_1 and the in-place update below silently corrupted the input.
    # Work on a private copy instead.
    bmse["mse"] = np.array(bmse_1["mse"], dtype=float, copy=True)

    n_1 = bmse_1["n"]
    n_2 = bmse_2["n"]
    n_tot = n_1 + n_2

    # sample-size weighted averages of the base rate and per-scale MSE
    bmse["eps"] = (bmse_1["eps"] * n_1 + bmse_2["eps"] * n_2) / n_tot
    for j in range(len(bmse["scales"])):
        bmse["mse"][j] = (bmse_1["mse"][j] * n_1 + bmse_2["mse"][j] * n_2) / n_tot
    bmse["n"] = n_tot

    return bmse


def binary_mse_compute(bmse, return_scales=True):
    """
    Compute the BMSE.

    Parameters
    ----------
    bmse: dict
        The BMSE object initialized with
        :py:func:`pysteps.verification.spatialscores.binary_mse_init`
        and accumulated with
        :py:func:`pysteps.verification.spatialscores.binary_mse_accum`.
    return_scales: bool, optional
        Whether to return the spatial scales resulting from the wavelet
        decomposition.

    Returns
    -------
    BMSE: array_like
        One-dimensional array containing the binary MSE for each spatial
        scale. Non-finite skill values are set to NaN.
    scales: list, optional
        If ``return_scales=True``, return the spatial scales in pixels
        resulting from the wavelet decomposition.
    """
    scales = bmse["scales"]
    n_scales = len(scales)
    eps = bmse["eps"]

    BMSE = np.zeros(n_scales)
    for j in range(n_scales):
        mse = bmse["mse"][j]
        # skill relative to a random binary field with base rate eps, whose
        # expected MSE 2*eps*(1-eps) is shared evenly across the scales
        BMSE[j] = 1 - mse / (2 * eps * (1 - eps) / n_scales)

    BMSE[~np.isfinite(BMSE)] = np.nan

    if return_scales:
        return BMSE, scales
    return BMSE
""" scales = bmse["scales"] n_scales = len(scales) eps = bmse["eps"] BMSE = np.zeros(n_scales) for j in range(n_scales): mse = bmse["mse"][j] BMSE[j] = 1 - mse / (2 * eps * (1 - eps) / n_scales) BMSE[~np.isfinite(BMSE)] = np.nan if return_scales: return BMSE, scales else: return BMSE def fss(X_f, X_o, thr, scale): """ Compute the fractions skill score (FSS) for a deterministic forecast field and the corresponding observation field. Parameters ---------- X_f: array_like Array of shape (m, n) containing the forecast field. X_o: array_like Array of shape (m, n) containing the observation field. thr: float The intensity threshold. scale: int The spatial scale in pixels. In practice, the scale represents the size of the moving window that it is used to compute the fraction of pixels above the threshold. Returns ------- out: float The fractions skill score between 0 and 1. References ---------- :cite:`RL2008`, :cite:`EWWM2013` """ fss = fss_init(thr, scale) fss_accum(fss, X_f, X_o) return fss_compute(fss) def fss_init(thr, scale): """ Initialize a fractions skill score (FSS) verification object. Parameters ---------- thr: float The intensity threshold. scale: float The spatial scale in pixels. In practice, the scale represents the size of the moving window that it is used to compute the fraction of pixels above the threshold. Returns ------- fss: dict The initialized FSS verification object. """ fss = dict(thr=thr, scale=scale, sum_fct_sq=0.0, sum_fct_obs=0.0, sum_obs_sq=0.0) return fss def fss_accum(fss, X_f, X_o): """Accumulate forecast-observation pairs to an FSS object. Parameters ----------- fss: dict The FSS object initialized with :py:func:`pysteps.verification.spatialscores.fss_init`. X_f: array_like Array of shape (m, n) containing the forecast field. X_o: array_like Array of shape (m, n) containing the observation field. 
""" if len(X_f.shape) != 2 or len(X_o.shape) != 2 or X_f.shape != X_o.shape: message = "X_f and X_o must be two-dimensional arrays" message += " having the same shape" raise ValueError(message) X_f = X_f.copy() X_f[~np.isfinite(X_f)] = fss["thr"] - 1 X_o = X_o.copy() X_o[~np.isfinite(X_o)] = fss["thr"] - 1 # Convert to binary fields with the given intensity threshold I_f = (X_f >= fss["thr"]).astype(float) I_o = (X_o >= fss["thr"]).astype(float) # Compute fractions of pixels above the threshold within a square # neighboring area by applying a 2D moving average to the binary fields if fss["scale"] > 1: S_f = uniform_filter(I_f, size=fss["scale"], mode="constant", cval=0.0) S_o = uniform_filter(I_o, size=fss["scale"], mode="constant", cval=0.0) else: S_f = I_f S_o = I_o fss["sum_obs_sq"] += np.nansum(S_o**2) fss["sum_fct_obs"] += np.nansum(S_f * S_o) fss["sum_fct_sq"] += np.nansum(S_f**2) def fss_merge(fss_1, fss_2): """ Merge two FSS objects. Parameters ---------- fss_1: dict A FSS object initialized with :py:func:`pysteps.verification.spatialscores.fss_init`. and populated with :py:func:`pysteps.verification.spatialscores.fss_accum`. fss_2: dict Another FSS object initialized with :py:func:`pysteps.verification.spatialscores.fss_init`. and populated with :py:func:`pysteps.verification.spatialscores.fss_accum`. Returns ------- out: dict The merged FSS object. """ # checks if fss_1["thr"] != fss_2["thr"]: raise ValueError( "cannot merge: the thresholds are not same %s!=%s" % (fss_1["thr"], fss_2["thr"]) ) if fss_1["scale"] != fss_2["scale"]: raise ValueError( "cannot merge: the scales are not same %s!=%s" % (fss_1["scale"], fss_2["scale"]) ) # merge the FSS objects fss = fss_1.copy() fss["sum_obs_sq"] += fss_2["sum_obs_sq"] fss["sum_fct_obs"] += fss_2["sum_fct_obs"] fss["sum_fct_sq"] += fss_2["sum_fct_sq"] return fss def fss_compute(fss): """ Compute the FSS. 
def _wavelet_decomp(X, w):
    # Decompose X with a 2-D wavelet transform and reconstruct one field
    # per decomposition level by zeroing the coefficients of all other
    # levels, so each output isolates the error at a single spatial scale.
    coeffs = pywt.wavedec2(X, w)
    n_levels = len(coeffs)

    isolated_fields = []
    for keep in range(n_levels):
        level_coeffs = list(coeffs)
        for drop in range(n_levels):
            if drop == keep:
                continue
            level_coeffs[drop] = tuple(np.zeros_like(band) for band in coeffs[drop])
        isolated_fields.append(pywt.waverec2(level_coeffs, w))

    return isolated_fields
def animate(
    precip_obs,
    precip_fct=None,
    timestamps_obs=None,
    timestep_min=None,
    motion_field=None,
    ptype="ensemble",
    motion_plot="quiver",
    geodata=None,
    title=None,
    prob_thr=None,
    display_animation=True,
    nloops=1,
    time_wait=0.2,
    savefig=False,
    fig_dpi=100,
    fig_format="png",
    path_outputs="",
    precip_kwargs=None,
    motion_kwargs=None,
    map_kwargs=None,
):
    """
    Function to animate observations and forecasts in pysteps.

    It also allows to export the individual frames as figures, which
    is useful for constructing animated GIFs or similar.

    Parameters
    ----------
    precip_obs: array-like
        Three-dimensional array containing the time series of observed
        precipitation fields.
    precip_fct: array-like, optional
        The three or four-dimensional (for ensembles) array containing the
        time series of forecasted precipitation fields.
    timestamps_obs: list of datetimes, optional
        List of datetime objects corresponding to the time stamps of the
        fields in precip_obs.
    timestep_min: float, optional
        The time resolution in minutes of the forecast.
    motion_field: array-like, optional
        Three-dimensional array containing the u and v components of the
        motion field.
    motion_plot: string, optional
        The method to plot the motion field: "quiver" or "streamplot".
    geodata: dictionary or None, optional
        Dictionary containing geographical information about the field
        (keys: projection, x1, y1, x2, y2, yorigin). See
        :py:func:`pysteps.visualization.precipfields.plot_precip_field`.
    title: str or None, optional
        If not None, print the string as title on top of the plot.
    ptype: {'ensemble', 'mean', 'prob'}, str, optional
        Type of the plot to animate: ensemble members, ensemble mean, or
        exceedance probability (using the threshold defined in prob_thr).
    prob_thr: float, optional
        Intensity threshold for the exceedance probability maps.
        Applicable if ptype = 'prob'.
    display_animation: bool, optional
        If set to True, display the animation (set to False if only
        interested in saving the animation frames).
    nloops: int, optional
        The number of loops in the animation.
    time_wait: float, optional
        The time in seconds between one frame and the next. Applicable
        if display_animation is True.
    savefig: bool, optional
        If set to True, save the individual frames into path_outputs.
    fig_dpi: float, optional
        The resolution in dots per inch. Applicable if savefig is True.
    fig_format: str, optional
        Filename extension. Applicable if savefig is True.
    path_outputs: string, optional
        Path to folder where to save the frames. Applicable if savefig
        is True.
    precip_kwargs: dict, optional
        Optional parameters that are supplied to
        :py:func:`pysteps.visualization.precipfields.plot_precip_field`.
    motion_kwargs: dict, optional
        Optional parameters that are supplied to
        :py:func:`pysteps.visualization.motionfields.quiver` or
        :py:func:`pysteps.visualization.motionfields.streamplot`.
    map_kwargs: dict, optional
        Optional parameters that need to be passed to
        :py:func:`pysteps.visualization.basemaps.plot_geography`.

    Returns
    -------
    None
    """
    # avoid mutable default arguments
    if precip_kwargs is None:
        precip_kwargs = {}

    if motion_kwargs is None:
        motion_kwargs = {}

    if map_kwargs is None:
        map_kwargs = {}

    if precip_fct is not None:
        # promote a deterministic (3-D) forecast to a one-member ensemble
        if len(precip_fct.shape) == 3:
            precip_fct = precip_fct[None, ...]
        n_lead_times = precip_fct.shape[1]
        n_members = precip_fct.shape[0]
    else:
        n_lead_times = 0
        n_members = 1

    if title is not None and isinstance(title, str):
        title_first_line = title + "\n"
    else:
        title_first_line = ""

    if motion_plot not in MOTION_VALID_METHODS:
        raise ValueError(
            f"Invalid motion plot method '{motion_plot}'."
            f"Supported: {str(MOTION_VALID_METHODS)}"
        )

    if ptype not in PRECIP_VALID_TYPES:
        raise ValueError(
            f"Invalid precipitation type '{ptype}'."
            f"Supported: {str(PRECIP_VALID_TYPES)}"
        )

    if timestamps_obs is not None:
        if len(timestamps_obs) != precip_obs.shape[0]:
            raise ValueError(
                f"The number of timestamps does not match the size of precip_obs: "
                f"{len(timestamps_obs)} != {precip_obs.shape[0]}"
            )
        # the reference time used in the output filenames is the forecast
        # initialization time when a forecast is given
        if precip_fct is not None:
            reftime_str = timestamps_obs[-1].strftime("%Y%m%d%H%M")
        else:
            reftime_str = timestamps_obs[0].strftime("%Y%m%d%H%M")
    else:
        reftime_str = None

    if ptype == "prob" and prob_thr is None:
        raise ValueError("ptype 'prob' needs a prob_thr value")

    # mean and probability plots collapse the ensemble to a single panel
    if ptype != "ensemble":
        n_members = 1

    n_obs = precip_obs.shape[0]

    loop = 0
    while loop < nloops:
        for n in range(n_members):
            for i in range(n_obs + n_lead_times):
                plt.clf()

                # Observations
                # (frames are saved only for the first member, n == 0)
                if i < n_obs and (display_animation or n == 0):
                    title = title_first_line + "Analysis"
                    if timestamps_obs is not None:
                        title += (
                            f" valid for {timestamps_obs[i].strftime('%Y-%m-%d %H:%M')}"
                        )

                    plt.clf()
                    if ptype == "prob":
                        prob_field = st.postprocessing.ensemblestats.excprob(
                            precip_obs[None, i, ...], prob_thr
                        )
                        ax = st.plt.plot_precip_field(
                            prob_field,
                            ptype="prob",
                            geodata=geodata,
                            probthr=prob_thr,
                            title=title,
                            map_kwargs=map_kwargs,
                            **precip_kwargs,
                        )
                    else:
                        ax = st.plt.plot_precip_field(
                            precip_obs[i, :, :],
                            geodata=geodata,
                            title=title,
                            map_kwargs=map_kwargs,
                            **precip_kwargs,
                        )

                    if motion_field is not None:
                        if motion_plot == "quiver":
                            st.plt.quiver(
                                motion_field, ax=ax, geodata=geodata, **motion_kwargs
                            )
                        elif motion_plot == "streamplot":
                            st.plt.streamplot(
                                motion_field, ax=ax, geodata=geodata, **motion_kwargs
                            )

                    # NOTE(review): `savefig & (loop == 0)` relies on bitwise
                    # AND of two bools; works, but `and` would be clearer.
                    if savefig & (loop == 0):
                        figtags = [reftime_str, ptype, f"f{i:02d}"]
                        figname = "_".join([tag for tag in figtags if tag])
                        filename = os.path.join(path_outputs, f"{figname}.{fig_format}")
                        plt.savefig(filename, bbox_inches="tight", dpi=fig_dpi)
                        print("saved: ", filename)

                # Forecasts
                elif i >= n_obs and precip_fct is not None:
                    title = title_first_line + "Forecast"
                    if timestamps_obs is not None:
                        title += f" valid for {timestamps_obs[-1].strftime('%Y-%m-%d %H:%M')}"
                    if timestep_min is not None:
                        title += " +%02d min" % ((1 + i - n_obs) * timestep_min)
                    else:
                        title += " +%02d" % (1 + i - n_obs)

                    plt.clf()
                    if ptype == "prob":
                        prob_field = st.postprocessing.ensemblestats.excprob(
                            precip_fct[:, i - n_obs, :, :], prob_thr
                        )
                        ax = st.plt.plot_precip_field(
                            prob_field,
                            ptype="prob",
                            geodata=geodata,
                            probthr=prob_thr,
                            title=title,
                            map_kwargs=map_kwargs,
                            **precip_kwargs,
                        )
                    elif ptype == "mean":
                        ens_mean = st.postprocessing.ensemblestats.mean(
                            precip_fct[:, i - n_obs, :, :]
                        )
                        ax = st.plt.plot_precip_field(
                            ens_mean,
                            geodata=geodata,
                            title=title,
                            map_kwargs=map_kwargs,
                            **precip_kwargs,
                        )
                    else:
                        ax = st.plt.plot_precip_field(
                            precip_fct[n, i - n_obs, ...],
                            geodata=geodata,
                            title=title,
                            map_kwargs=map_kwargs,
                            **precip_kwargs,
                        )

                    if motion_field is not None:
                        if motion_plot == "quiver":
                            st.plt.quiver(
                                motion_field, ax=ax, geodata=geodata, **motion_kwargs
                            )
                        elif motion_plot == "streamplot":
                            st.plt.streamplot(
                                motion_field, ax=ax, geodata=geodata, **motion_kwargs
                            )

                    # label the ensemble member inside the panel
                    if ptype == "ensemble" and n_members > 1:
                        plt.text(
                            0.01,
                            0.99,
                            "m %02d" % (n + 1),
                            transform=ax.transAxes,
                            ha="left",
                            va="top",
                        )

                    if savefig & (loop == 0):
                        figtags = [reftime_str, ptype, f"f{i:02d}", f"m{n + 1:02d}"]
                        figname = "_".join([tag for tag in figtags if tag])
                        filename = os.path.join(path_outputs, f"{figname}.{fig_format}")
                        plt.savefig(filename, bbox_inches="tight", dpi=fig_dpi)
                        print("saved: ", filename)

                if display_animation:
                    plt.pause(time_wait)

            if display_animation:
                plt.pause(2 * time_wait)

        loop += 1

    plt.close()
def plot_geography(
    proj4str,
    extent,
    lw=0.5,
    drawlonlatlines=False,
    drawlonlatlabels=True,
    plot_map="cartopy",
    scale="50m",
    subplot=None,
    **kwargs,
):
    """
    Plot a geographical map in a chosen projection using cartopy.

    .. _SubplotSpec: https://matplotlib.org/api/_as_gen/matplotlib.gridspec.SubplotSpec.html

    Parameters
    ----------
    proj4str: str
        The PROJ.4-compatible projection string.
    extent: scalars (left, right, bottom, top)
        The bounding box in proj4str coordinates.
    lw: float, optional
        Linewidth of the map (administrative boundaries and coastlines).
    drawlonlatlines: bool, optional
        If set to True, draw longitude and latitude lines.
    drawlonlatlabels: bool, optional
        If set to True, draw longitude and latitude labels. Valid only if
        'drawlonlatlines' is True.
    plot_map: {'cartopy', None}, optional
        The type of basemap, either 'cartopy' or None. If None, the figure
        axis is returned without any basemap drawn. Default ``'cartopy'``.
    scale: {'10m', '50m', '110m'}, optional
        The scale (resolution). Applicable if 'plot_map' is 'cartopy'.
        Default ``'50m'``.
    subplot: tuple of int (nrows, ncols, index) or SubplotSpec_ instance, optional
        The subplot where to plot the basemap. By default, the basemap
        replaces the current axis.

    Returns
    -------
    ax: fig Axes
        Cartopy axes.
    """
    # any leftover keyword is deprecated and ignored
    if kwargs:
        warnings.warn(
            "plot_geography: The following keywords are ignored:\n"
            + str(kwargs)
            + "\nIn version 1.5, passing unsupported arguments will raise an error.",
            DeprecationWarning,
        )

    if plot_map is None:
        return plt.gca()

    if plot_map not in VALID_BASEMAPS:
        raise ValueError(
            f"unsupported plot_map method {plot_map}. Supported basemaps: "
            f"{VALID_BASEMAPS}"
        )

    # degrade gracefully when the optional mapping dependencies are missing
    if plot_map == "cartopy" and not CARTOPY_IMPORTED:
        warnings.warn(
            "The cartopy package is required to plot the geographical map but it is "
            "not installed. Ignoring the geographic information."
        )
        return plt.gca()

    if not PYPROJ_IMPORTED:
        warnings.warn(
            "the pyproj package is required to plot the geographical map "
            "but it is not installed"
        )
        return plt.gca()

    return plot_map_cartopy(
        utils.proj4_to_cartopy(proj4str),
        extent,
        scale,
        drawlonlatlines=drawlonlatlines,
        drawlonlatlabels=drawlonlatlabels,
        lw=lw,
        subplot=subplot,
    )
def plot_map_cartopy(
    crs,
    extent,
    cartopy_scale,
    drawlonlatlines=False,
    drawlonlatlabels=True,
    lw=0.5,
    subplot=None,
):
    """
    Plot coastlines, countries, rivers and meridians/parallels using cartopy.

    .. _SubplotSpec: https://matplotlib.org/api/_as_gen/matplotlib.gridspec.SubplotSpec.html

    Parameters
    ----------
    crs: object
        Instance of a crs class defined in cartopy.crs. It can be created
        using utils.proj4_to_cartopy.
    extent: scalars (left, right, bottom, top)
        The coordinates of the bounding box.
    cartopy_scale: {'10m', '50m', '110m'}
        The scale (resolution) of the map.
    drawlonlatlines: bool
        Whether to plot longitudes and latitudes.
    drawlonlatlabels: bool, optional
        If set to True, draw longitude and latitude labels. Valid only if
        'drawlonlatlines' is True.
    lw: float
        Line width.
    subplot: tuple of int (nrows, ncols, index) or SubplotSpec_ instance, optional
        The subplot where to place the basemap. By default, the basemap
        replaces the current axis.

    Returns
    -------
    ax: axes
        Cartopy axes. Compatible with matplotlib.

    Raises
    ------
    MissingOptionalDependency
        If cartopy is not installed.
    """
    if not CARTOPY_IMPORTED:
        raise MissingOptionalDependency(
            "the cartopy package is required to plot the geographical map"
            " but it is not installed"
        )

    if subplot is None:
        ax = plt.gca()
    elif isinstance(subplot, gridspec.SubplotSpec):
        ax = plt.subplot(subplot, projection=crs)
    else:
        ax = plt.subplot(*subplot, projection=crs)

    # the current axis may not be a cartopy GeoAxes yet; replace it
    if not isinstance(ax, GeoAxesSubplot):
        ax = plt.subplot(ax.get_subplotspec(), projection=crs)
        # cax.clear()

    ax.set_axis_off()

    water_color = np.array([0.59375, 0.71484375, 0.8828125])
    land_color = np.array([0.9375, 0.9375, 0.859375])

    # the "ocean" feature is not available at 10m resolution
    ocean_scale = "50m" if cartopy_scale == "10m" else cartopy_scale

    # (category, name, scale, style kwargs, zorder) in drawing order;
    # zorder 0 = filled background polygons, zorder 15 = line work on top
    feature_specs = [
        ("physical", "ocean", ocean_scale,
         dict(edgecolor="none", facecolor=water_color), 0),
        ("physical", "land", cartopy_scale,
         dict(edgecolor="none", facecolor=land_color), 0),
        ("physical", "coastline", cartopy_scale,
         dict(edgecolor="black", facecolor="none", linewidth=lw), 15),
        ("physical", "lakes", cartopy_scale,
         dict(edgecolor="none", facecolor=water_color), 0),
        ("physical", "rivers_lake_centerlines", cartopy_scale,
         dict(edgecolor=water_color, facecolor="none"), 0),
        ("cultural", "admin_0_boundary_lines_land", cartopy_scale,
         dict(edgecolor="black", facecolor="none", linewidth=lw), 15),
    ]
    # reefs and minor islands only exist at 10m resolution
    if cartopy_scale in ["10m", "50m"]:
        feature_specs += [
            ("physical", "reefs", "10m",
             dict(edgecolor="black", facecolor="none", linewidth=lw), 15),
            ("physical", "minor_islands", "10m",
             dict(edgecolor="black", facecolor="none", linewidth=lw), 15),
        ]

    for category, name, feat_scale, style, zorder in feature_specs:
        ax.add_feature(
            cfeature.NaturalEarthFeature(category, name, scale=feat_scale, **style),
            zorder=zorder,
        )

    if drawlonlatlines:
        grid_lines = ax.gridlines(
            crs=ccrs.PlateCarree(), draw_labels=drawlonlatlabels, dms=True
        )
        grid_lines.top_labels = grid_lines.right_labels = False
        grid_lines.y_inline = grid_lines.x_inline = False
        grid_lines.rotate_labels = False

    ax.set_extent(extent, crs)

    return ax
def motion_plot(
    uv_motion_field,
    plot_type="quiver",
    ax=None,
    geodata=None,
    axis="on",
    plot_kwargs=None,
    map_kwargs=None,
    step=20,
):
    """
    Plot a motion field as arrows (quiver) or as stream lines (streamplot).

    .. _`quiver_doc`: https://matplotlib.org/api/_as_gen/matplotlib.pyplot.quiver.html

    .. _`streamplot_doc`: https://matplotlib.org/api/_as_gen/matplotlib.pyplot.streamplot.html

    Parameters
    ----------
    uv_motion_field: array-like
        Array of shape (2, m, n) containing the input motion field.
    plot_type: str
        Plot type. "quiver" or "streamplot".
    ax: axis object
        Optional axis object to use for plotting.
    geodata: dictionary or None
        Optional dictionary containing geographical information about the
        field (keys: projection, x1, y1, x2, y2, yorigin). See
        :py:func:`pysteps.visualization.basemaps.plot_geography`.
    axis: {'off', 'on'}, optional
        Whether to turn off or on the x and y axis.
    plot_kwargs: dict, optional
        Optional dictionary containing keyword arguments passed to
        `quiver()` or `streamplot()`. See the `quiver_doc`_ and
        `streamplot_doc`_ matplotlib documentation.
    map_kwargs: dict
        Optional parameters that need to be passed to
        :py:func:`pysteps.visualization.basemaps.plot_geography`.
    step: int
        Optional resample step to control the density of the arrows.

    Returns
    -------
    out: axis object
        Figure axes. Needed if one wants to add e.g. text inside the plot.
    """
    if plot_type not in VALID_PLOT_TYPES:
        raise ValueError(
            f"Invalid plot_type: {plot_type}.\nSupported: {str(VALID_PLOT_TYPES)}"
        )

    # avoid mutable default arguments
    plot_kwargs = {} if plot_kwargs is None else plot_kwargs
    map_kwargs = {} if map_kwargs is None else map_kwargs

    # input dimensions are assumed to be (2, lat, lon)
    _, nlat, nlon = uv_motion_field.shape

    x_grid, y_grid, extent, _, _ = utils.get_geogrid(nlat, nlon, geodata=geodata)
    ax = utils.get_basemap_axis(extent, ax=ax, geodata=geodata, map_kwargs=map_kwargs)

    # subsample the grid to control the density of arrows/streamlines
    subsample = (slice(None, None, step), slice(None, None, step))
    u_comp = uv_motion_field[0, :, :][subsample]
    v_comp = uv_motion_field[1, :, :][subsample].copy()
    x_grid = x_grid[subsample]
    y_grid = y_grid[subsample]

    # with yorigin="upper" the first raster row is the northernmost one,
    # so the y component must be flipped for plotting
    if geodata is None or geodata["yorigin"] == "upper":
        v_comp *= -1

    if plot_type.lower() == "quiver":
        ax.quiver(x_grid, y_grid, u_comp, v_comp, angles="xy", zorder=20, **plot_kwargs)
    else:
        ax.streamplot(x_grid, y_grid, u_comp, v_comp, zorder=30, **plot_kwargs)

    # quiver sometimes does not produce tight axes
    ax.autoscale(enable=True, axis="both", tight=True)

    if geodata is None or axis == "off":
        for ax_dim in (ax.xaxis, ax.yaxis):
            ax_dim.set_ticks([])
            ax_dim.set_ticklabels([])

    return ax
Other parameters ---------------- See :py::func:`pysteps.visualization.motionfields.motion_plot`. Returns ------- out: axis object0 Figure axes. Needed if one wants to add e.g. text inside the plot. """ if quiver_kwargs is None: quiver_kwargs = dict() return motion_plot( uv_motion_field, plot_type="quiver", ax=ax, geodata=geodata, axis=axis, step=step, plot_kwargs=quiver_kwargs, map_kwargs=map_kwargs, ) def streamplot( uv_motion_field, ax=None, geodata=None, axis="on", streamplot_kwargs=None, map_kwargs=None, step=20, ): """ Function to plot a motion field as streamlines. Wrapper for :func:`pysteps.visualization.motionfields.motion_plot` passing `plot_type="streamplot"`. .. _`streamplot_doc`: https://matplotlib.org/api/_as_gen/matplotlib.pyplot.streamplot.html Parameters ---------- uv_motion_field: array-like Array of shape (2, m,n) containing the input motion field. streamplot_kwargs: dict, optional Optional dictionary containing keyword arguments for the quiver method. This argument is passed to See the `streamplot_doc`_ matplotlib's documentation. Other parameters ---------------- See :py:func:`pysteps.visualization.motionfields.motion_plot`. Returns ------- out: axis object Figure axes. Needed if one wants to add e.g. text inside the plot. """ if streamplot_kwargs is None: streamplot_kwargs = dict() return motion_plot( uv_motion_field, plot_type="streamplot", ax=ax, geodata=geodata, axis=axis, step=step, plot_kwargs=streamplot_kwargs, map_kwargs=map_kwargs, ) ================================================ FILE: pysteps/visualization/precipfields.py ================================================ # -*- coding: utf-8 -*- """ pysteps.visualization.precipfields ================================== Methods for plotting precipitation fields. .. 
autosummary:: :toctree: ../generated/ plot_precip_field get_colormap """ import copy import warnings import matplotlib.pylab as plt import numpy as np from matplotlib import pyplot, colors from pysteps.visualization.utils import get_geogrid, get_basemap_axis PRECIP_VALID_TYPES = ("intensity", "depth", "prob") PRECIP_VALID_UNITS = ("mm/h", "mm", "dBZ") ############################ # precipitation plots zorder # - precipitation: 10 def plot_precip_field( precip, ptype="intensity", ax=None, geodata=None, units="mm/h", bbox=None, colorscale="pysteps", probthr=None, title=None, colorbar=True, axis="on", cax=None, map_kwargs=None, colormap_config=None, ): """ Function to plot a precipitation intensity or probability field with a colorbar. .. _Axes: https://matplotlib.org/api/axes_api.html#matplotlib.axes.Axes .. _SubplotSpec: https://matplotlib.org/api/_as_gen/matplotlib.gridspec.SubplotSpec.html Parameters ---------- precip: array-like Two-dimensional array containing the input precipitation field or an exceedance probability map. ptype: {'intensity', 'depth', 'prob'}, optional Type of the map to plot: 'intensity' = precipitation intensity field, 'depth' = precipitation depth (accumulation) field, 'prob' = exceedance probability field. geodata: dictionary or None, optional Optional dictionary containing geographical information about the field. Required is map is not None. If geodata is not None, it must contain the following key-value pairs: .. 
tabularcolumns:: |p{1.5cm}|L| +-----------------+---------------------------------------------------+ | Key | Value | +=================+===================================================+ | projection | PROJ.4-compatible projection definition | +-----------------+---------------------------------------------------+ | x1 | x-coordinate of the lower-left corner of the data | | | raster | +-----------------+---------------------------------------------------+ | y1 | y-coordinate of the lower-left corner of the data | | | raster | +-----------------+---------------------------------------------------+ | x2 | x-coordinate of the upper-right corner of the | | | data raster | +-----------------+---------------------------------------------------+ | y2 | y-coordinate of the upper-right corner of the | | | data raster | +-----------------+---------------------------------------------------+ | yorigin | a string specifying the location of the first | | | element in the data raster w.r.t. y-axis: | | | 'upper' = upper border, 'lower' = lower border | +-----------------+---------------------------------------------------+ units : {'mm/h', 'mm', 'dBZ'}, optional Units of the input array. If ptype is 'prob', this specifies the unit of the intensity threshold. bbox : tuple, optional Four-element tuple specifying the coordinates of the bounding box. Use this for plotting a subdomain inside the input grid. The coordinates are of the form (lower left x, lower left y ,upper right x, upper right y). If 'geodata' is not None, the bbox is in map coordinates, otherwise it represents image pixels. colorscale : {'pysteps', 'STEPS-BE', 'STEPS-NL', 'BOM-RF3'}, optional Which colorscale to use. Applicable if units is 'mm/h', 'mm' or 'dBZ'. probthr : float, optional Intensity threshold to show in the color bar of the exceedance probability map. Required if ptype is "prob" and colorbar is True. title : str, optional If not None, print the title on top of the plot. 
colorbar : bool, optional If set to True, add a colorbar on the right side of the plot. axis : {'off','on'}, optional Whether to turn off or on the x and y axis. cax : Axes_ object, optional Axes into which the colorbar will be drawn. If no axes is provided the colorbar axes are created next to the plot. colormap_config : ColormapConfig, optional Custom colormap configuration. If provided, this will override the colorscale parameter. The ColormapConfig class must have the following attributes: cmap, norm, clevs. Other parameters ---------------- map_kwargs: dict Optional parameters that need to be passed to :py:func:`pysteps.visualization.basemaps.plot_geography`. Returns ------- ax : fig Axes_ Figure axes. Needed if one wants to add e.g. text inside the plot. """ if map_kwargs is None: map_kwargs = {} if ptype not in PRECIP_VALID_TYPES: raise ValueError( f"Invalid precipitation type '{ptype}'." f"Supported: {str(PRECIP_VALID_TYPES)}" ) if units not in PRECIP_VALID_UNITS: raise ValueError( f"Invalid precipitation units '{units}." 
f"Supported: {str(PRECIP_VALID_UNITS)}" ) if ptype == "prob" and colorbar and probthr is None: raise ValueError("ptype='prob' but probthr not specified") if len(precip.shape) != 2: raise ValueError("The input is not two-dimensional array") # Assumes the input dimensions are lat/lon nlat, nlon = precip.shape x_grid, y_grid, extent, regular_grid, origin = get_geogrid( nlat, nlon, geodata=geodata ) ax = get_basemap_axis(extent, ax=ax, geodata=geodata, map_kwargs=map_kwargs) precip = np.ma.masked_invalid(precip) # Handle colormap configuration if colormap_config is None: cmap, norm, clevs, clevs_str = get_colormap(ptype, units, colorscale) else: cmap, norm, clevs = _validate_colormap_config(colormap_config, ptype) clevs_str = _dynamic_formatting_floats(clevs) # Plot the precipitation field if regular_grid: im = _plot_field(precip, ax, extent, cmap, norm, origin=origin) else: im = _plot_field(precip, ax, extent, cmap, norm, x_grid=x_grid, y_grid=y_grid) plt.title(title) # Add colorbar if colorbar: if ptype in ["intensity", "depth"]: extend = "max" else: extend = "neither" cbar = plt.colorbar( im, ticks=clevs, spacing="uniform", extend=extend, shrink=0.8, cax=cax ) if clevs_str is not None: cbar.ax.set_yticklabels(clevs_str) if ptype == "intensity": cbar.set_label(f"Precipitation intensity [{units}]") elif ptype == "depth": cbar.set_label(f"Precipitation depth [{units}]") else: cbar.set_label(f"P(R > {probthr:.1f} {units})") if geodata is None or axis == "off": ax.xaxis.set_ticks([]) ax.xaxis.set_ticklabels([]) ax.yaxis.set_ticks([]) ax.yaxis.set_ticklabels([]) if bbox is not None: ax.set_xlim(bbox[0], bbox[2]) ax.set_ylim(bbox[1], bbox[3]) return ax def _plot_field(precip, ax, extent, cmap, norm, origin=None, x_grid=None, y_grid=None): precip = precip.copy() if (x_grid is None) or (y_grid is None): im = ax.imshow( precip, cmap=cmap, norm=norm, extent=extent, interpolation="nearest", origin=origin, zorder=10, ) else: im = ax.pcolormesh( x_grid, y_grid, precip, cmap=cmap, 
            norm=norm,
            zorder=10,
        )
    return im


def get_colormap(ptype, units="mm/h", colorscale="pysteps"):
    """
    Function to generate a colormap (cmap) and norm.

    Parameters
    ----------
    ptype : {'intensity', 'depth', 'prob'}, optional
        Type of the map to plot: 'intensity' = precipitation intensity field,
        'depth' = precipitation depth (accumulation) field,
        'prob' = exceedance probability field.
    units : {'mm/h', 'mm', 'dBZ'}, optional
        Units of the input array. If ptype is 'prob', this specifies the unit
        of the intensity threshold.
    colorscale : {'pysteps', 'STEPS-BE', 'STEPS-NL', 'BOM-RF3'}, optional
        Which colorscale to use. Applicable if units is 'mm/h', 'mm' or 'dBZ'.

    Returns
    -------
    cmap : Colormap instance
        colormap
    norm : colors.Normalize object
        Colors norm
    clevs: list(float)
        List of precipitation values defining the color limits.
    clevs_str: list(str)
        List of precipitation values defining the color limits
        (with correct number of decimals).
    """
    if ptype in ["intensity", "depth"]:
        # Get list of colors
        color_list, clevs, clevs_str = _get_colorlist(units, colorscale)

        cmap = colors.LinearSegmentedColormap.from_list(
            "cmap", color_list, len(clevs) - 1
        )

        # Per-colorscale color for values above the last class limit.
        if colorscale == "BOM-RF3":
            cmap.set_over("black", 1)
        if colorscale == "pysteps":
            cmap.set_over("darkred", 1)
        if colorscale == "STEPS-NL":
            cmap.set_over("darkmagenta", 1)
        if colorscale == "STEPS-BE":
            cmap.set_over("black", 1)
        norm = colors.BoundaryNorm(clevs, cmap.N)

        # Masked (NaN) cells are gray, below-range cells are transparent.
        cmap.set_bad("gray", alpha=0.5)
        cmap.set_under("none")

        return cmap, norm, clevs, clevs_str

    if ptype == "prob":
        cmap = copy.copy(plt.get_cmap("OrRd", 10))
        cmap.set_bad("gray", alpha=0.5)
        cmap.set_under("none")
        clevs = np.linspace(0, 1, 11)
        clevs[0] = 1e-3  # to set zeros to transparent
        norm = colors.BoundaryNorm(clevs, cmap.N)
        clevs_str = [f"{clev:.1f}" for clev in clevs]
        return cmap, norm, clevs, clevs_str

    # Fallback: continuous colormap with no class limits.
    return pyplot.get_cmap("jet"), colors.Normalize(), None, None


def _get_colorlist(units="mm/h", colorscale="pysteps"):
    """
    Function to get a list of colors to generate the colormap.

    Parameters
    ----------
    units : str
        Units of the input array (mm/h, mm or dBZ)
    colorscale : str
        Which colorscale to use (BOM-RF3, pysteps, STEPS-BE, STEPS-NL)

    Returns
    -------
    color_list : list(str)
        List of color strings.
    clevs : list(float)
        List of precipitation values defining the color limits.
    clevs_str : list(str)
        List of precipitation values defining the color limits
        (with correct number of decimals).
    """
    if colorscale == "BOM-RF3":
        color_list = np.array(
            [
                (255, 255, 255),  # 0.0
                (245, 245, 255),  # 0.2
                (180, 180, 255),  # 0.5
                (120, 120, 255),  # 1.5
                (20, 20, 255),  # 2.5
                (0, 216, 195),  # 4.0
                (0, 150, 144),  # 6.0
                (0, 102, 102),  # 10
                (255, 255, 0),  # 15
                (255, 200, 0),  # 20
                (255, 150, 0),  # 30
                (255, 100, 0),  # 40
                (255, 0, 0),  # 50
                (200, 0, 0),  # 60
                (120, 0, 0),  # 75
                (40, 0, 0),
            ]
        )  # > 100
        color_list = color_list / 255.0
        if units == "mm/h":
            clevs = [
                0.0,
                0.2,
                0.5,
                1.5,
                2.5,
                4,
                6,
                10,
                15,
                20,
                30,
                40,
                50,
                60,
                75,
                100,
                150,
            ]
        elif units == "mm":
            clevs = [
                0.0,
                0.2,
                0.5,
                1.5,
                2.5,
                4,
                5,
                7,
                10,
                15,
                20,
                25,
                30,
                35,
                40,
                45,
                50,
            ]
        else:
            raise ValueError("Wrong units in get_colorlist: %s" % units)
    elif colorscale == "pysteps":
        # pinkHex = '#%02x%02x%02x' % (232, 215, 242)
        redgrey_hex = "#%02x%02x%02x" % (156, 126, 148)
        color_list = [
            redgrey_hex,
            "#640064",
            "#AF00AF",
            "#DC00DC",
            "#3232C8",
            "#0064FF",
            "#009696",
            "#00C832",
            "#64FF00",
            "#96FF00",
            "#C8FF00",
            "#FFFF00",
            "#FFC800",
            "#FFA000",
            "#FF7D00",
            "#E11900",
        ]
        if units in ["mm/h", "mm"]:
            clevs = [
                0.08,
                0.16,
                0.25,
                0.40,
                0.63,
                1,
                1.6,
                2.5,
                4,
                6.3,
                10,
                16,
                25,
                40,
                63,
                100,
                160,
            ]
        elif units == "dBZ":
            clevs = np.arange(10, 65, 5)
        else:
            raise ValueError("Wrong units in get_colorlist: %s" % units)
    elif colorscale == "STEPS-NL":
        redgrey_hex = "#%02x%02x%02x" % (156, 126, 148)
        color_list = [
            "lightgrey",
            "lightskyblue",
            "deepskyblue",
            "blue",
            "darkblue",
            "yellow",
            "gold",
            "darkorange",
            "red",
            "darkred",
        ]
        if units in ["mm/h", "mm"]:
            clevs = [0.1, 0.5, 1.0, 1.6, 2.5, 4.0, 6.4, 10.0, 16.0, 25.0, 40.0]
        else:
            raise ValueError("Wrong units in get_colorlist: %s" % units)
    elif colorscale == "STEPS-BE":
        color_list = [
            "cyan",
            "deepskyblue",
            "dodgerblue",
            "blue",
            "chartreuse",
            "limegreen",
            "green",
            "darkgreen",
            "yellow",
            "gold",
            "orange",
            "red",
            "magenta",
            "darkmagenta",
        ]
        if units in ["mm/h", "mm"]:
            clevs = [0.1, 0.25, 0.4, 0.63, 1, 1.6, 2.5, 4, 6.3, 10, 16, 25, 40, 63, 100]
        elif units == "dBZ":
            clevs = np.arange(10, 65, 5)
        else:
            raise ValueError("Wrong units in get_colorlist: %s" % units)
    else:
        print("Invalid colorscale", colorscale)
        raise ValueError("Invalid colorscale " + colorscale)

    # Generate color level strings with correct amount of decimal places
    clevs_str = _dynamic_formatting_floats(clevs)

    return color_list, clevs, clevs_str


def _dynamic_formatting_floats(float_array, colorscale="pysteps"):
    """Function to format the floats defining the class limits of the colorbar."""
    float_array = np.array(float_array, dtype=float)

    labels = []
    for label in float_array:
        # Number of decimals depends on the magnitude of the value.
        if 0.1 <= label < 1:
            if colorscale == "pysteps":
                formatting = ",.2f"
            else:
                formatting = ",.1f"
        elif 0.01 <= label < 0.1:
            formatting = ",.2f"
        elif 0.001 <= label < 0.01:
            formatting = ",.3f"
        elif 0.0001 <= label < 0.001:
            formatting = ",.4f"
        elif label >= 1 and label.is_integer():
            # "i" is a sentinel (not a format spec) handled below.
            formatting = "i"
        else:
            formatting = ",.1f"

        if formatting != "i":
            labels.append(format(label, formatting))
        else:
            labels.append(str(int(label)))

    return labels


def _validate_colormap_config(colormap_config, ptype):
    """Validate the colormap configuration provided by the user."""
    # Ensure colormap_config has the necessary attributes
    required_attrs = ["cmap", "norm", "clevs"]
    missing_attrs = [
        attr for attr in required_attrs if not hasattr(colormap_config, attr)
    ]
    if missing_attrs:
        raise ValueError(
            f"colormap_config is missing required attributes: {', '.join(missing_attrs)}"
        )

    # Ensure that ptype is appropriate when colormap_config is provided
    if ptype not in ["intensity", "depth"]:
        raise ValueError(
            "colormap_config is only supported for
ptype='intensity' or 'depth'"
        )

    cmap = colormap_config.cmap
    clevs = colormap_config.clevs

    # Validate that the number of colors matches len(clevs)
    if isinstance(cmap, colors.ListedColormap):
        num_colors = len(cmap.colors)
    else:
        num_colors = cmap.N

    expected_colors = len(clevs)
    if num_colors != expected_colors:
        raise ValueError(
            f"Number of colors in colormap (N={num_colors}) does not match len(clevs) (N={expected_colors})."
        )

    return colormap_config.cmap, colormap_config.norm, colormap_config.clevs


================================================
FILE: pysteps/visualization/spectral.py
================================================
# -*- coding: utf-8 -*-
"""
pysteps.visualization.spectral
==============================

Methods for plotting Fourier spectra.

.. autosummary::
    :toctree: ../generated/

    plot_spectrum1d
"""

import matplotlib.pylab as plt
import numpy as np


def plot_spectrum1d(
    fft_freq,
    fft_power,
    x_units=None,
    y_units=None,
    wavelength_ticks=None,
    color="k",
    lw=1.0,
    label=None,
    ax=None,
    **kwargs,
):
    """
    Function to plot in log-log a radially averaged Fourier spectrum.

    Parameters
    ----------
    fft_freq: array-like
        1d array containing the Fourier frequencies computed with the function
        :py:func:`pysteps.utils.spectral.rapsd`.
    fft_power: array-like
        1d array containing the radially averaged Fourier power spectrum
        computed with the function :py:func:`pysteps.utils.spectral.rapsd`.
    x_units: str, optional
        Units of the X variable (distance, e.g. "km").
    y_units: str, optional
        Units of the Y variable (amplitude, e.g. "dBR").
    wavelength_ticks: array-like, optional
        List of wavelengths where to show xticklabels.
    color: str, optional
        Line color.
    lw: float, optional
        Line width.
    label: str, optional
        Label (for legend).
    ax: Axes, optional
        Plot axes.

    Returns
    -------
    ax: Axes
        Plot axes
    """
    # Check input dimensions
    n_freq = len(fft_freq)
    n_pow = len(fft_power)
    if n_freq != n_pow:
        raise ValueError(
            f"Dimensions of the 1d input arrays must be equal. {n_freq} vs {n_pow}"
        )

    if ax is None:
        ax = plt.subplot(111)

    # Plot spectrum in log-log scale
    ax.plot(
        10 * np.log10(fft_freq),
        10 * np.log10(fft_power),
        color=color,
        linewidth=lw,
        label=label,
        **kwargs,
    )

    # X-axis
    if wavelength_ticks is not None:
        # Ticks are placed at the frequencies of the requested wavelengths
        # but labeled with the wavelengths themselves.
        wavelength_ticks = np.array(wavelength_ticks)
        freq_ticks = 1 / wavelength_ticks
        ax.set_xticks(10 * np.log10(freq_ticks))
        ax.set_xticklabels(wavelength_ticks)
        if x_units is not None:
            ax.set_xlabel(f"Wavelength [{x_units}]")
    else:
        if x_units is not None:
            ax.set_xlabel(f"Frequency [1/{x_units}]")

    # Y-axis
    if y_units is not None:
        # { -> {{ with f-strings
        power_units = rf"$10log_{{ 10 }}(\frac{{ {y_units}^2 }}{{ {x_units} }})$"
        ax.set_ylabel(f"Power {power_units}")

    return ax


================================================
FILE: pysteps/visualization/thunderstorms.py
================================================
# -*- coding: utf-8 -*-
"""
pysteps.visualization.tstorm
============================

Methods for plotting thunderstorm cells.

Created on Wed Nov 4 11:09:44 2020

@author: mfeldman

.. autosummary::
    :toctree: ../generated/

    plot_track
    plot_cart_contour
"""
import matplotlib.pyplot as plt
import numpy as np

################################
# track and contour plots zorder
# - precipitation: 40


def plot_track(track_list, geodata=None, ref_shape=None):
    """
    Plot storm tracks.

    .. _Axes: https://matplotlib.org/api/axes_api.html#matplotlib.axes.Axes

    Parameters
    ----------
    track_list: list
        List of tracks provided by DATing.
    geodata: dictionary or None, optional
        Optional dictionary containing geographical information about
        the field. If not None, plots the contours in a georeferenced frame.
    ref_shape: (vertical, horizontal)
        Shape of the 2D precipitation field used to find the cells' contours.
        This is only needed only if `geodata=None`.

        IMPORTANT: If `geodata=None` it is assumed that the y-origin of the
        reference precipitation fields is the upper-left corner
        (yorigin="upper").

    Returns
    -------
    ax: fig Axes_
        Figure axes.
""" ax = plt.gca() pix2coord = _pix2coord_factory(geodata, ref_shape) color = iter(plt.cm.spring(np.linspace(0, 1, len(track_list)))) for track in track_list: cen_x, cen_y = pix2coord(track.cen_x, track.cen_y) ax.plot(cen_x, cen_y, c=next(color), zorder=40) return ax def plot_cart_contour(contours, geodata=None, ref_shape=None): """ Plots input image with identified cell contours. Also, this function can be user to add points of interest to a plot. .. _Axes: https://matplotlib.org/api/axes_api.html#matplotlib.axes.Axes Parameters ---------- contours: list or dataframe-element list of identified cell contours. geodata: dictionary or None, optional Optional dictionary containing geographical information about the field. If not None, plots the contours in a georeferenced frame. ref_shape: (vertical, horizontal) Shape of the 2D precipitation field used to find the cells' contours. This is only needed only if `geodata=None`. IMPORTANT: If `geodata=None` it is assumed that the y-origin of the reference precipitation fields is the upper-left corner (yorigin="upper"). Returns ------- ax: fig Axes_ Figure axes. """ ax = plt.gca() pix2coord = _pix2coord_factory(geodata, ref_shape) contours = list(contours) for contour in contours: for c in contour: x, y = pix2coord(c[:, 1], c[:, 0]) ax.plot(x, y, color="black", zorder=40) return ax def _pix2coord_factory(geodata, ref_shape): """ Construct the pix2coord transformation function.""" if geodata is not None: def pix2coord(x_input, y_input): x = geodata["x1"] + geodata["xpixelsize"] * x_input if geodata["yorigin"] == "lower": y = geodata["y1"] + geodata["ypixelsize"] * y_input else: y = geodata["y2"] - geodata["ypixelsize"] * y_input return x, y else: if ref_shape is None: raise ValueError("'ref_shape' can't be None when not geodata is available.") # Default pix2coord function when no geographical information is present. 
def pix2coord(x_input, y_input): # yorigin is "upper" by default return x_input, ref_shape[0] - y_input return pix2coord ================================================ FILE: pysteps/visualization/utils.py ================================================ """ pysteps.visualization.utils =========================== Miscellaneous utility functions for the visualization module. .. autosummary:: :toctree: ../generated/ parse_proj4_string proj4_to_cartopy reproject_geodata get_geogrid get_basemap_axis """ import warnings import matplotlib.pylab as plt import numpy as np from pysteps.exceptions import MissingOptionalDependency from pysteps.visualization import basemaps try: import cartopy.crs as ccrs from cartopy.mpl.geoaxes import GeoAxesSubplot PYPROJ_PROJECTION_TO_CARTOPY = dict( tmerc=ccrs.TransverseMercator, laea=ccrs.LambertAzimuthalEqualArea, lcc=ccrs.LambertConformal, merc=ccrs.Mercator, utm=ccrs.UTM, stere=ccrs.Stereographic, aea=ccrs.AlbersEqualArea, aeqd=ccrs.AzimuthalEquidistant, # Note: ccrs.epsg(2056) doesn't work because the projection # limits are too strict. # We'll use the Stereographic projection as an alternative. somerc=ccrs.Stereographic, geos=ccrs.Geostationary, ) CARTOPY_IMPORTED = True except ImportError: CARTOPY_IMPORTED = False PYPROJ_PROJECTION_TO_CARTOPY = dict() GeoAxesSubplot = None ccrs = None try: import pyproj PYPROJ_IMPORTED = True except ImportError: PYPROJ_IMPORTED = False PYPROJ_PROJ_KWRDS_TO_CARTOPY = { "lon_0": "central_longitude", "lat_0": "central_latitude", "lat_ts": "true_scale_latitude", "x_0": "false_easting", "y_0": "false_northing", "k": "scale_factor", "zone": "zone", } PYPROJ_GLOB_KWRDS_TO_CARTOPY = { "a": "semimajor_axis", "b": "semiminor_axis", "datum": "datum", "ellps": "ellipse", "f": "flattening", "rf": "inverse_flattening", } def parse_proj4_string(proj4str): """ Construct a dictionary from a PROJ.4 projection string. Parameters ---------- proj4str: str A PROJ.4-compatible projection string. 
Returns
    -------
    out: dict
        Dictionary, where keys and values are parsed from the projection
        parameter tokens beginning with '+'.
    """
    if not PYPROJ_IMPORTED:
        raise MissingOptionalDependency(
            "pyproj package is required for parse_proj4_string function utility "
            "but it is not installed"
        )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        # Ignore the warning raised by to_dict() about losing information.
        proj_dict = pyproj.Proj(proj4str).crs.to_dict()

    return proj_dict


def proj4_to_cartopy(proj4str):
    """
    Convert a PROJ.4 projection string into a Cartopy coordinate reference
    system (crs) object.

    Parameters
    ----------
    proj4str: str
        A PROJ.4-compatible projection string.

    Returns
    -------
    out: object
        Instance of a crs class defined in cartopy.crs.
    """
    if not CARTOPY_IMPORTED:
        raise MissingOptionalDependency(
            "cartopy package is required for proj4_to_cartopy function "
            "utility but it is not installed"
        )

    if not PYPROJ_IMPORTED:
        raise MissingOptionalDependency(
            "pyproj package is required for proj4_to_cartopy function utility "
            "but it is not installed"
        )

    proj = pyproj.Proj(proj4str)

    try:
        # pyproj >= 2.2.0
        is_geographic = proj.crs.is_geographic
    except AttributeError:
        # pyproj < 2.2.0
        is_geographic = proj.is_latlong()

    if is_geographic:
        return ccrs.PlateCarree()

    proj_dict = parse_proj4_string(proj4str)

    cartopy_crs_kwargs = dict()
    globe_kwargs = dict()

    cartopy_crs = None
    globe = None

    # Translate each PROJ.4 token into the corresponding cartopy keyword.
    for key, value in proj_dict.items():
        if key == "proj":
            if value in PYPROJ_PROJECTION_TO_CARTOPY:
                cartopy_crs = PYPROJ_PROJECTION_TO_CARTOPY[value]
            else:
                raise ValueError(f"Unsupported projection: {value}")

        if key in PYPROJ_PROJ_KWRDS_TO_CARTOPY:
            cartopy_crs_kwargs[PYPROJ_PROJ_KWRDS_TO_CARTOPY[key]] = value

        if key in PYPROJ_GLOB_KWRDS_TO_CARTOPY:
            globe_kwargs[PYPROJ_GLOB_KWRDS_TO_CARTOPY[key]] = value

    # issubset: <=
    if {"lat_1", "lat_2"} <= proj_dict.keys():
        cartopy_crs_kwargs["standard_parallels"] = (
            proj_dict["lat_1"],
            proj_dict["lat_2"],
        )

    # A spherical earth: use the same radius for both axes.
    if "R" in proj_dict.keys():
        globe_kwargs["semimajor_axis"] = proj_dict["R"]
        globe_kwargs["semiminor_axis"] = proj_dict["R"]

    if globe_kwargs:
        globe = ccrs.Globe(**globe_kwargs)

    if isinstance(cartopy_crs, ccrs.Mercator):
        # cartopy's Mercator does not accept false easting/northing kwargs.
        cartopy_crs_kwargs.pop("false_easting", None)
        cartopy_crs_kwargs.pop("false_northing", None)

    return cartopy_crs(globe=globe, **cartopy_crs_kwargs)


def reproject_geodata(geodata, t_proj4str, return_grid=None):
    """
    Reproject geodata and optionally create a grid in a new projection.

    Parameters
    ----------
    geodata: dictionary
        Dictionary containing geographical information about the field.
        It must contain the attributes projection, x1, x2, y1, y2,
        xpixelsize, ypixelsize, as defined in the documentation of
        pysteps.io.importers.
    t_proj4str: str
        The target PROJ.4-compatible projection string (fallback).
    return_grid: {None, 'coords', 'quadmesh'}, optional
        Whether to return the coordinates of the projected grid.
        The default return_grid=None does not compute the grid,
        return_grid='coords' returns the centers of projected grid points,
        return_grid='quadmesh' returns the coordinates of the quadrilaterals
        (e.g. to be used by pcolormesh).

    Returns
    -------
    geodata: dictionary
        Dictionary containing the reprojected geographical information
        and optionally the required X_grid and Y_grid.
        It also includes a fixed boolean attribute regular_grid=False to
        indicate that the reprojected grid has no regular spacing.
""" if not PYPROJ_IMPORTED: raise MissingOptionalDependency( "pyproj package is required for reproject_geodata function utility" " but it is not installed" ) geodata = geodata.copy() s_proj4str = geodata["projection"] extent = (geodata["x1"], geodata["x2"], geodata["y1"], geodata["y2"]) shape = ( int((geodata["y2"] - geodata["y1"]) / geodata["ypixelsize"]), int((geodata["x2"] - geodata["x1"]) / geodata["xpixelsize"]), ) s_srs = pyproj.Proj(s_proj4str) t_srs = pyproj.Proj(t_proj4str) x1 = extent[0] x2 = extent[1] y1 = extent[2] y2 = extent[3] # Reproject grid on fall-back projection if return_grid is not None: if return_grid == "coords": y_coord = ( np.linspace(y1, y2, shape[0], endpoint=False) + geodata["ypixelsize"] / 2.0 ) x_coord = ( np.linspace(x1, x2, shape[1], endpoint=False) + geodata["xpixelsize"] / 2.0 ) elif return_grid == "quadmesh": y_coord = np.linspace(y1, y2, shape[0] + 1) x_coord = np.linspace(x1, x2, shape[1] + 1) else: raise ValueError("unknown return_grid value %s" % return_grid) x_grid, y_grid = np.meshgrid(x_coord, y_coord) x_grid, y_grid = pyproj.transform( s_srs, t_srs, x_grid.flatten(), y_grid.flatten() ) x_grid = x_grid.reshape((y_coord.size, x_coord.size)) y_grid = y_grid.reshape((y_coord.size, x_coord.size)) geodata["X_grid"] = x_grid geodata["Y_grid"] = y_grid # Reproject extent on fall-back projection x1, y1 = pyproj.transform(s_srs, t_srs, x1, y1) x2, y2 = pyproj.transform(s_srs, t_srs, x2, y2) # update geodata geodata["projection"] = t_proj4str geodata["x1"] = x1 geodata["x2"] = x2 geodata["y1"] = y1 geodata["y2"] = y2 geodata["regular_grid"] = False geodata["xpixelsize"] = None geodata["ypixelsize"] = None return geodata def get_geogrid(nlat, nlon, geodata=None): """ Get the geogrid data. If geodata is None, a regular grid is returned. In this case, it is assumed that the origin of the 2D input data is the upper left corner ("upper"). 
Parameters ---------- nlat: int Number of grid points along the latitude axis nlon: int Number of grid points along the longitude axis geodata: geodata: dictionary or None Optional dictionary containing geographical information about the field. If geodata is not None, it must contain the following key-value pairs: .. tabularcolumns:: |p{1.5cm}|L| +----------------+----------------------------------------------------+ | Key | Value | +================+====================================================+ | projection | PROJ.4-compatible projection definition | +----------------+----------------------------------------------------+ | x1 | x-coordinate of the lower-left corner of the data | | | raster | +----------------+----------------------------------------------------+ | y1 | y-coordinate of the lower-left corner of the data | | | raster | +----------------+----------------------------------------------------+ | x2 | x-coordinate of the upper-right corner of the data | | | raster | +----------------+----------------------------------------------------+ | y2 | y-coordinate of the upper-right corner of the data | | | raster | +----------------+----------------------------------------------------+ | yorigin | a string specifying the location of the first | | | element in the data raster w.r.t. y-axis: | | | 'upper' = upper border, 'lower' = lower border | +----------------+----------------------------------------------------+ Returns ------- x_grid: 2D array X grid with dimensions of (nlat, nlon) with the same `y-origin` as the one specified in the geodata (or "upper" if geodata is None). y_grid: 2D array Y grid with dimensions of (nlat, nlon) with the same `y-origin` as the one specified in the geodata (or "upper" if geodata is None). extent: tuple Four-element tuple specifying the extent of the domain according to (lower left x, upper right x, lower left y, upper right y). regular_grid: bool True if the grid is regular. False otherwise. 
def get_basemap_axis(extent, geodata=None, ax=None, map_kwargs=None):
    """
    Safely get a basemap axis.

    If ax is None, the current axis is returned. If geodata is not None and ax
    is not a cartopy axis already, it creates a basemap axis and return it.

    Parameters
    ----------
    extent: tuple
        Four-element tuple specifying the extent of the domain according to
        (lower left x, upper right x, lower left y, upper right y).
    geodata: dictionary or None
        Optional dictionary containing geographical information about
        the field. If geodata is not None, it must contain the following
        key-value pairs:

        .. tabularcolumns:: |p{1.5cm}|L|

        +----------------+----------------------------------------------------+
        |       Key      |                Value                               |
        +================+====================================================+
        |   projection   | PROJ.4-compatible projection definition            |
        +----------------+----------------------------------------------------+
        |    x1          | x-coordinate of the lower-left corner of the data  |
        |                | raster                                             |
        +----------------+----------------------------------------------------+
        |    y1          | y-coordinate of the lower-left corner of the data  |
        |                | raster                                             |
        +----------------+----------------------------------------------------+
        |    x2          | x-coordinate of the upper-right corner of the data |
        |                | raster                                             |
        +----------------+----------------------------------------------------+
        |    y2          | y-coordinate of the upper-right corner of the data |
        |                | raster                                             |
        +----------------+----------------------------------------------------+
        |    yorigin     | a string specifying the location of the first      |
        |                | element in the data raster w.r.t. y-axis:          |
        |                | 'upper' = upper border, 'lower' = lower border     |
        +----------------+----------------------------------------------------+
    ax: axis object
        Optional axis object to use for plotting.
    map_kwargs: dict
        Optional parameters that need to be passed to
        :py:func:`pysteps.visualization.basemaps.plot_geography`.

    Returns
    -------
    ax: axis object
    """
    if map_kwargs is None:
        map_kwargs = dict()

    if ax is None:
        # If no axes is passed, use the current axis.
        ax = plt.gca()

    # Create the cartopy axis if the axis is not a cartopy axis.
    if geodata is not None:
        if not CARTOPY_IMPORTED:
            # NOTE: the original message named "get_geogrid" here — a
            # copy-paste error from the sibling function. Fixed to name
            # this function so users can locate the failing call.
            warnings.warn(
                "cartopy package is required for the get_basemap_axis function "
                "but it is not installed. Ignoring geographical information."
            )
            return ax
        if not PYPROJ_IMPORTED:
            warnings.warn(
                "pyproj package is required for the get_basemap_axis function "
                "but it is not installed. Ignoring geographical information."
            )
            return ax

        if not isinstance(ax, GeoAxesSubplot):
            # Check `ax` is not a GeoAxesSubplot axis to avoid overwriting the map.
            ax = basemaps.plot_geography(geodata["projection"], extent, **map_kwargs)

    return ax
extra_link_args = data.get("extra_link_args", common_link_args) pysteps_extension = Extension( name, sources=data["sources"], depends=data.get("depends", []), include_dirs=include, language=data.get("language", "c"), define_macros=data.get("macros", []), extra_compile_args=extra_compile_args, extra_link_args=extra_link_args, ) extensions.append(pysteps_extension) external_modules = cythonize(extensions, force=True, language_level=3) requirements = [ "numpy", "jsmin", "scipy", "matplotlib", "jsonschema", ] setup( name="pysteps", version="1.20.0", author="PySteps developers", packages=find_packages(), license="LICENSE", include_package_data=True, description="Python framework for short-term ensemble prediction systems", long_description=open("README.rst").read(), long_description_content_type="text/x-rst", url="https://pysteps.github.io/", project_urls={ "Source": "https://github.com/pySTEPS/pysteps", "Issues": "https://github.com/pySTEPS/pysteps/issues", "CI": "https://github.com/pySTEPS/pysteps/actions", "Changelog": "https://github.com/pySTEPS/pysteps/releases", "Documentation": "https://pysteps.readthedocs.io", }, classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering :: Atmospheric Science", "Topic :: Scientific/Engineering :: Hydrology", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", ], ext_modules=external_modules, setup_requires=requirements, install_requires=requirements, ) ================================================ FILE: tox.ini ================================================ # Tox configuration file for pysteps projects # Need conda, tox and tox-conda installed to run # # In conda run: # > conda install -c conda-forge tox tox-conda # 
# Alternatively, you can install them using pip:
# > pip install tox tox-conda
#
# Then, to run the tests, from the repo’s root run:
#
# > tox             # Run pytests
# > tox -e install  # Test package installation
# > tox -e black    # Test for black formatting warnings

[tox]
# Keep this list in sync with the Python versions declared in the setup.py
# classifiers (currently 3.11 - 3.13). The previous py37/py38/py39 list was
# stale: requirements_dev.txt requires python>=3.10.
envlist = py311, py312, py313

[testenv]
description = Run the pysteps test suite
deps =
    -r{toxinidir}/requirements.txt
    cython
    dask
    toolz
    pillow
    pyfftw
    h5py
    PyWavelets
    scikit-learn
    gitpython
    pytest
    pytest-cov
    codecov
conda_deps =
    netCDF4
    pyproj
    cartopy
    pygrib
    rasterio
conda_channels = conda-forge
setenv =
    PYSTEPS_DATA_PATH = {toxworkdir}/pysteps-data
    PYSTEPSRC = {toxworkdir}/pysteps-data/pystepsrc
    PACKAGE_ROOT = {toxinidir}
    PROJ_LIB={envdir}/share/proj
commands =
    python {toxinidir}/ci/fetch_pysteps_data.py
    pytest --pyargs pysteps --cov=pysteps -ra --disable-warnings

[test_no_cov]
commands =
    python {toxinidir}/ci/fetch_pysteps_data.py
    pytest --pyargs pysteps --disable-warnings

[testenv:install]
description = Test the installation of the package in a clean environment and run minimal tests
deps = pytest
conda_deps =
changedir = {homedir}
commands =
    pip install -U {toxinidir}/
    python -c "import pysteps"
    # Test the pysteps plugin support
    pip install cookiecutter
    cookiecutter -f --no-input https://github.com/pySTEPS/cookiecutter-pysteps-plugin -o {temp_dir}/
    # NB: this should match the default name for a cookiecutter-generated plugin!
    pip install {temp_dir}/pysteps-importer-institution-name
    python {toxinidir}/ci/test_plugin_support.py
    # Check the compiled modules
    python -c "from pysteps import motion"
    python -c "from pysteps.motion import vet"
    python -c "from pysteps.motion import proesmans"

[testenv:install_full]
description = Test the installation of the package in an environment with all the dependencies
changedir = {homedir}
commands =
    {[testenv:install]commands}
    {[test_no_cov]commands}

[testenv:pypi]
description = Test the installation of the package from the PyPI in a clean environment
deps = pytest
conda_deps =
changedir = {homedir}
commands =
    pip install --no-cache-dir pysteps
    python -c "import pysteps"
    {[test_no_cov]commands}

[testenv:pypi_test]
description = Test the installation of the package from the test-PyPI in a clean environment
deps = pytest
conda_deps =
changedir = {homedir}
commands =
    pip install --no-cache-dir --index-url https://test.pypi.org/simple/ --extra-index-url=https://pypi.org/simple/ pysteps
    python -c "import pysteps"
    {[test_no_cov]commands}

[testenv:pypi_test_full]
description = Test the installation of the package from the test-PyPI in an environment with all the dependencies
changedir = {homedir}
commands =
    {[testenv:pypi_test]commands}

[testenv:docs]
description = Build the html documentation using sphinx
usedevelop = True
deps =
    -r{toxinidir}/requirements.txt
    -r{toxinidir}/doc/requirements.txt
    cython
conda_channels = conda-forge default
changedir = doc
setenv =
    PYSTEPS_DATA_PATH = {toxworkdir}/pysteps-data
    PYSTEPSRC = {toxworkdir}/pysteps-data/pystepsrc
commands =
    python {toxinidir}/ci/fetch_pysteps_data.py
    sphinx-build -b html source _build

[testenv:black]
deps = black
commands = black --check pysteps