Repository: google/weather-tools
Branch: main
Commit: cc9d5b2f2bae
Files: 238
Total size: 183.3 MB

Directory structure:
weather-tools/

├── .github/
│   └── workflows/
│       ├── ci.yml
│       └── publish.yml
├── .gitignore
├── .read-the-docs.yaml
├── CONTRIBUTING.md
├── Configuration.md
├── Dockerfile
├── Efficient-Requests.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── Runners.md
├── Runtime-Container.md
├── bin/
│   ├── install-branch
│   └── post-push
├── ci3.8.yml
├── ci3.9.yml
├── configs/
│   ├── era5_example_config.cfg
│   ├── era5_example_config_local_run.cfg
│   ├── era5_example_config_local_run.json
│   ├── era5_example_config_preproc.cfg
│   ├── era5_example_config_using_date.cfg
│   ├── era5_example_monthly_soil.cfg
│   ├── mars_example_config.cfg
│   ├── mars_example_config.json
│   ├── multiple_multiple_licenses/
│   │   ├── era5_pressure500.cfg
│   │   ├── era5_pressure600.cfg
│   │   └── era5_pressure700.cfg
│   ├── multiple_single_license/
│   │   ├── era5_pressure500.cfg
│   │   ├── era5_pressure600.cfg
│   │   └── era5_pressure700.cfg
│   ├── s2s_operational_forecast_example.cfg
│   ├── seasonal_forecast_example_config.cfg
│   ├── tigge_example_config.cfg
│   └── yesterdays_surface_example.cfg
├── docs/
│   ├── Makefile
│   ├── Private-IP-Configuration.md
│   ├── _static/
│   │   └── custom.css
│   ├── conf.py
│   ├── download_pipeline.md
│   ├── index.md
│   ├── loader_pipeline.md
│   ├── make.bat
│   ├── modules.md
│   ├── requirements.txt
│   └── splitter_pipeline.md
├── environment.yml
├── pyproject.toml
├── setup.cfg
├── setup.py
├── tox.ini
├── weather_dl/
│   ├── MANIFEST.in
│   ├── README.md
│   ├── __init__.py
│   ├── download_pipeline/
│   │   ├── __init__.py
│   │   ├── clients.py
│   │   ├── clients_test.py
│   │   ├── config.py
│   │   ├── config_test.py
│   │   ├── fetcher.py
│   │   ├── fetcher_test.py
│   │   ├── manifest.py
│   │   ├── manifest_test.py
│   │   ├── parsers.py
│   │   ├── parsers_test.py
│   │   ├── partition.py
│   │   ├── partition_test.py
│   │   ├── pipeline.py
│   │   ├── pipeline_test.py
│   │   ├── stores.py
│   │   ├── stores_test.py
│   │   ├── util.py
│   │   └── util_test.py
│   ├── setup.py
│   └── weather-dl
├── weather_dl_v2/
│   ├── README.md
│   ├── __init__.py
│   ├── cli/
│   │   ├── CLI-Documentation.md
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── VERSION.txt
│   │   ├── app/
│   │   │   ├── __init__.py
│   │   │   ├── cli_config.py
│   │   │   ├── data/
│   │   │   │   └── cli_config.json
│   │   │   ├── main.py
│   │   │   ├── services/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── download_service.py
│   │   │   │   ├── license_service.py
│   │   │   │   ├── network_service.py
│   │   │   │   └── queue_service.py
│   │   │   ├── subcommands/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── config.py
│   │   │   │   ├── download.py
│   │   │   │   ├── license.py
│   │   │   │   └── queue.py
│   │   │   └── utils.py
│   │   ├── environment.yml
│   │   ├── setup.py
│   │   └── vm-startup.sh
│   ├── cloudbuild.yml
│   ├── config.json
│   ├── downloader_kubernetes/
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── VERSION.txt
│   │   ├── downloader.py
│   │   ├── downloader_config.py
│   │   ├── environment.yml
│   │   ├── manifest.py
│   │   └── util.py
│   ├── fastapi-server/
│   │   ├── API-Interactions.md
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── VERSION.txt
│   │   ├── __init__.py
│   │   ├── config_processing/
│   │   │   ├── config.py
│   │   │   ├── manifest.py
│   │   │   ├── parsers.py
│   │   │   ├── partition.py
│   │   │   ├── pipeline.py
│   │   │   ├── stores.py
│   │   │   └── util.py
│   │   ├── database/
│   │   │   ├── __init__.py
│   │   │   ├── download_handler.py
│   │   │   ├── license_handler.py
│   │   │   ├── manifest_handler.py
│   │   │   ├── queue_handler.py
│   │   │   ├── session.py
│   │   │   └── storage_handler.py
│   │   ├── environment.yml
│   │   ├── example.cfg
│   │   ├── license_dep/
│   │   │   ├── deployment_creator.py
│   │   │   └── license_deployment.yaml
│   │   ├── logging.conf
│   │   ├── main.py
│   │   ├── routers/
│   │   │   ├── download.py
│   │   │   ├── license.py
│   │   │   └── queues.py
│   │   ├── server.yaml
│   │   ├── server_config.py
│   │   └── tests/
│   │       ├── __init__.py
│   │       ├── integration/
│   │       │   ├── __init__.py
│   │       │   ├── test_download.py
│   │       │   ├── test_license.py
│   │       │   └── test_queues.py
│   │       └── test_data/
│   │           ├── example.cfg
│   │           └── not_exist.cfg
│   └── license_deployment/
│       ├── Dockerfile
│       ├── README.md
│       ├── VERSION.txt
│       ├── __init__.py
│       ├── clients.py
│       ├── config.py
│       ├── database.py
│       ├── deployment_config.py
│       ├── downloader.yaml
│       ├── environment.yml
│       ├── fetch.py
│       ├── job_creator.py
│       ├── manifest.py
│       └── util.py
├── weather_mv/
│   ├── MANIFEST.in
│   ├── README.md
│   ├── __init__.py
│   ├── loader_pipeline/
│   │   ├── __init__.py
│   │   ├── bq.py
│   │   ├── bq_test.py
│   │   ├── ee.py
│   │   ├── ee_test.py
│   │   ├── execution_test.py
│   │   ├── metrics.py
│   │   ├── pipeline.py
│   │   ├── pipeline_test.py
│   │   ├── regrid.py
│   │   ├── regrid_test.py
│   │   ├── sinks.py
│   │   ├── sinks_test.py
│   │   ├── streaming.py
│   │   ├── streaming_test.py
│   │   ├── util.py
│   │   └── util_test.py
│   ├── setup.py
│   ├── test_data/
│   │   ├── test_data.zarr/
│   │   │   ├── .zattrs
│   │   │   ├── .zgroup
│   │   │   ├── .zmetadata
│   │   │   ├── cape/
│   │   │   │   ├── .zarray
│   │   │   │   ├── .zattrs
│   │   │   │   └── 0.0.0
│   │   │   ├── d2m/
│   │   │   │   ├── .zarray
│   │   │   │   ├── .zattrs
│   │   │   │   └── 0.0.0
│   │   │   ├── latitude/
│   │   │   │   ├── .zarray
│   │   │   │   ├── .zattrs
│   │   │   │   └── 0
│   │   │   ├── longitude/
│   │   │   │   ├── .zarray
│   │   │   │   ├── .zattrs
│   │   │   │   └── 0
│   │   │   └── time/
│   │   │       ├── .zarray
│   │   │       ├── .zattrs
│   │   │       └── 0
│   │   ├── test_data_20180101.nc
│   │   ├── test_data_corrupt_grib
│   │   ├── test_data_grib_multiple_edition_single_timestep.bz2
│   │   ├── test_data_grib_single_timestep
│   │   ├── test_data_has_nan.nc
│   │   ├── test_data_single_point.nc
│   │   └── test_data_tif_time.tif
│   └── weather-mv
├── weather_sp/
│   ├── MANIFEST.in
│   ├── README.md
│   ├── __init__.py
│   ├── setup.py
│   ├── splitter_pipeline/
│   │   ├── __init__.py
│   │   ├── file_name_utils.py
│   │   ├── file_name_utils_test.py
│   │   ├── file_splitters.py
│   │   ├── file_splitters_test.py
│   │   ├── pipeline.py
│   │   ├── pipeline_test.py
│   │   └── streaming.py
│   ├── test_data/
│   │   ├── era5_sample.grib
│   │   ├── era5_sample.nc
│   │   └── era5_sample_grib
│   └── weather-sp
└── xql/
    ├── README.md
    ├── main.py
    ├── setup.py
    └── src/
        ├── __init__.py
        ├── weather_lm/
        │   ├── __init__.py
        │   ├── constant.py
        │   ├── gemini.py
        │   ├── template.py
        │   └── utils.py
        └── xql/
            ├── __init__.py
            ├── apply.py
            ├── constant.py
            ├── open.py
            ├── utils.py
            └── where.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/ci.yml
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: CI

on:
  # Triggers the workflow on push or pull request events but only for the main branch
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

env:
  CDSAPI_URL: https://cds.climate.copernicus.eu/api
  CDSAPI_KEY: 1234567-ab12-34cd-9876-4o4fake90909  # A fake key for testing
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.9"]
    steps:
    - name: Cancel previous
      uses: styfle/cancel-workflow-action@0.7.0
      with:
        access_token: ${{ github.token }}
      if: ${{ github.ref != 'refs/heads/main' }}
    - uses: actions/checkout@v2
    - name: conda cache
      uses: actions/cache@v3
      env:
        # Increase this value to reset the cache if ci3.8.yml has not changed
        CACHE_NUMBER: 0
      with:
        path: ~/conda_pkgs_dir
        key:
          ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ matrix.python-version }}-${{ hashFiles('ci3.8.yml') }}
    - name: Setup conda environment
      uses: conda-incubator/setup-miniconda@v3
      with:
        python-version: ${{ matrix.python-version }}
        channels: conda-forge
        environment-file: ci${{ matrix.python-version}}.yml
        activate-environment: weather-tools
        miniforge-variant: Miniforge3
        miniforge-version: latest
        use-mamba: true
    - name: Check MetView's installation
      shell: bash -l {0}
      run: python -m metview selfcheck
    - name: Run unit tests
      shell: bash -l {0}
      run: pytest --memray --ignore=weather_dl_v2 # Ignoring dl-v2 as it only supports py3.10
  lint:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
    steps:
      - name: Cancel previous
        uses: styfle/cancel-workflow-action@0.7.0
        with:
          access_token: ${{ github.token }}
        if: ${{ github.ref != 'refs/heads/main' }}
      - uses: actions/checkout@v2
      # The lint job defines no version matrix, so pin a single interpreter.
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: "3.8"
      - name: Get pip cache dir
        id: pip-cache
        run: |
          python -m pip install --upgrade pip wheel
          echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT"
      - name: Install linter
        run: |
          pip install ruff==0.1.2
      - name: Lint project
        run: ruff check .
  type-check:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.8"]
    steps:
      - name: Cancel previous
        uses: styfle/cancel-workflow-action@0.7.0
        with:
          access_token: ${{ github.token }}
        if: ${{ github.ref != 'refs/heads/main' }}
      - uses: actions/checkout@v2
      - name: conda cache
        uses: actions/cache@v3
        env:
          # Increase this value to reset the cache if ci3.8.yml has not changed
          CACHE_NUMBER: 0
        with:
          path: ~/conda_pkgs_dir
          key:
            ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ matrix.python-version }}-${{ hashFiles('ci3.8.yml') }}
      - name: Setup conda environment
        uses: conda-incubator/setup-miniconda@v3
        with:
          python-version: ${{ matrix.python-version }}
          channels: conda-forge
          environment-file: ci${{ matrix.python-version}}.yml
          activate-environment: weather-tools
          miniforge-variant: Miniforge3
          miniforge-version: latest
          use-mamba: true
      - name: Install weather-tools[test]
        run: |
          conda run -n weather-tools pip install -e .[test] --use-deprecated=legacy-resolver
      - name: Run type checker
        run: conda run -n weather-tools pytype

================================================
FILE: .github/workflows/publish.yml
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Build and Upload weather-tools to PyPI

on:
  release:
    types: [published]

jobs:
  build-artifacts:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2.3.1
        with:
          python-version: 3.8

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install build setuptools setuptools-scm wheel twine check-manifest

      - name: Build tarball and wheels
        run: |
          git clean -xdf
          git restore -SW .
          python -m build --sdist --wheel .
      - name: Check built artifacts
        run: |
          python -m twine check dist/*
          pwd
          if [ -f dist/weather_tools-0.0.0.tar.gz ]; then
            echo "❌ INVALID VERSION NUMBER"
            exit 1
          else
            echo "✅ Looks good"
          fi
      - uses: actions/upload-artifact@v2
        with:
          name: releases
          path: dist

  test-built-dist:
    needs: build-artifacts
    runs-on: ubuntu-latest
    steps:
      - uses: actions/setup-python@v2.3.1
        name: Install Python
        with:
          python-version: 3.8
      - uses: actions/download-artifact@v4.1.7
        with:
          name: releases
          path: dist
      - name: List contents of built dist
        run: |
          ls -ltrh
          ls -ltrh dist
      - name: Publish package to TestPyPI
        if: github.event_name == 'release'
        uses: pypa/gh-action-pypi-publish@v1.4.2
        with:
          user: __token__
          password: ${{ secrets.TESTPYPI_TOKEN }}
          repository_url: https://test.pypi.org/legacy/
          verbose: true

      - name: Check uploaded package
        if: github.event_name == 'release'
        run: |
          sleep 3
          python -m pip install --upgrade pip
          python -m pip install --extra-index-url https://test.pypi.org/simple --upgrade weather-tools
          weather-dl --help && weather-mv --help && weather-sp --help
  
  upload-to-pypi:
    needs: test-built-dist
    if: github.event_name == 'release'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/download-artifact@v4.1.7
        with:
          name: releases
          path: dist
      - name: Publish package to PyPI
        uses: pypa/gh-action-pypi-publish@v1.4.2
        with:
          user: __token__
          password: ${{ secrets.PYPI_TOKEN }}
          verbose: true

================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# JetBrains / PyCharm
.idea/

# VSCode
.vscode/
launch.json


================================================
FILE: .read-the-docs.yaml
================================================
# .readthedocs.yaml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

build:
  image: latest

# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/conf.py

# Optionally set the version of Python and requirements required to build your docs
python:
  version: "3.8"
  install:
    - requirements: docs/requirements.txt
    - method: pip
      path: .
  system_packages: false


================================================
FILE: CONTRIBUTING.md
================================================
# How to Contribute

We'd love to accept your patches and contributions to this project. There are just a few small guidelines you need to
follow.

## Contributor License Agreement

Contributions to this project must be accompanied by a Contributor License Agreement (CLA). You (or your employer)
retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to
<https://cla.developers.google.com/> to see your current agreements on file or to sign a new one.

You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it again.

## Code Reviews

All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose.
Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests.

## Community Guidelines

This project follows
[Google's Open Source Community Guidelines](https://opensource.google/conduct/).

## Pull Request Etiquette

We're thrilled to have your pull request! However, we ask that you file an issue (or check if one exists) before
submitting a change-list (CL).

In general, we're happier reviewing many small PRs over fewer, larger changes. Please try to
submit [small CLs](https://google.github.io/eng-practices/review/developer/small-cls.html).

## Project Structure

```
bin/  # Tools for development

configs/  # Save example and general purpose configs.

docs/  # Sphinx documentation, deployed to Github pages (or readthedocs).

weather_dl/  # Main python package, other pipelines can come later.
    __init__.py  
    download_pipeline/  # sources for pipeline
        __init__.py 
        ...  
        <module>.py
        <module>_test.py  # We'll follow this naming pattern for tests.
    weather-dl  # script to run pipeline
    setup.py  # packages sources for execution in beam worker
              # pipeline-specific requirements managed here
    
setup.py  # Project is pip-installable, project requirements managed here.
```

## Developer Installation

1. Set up a Python development environment with *Anaconda* or
   [*Miniconda*](https://docs.conda.io/en/latest/miniconda.html).

    * On a Mac, you can run `brew install miniconda`.

    * Use Python version 3.8.5+ for development. Python 3.8 and 3.9 are both exercised in CI
      (see [#166](https://github.com/google/weather-tools/issues/166) for the history of the 3.9 upgrade).

2. Clone the repo and install dependencies with Anaconda.

   ```shell
   # clone with HTTPS
   git clone https://github.com/google/weather-tools.git
   # clone with SSH
   git clone git@github.com:google/weather-tools.git
   cd weather-tools
   conda env create -f environment.yml
   conda activate weather-tools
   pip install -e ".[dev]"
   ```

3. Install `gcloud`, the Google Cloud CLI, following these [instructions](https://cloud.google.com/sdk/docs/install).

4. Acquire adequate permissions for the project.
    * Run `gcloud auth application-default login`.

    * Make sure your account has *write* permissions to the storage bucket as well as the permissions to create a
      Dataflow job.

    * Make sure that both Dataflow and Cloud Storage are enabled on your Google Cloud Platform project.

### Windows Developer Instructions

Windows support for each CLI is currently under development
(see [#64](https://github.com/google/weather-tools/issues/64)). However, there are workarounds available for running
the weather tools without installing them via `pip`.

For example, the would-be pip-installed script can be run directly with Python like so:

```shell
python weather_dl/weather-dl --help
```

## Testing

For testing at development time, we make use of three tools:

* `pytype` for checking types:
   ```shell
   # check everything
   pytype
   # check a specific tool
   pytype weather_dl
   ```

* `ruff` for linting:
   ```shell 
   # lint everything
   ruff check .
   # lint a specific tool
   ruff check weather_mv
   ```

* `pytest` for running tests:
   ```shell
   # test everything 
   pytest
   # test a specific tool
   pytest weather_sp
   ```

If you'd like to automate running these checks, we provide a post-push git hook:

```shell
cp bin/post-push .git/hooks/
```

This script can be run manually, too.

If you'd like to locally run these checks for all versions of python this project supports, you can use
`tox`. Tox will make use of the python versions installed on your machine and create virtual test environments on your
behalf.

```shell
tox
```

In addition, we provide a simple script to install _other_ branches locally. Run `bin/install-branch <branch-name>` to
pip install that branch's working copy of weather-tools. Hopefully, this script facilitates testing of work-in-progress
contributions.

Please review the [Beam testing docs](https://beam.apache.org/documentation/pipelines/test-your-pipeline/) for guidance
in how to write tests for the pipeline.

## Documentation

Documents are generated with Sphinx and the myst-parser. To generate the documents locally, simply invoke `make`:

```shell
cd docs
rm -r _build
make html
```

> Note: Due to the idiosyncrasies of how Sphinx detects updates and our use of symbolic links, we recommend deleting the
> `_build` folder.

Or, you can run the following subshell command to re-generate everything without having to leave the project root:

```shell
(cd docs && rm -r _build && make html)
```

After the docs are re-generated, you can view them by starting a local file server, for example:

```shell
python -m http.server -d docs/_build/html
```

## Versions & Releasing

We aim to represent the version of each tool using [semver.org](https://semver.org/) semantic versions. To that end, we
will abide by the following pattern:

- When making a change to a particular tool, please remember to update its semantic version in the `setup.py` file.
- The version for _all the tools_ should be incremented on update to _any_ tool. If one tool changes, the
  whole `google-weather-tools` package should have its version incremented.
- The representation for the version of all of `google-weather-tools` is
  via [git tags](https://git-scm.com/book/en/v2/Git-Basics-Tagging). To update the version of the package, please make
  an annotated tag with a short description of the change.
- When it's time for release, choose the latest tag and fill out the release description. Check out previous release
  notes to get an idea of how to structure the next one. These release notes are the primary changelog for the project. 


================================================
FILE: Configuration.md
================================================
# Configuration Files

Config files describe both _what_ to download and _how_ it should be downloaded. To this end, configs have two sections:
`selection` that describes the data desired from data-sources and `parameters` that define the details of the download.
By convention, the `parameters` section comes first.

Configuration files can be written in `*.cfg` or `*.json`, but typically, they're written in the former format (i.e.
Python's native config language, which is similar to INI format).
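
Since `*.cfg` configs use Python's INI-style syntax, one quick way to get a feel for the format is to parse an example
with the standard library. The sketch below is illustrative only; the tool ships its own parsing logic
(see `weather_dl/download_pipeline/parsers.py`). The point here is just that multi-line values arrive as
newline-separated strings:

```python
import configparser

config = configparser.ConfigParser()
config.read_string("""
[parameters]
client=cds
target_path=gs://bucket/era5/{year}.nc
partition_keys=
    year
[selection]
year=
    2015
    2016
""")

# Multi-line values arrive as newline-separated strings.
print(config["parameters"]["partition_keys"].split())  # ['year']
print(config["selection"]["year"].split())             # ['2015', '2016']
```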

Before jumping into the details of each section, let's look at a few example configs.

## Examples

The following examples demonstrate how to download weather data from ECMWF's Copernicus (CDS) and Meteorological Archival and
Retrieval System (MARS) catalogues.

### Download Era5 Pressure Level Reanalysis from Copernicus

```
[parameters]
client=cds  ; choose a data source client
dataset=reanalysis-era5-pressure-levels  ; specify a dataset to download from the data source (CDS-specific)
target_path=gs://ecmwf-output-test/era5/{}/{}/{}-pressure-{}.nc  ; create a template for the output file path
partition_keys=  ; define how we should partition the download by the "keys" in the `selection` section.
    year         ; See docs below for more explanation
    month
    day
    pressure_level
[selection]
product_type=ensemble_mean
format=netcdf
variable=
    divergence
    fraction_of_cloud_cover
    geopotential
pressure_level=
    500
year=    ; we can specify a list of values using multiple lines
    2015
    2016
    2017
month=
    01
day=
    01
    15
time=
    00:00
    06:00
    12:00
    18:00
```

### Download Yesterday's Surface Temperatures from MARS.

```
[parameters]
client=mars  ; download from MARS (this is the default data source).
target_path=all.an  ; Download all the data from the selection section into one file.
[selection]
class   = od
type    = analysis
levtype = surface
date    = -1  ; Yesterday -- see link to MARS request syntax in docs linked below.
time    = 00/06/12/18  ; We can specify multiple values using `/` delimiters -- see MARS request syntax docs
param   = z/sp
```

## `parameters` Section

_Parameters for the pipeline_

These describe which data source to download, where the data should live, and how the download should be partitioned.

* `client`: (required) Select the weather API client. Supported values are `cds` for Copernicus, and `mars` for MARS.
* `dataset`: (optional) Name of the target dataset. Allowed options are dictated by the client.
* `target_path`: (required) Download artifact filename template. Can use Python string format symbols. Must have the
  same number of format symbols as the number of partition keys.
* `partition_keys`: (optional) This determines how download jobs will be divided.
    * Value can be a single item or a list.
    * Each value must appear as a key in the `selection` section.
    * Each downloader will receive a config file with every parameter listed in the `selection`, _except_ for the fields
      specified by the `partition_keys`.
    * The downloader config will contain one instance of the cross-product of every key in `partition_keys`.
        * E.g. `['year', 'month']` will lead to a config set like `[(2015, 01), (2015, 02), (2015, 03), ...]`.
    * The list of keys will be used to format the `target_path`.

> **NOTE**: The `target_path` template is fully compatible with Python's standard string formatting.
> This includes being able to use named arguments (e.g. 'gs://bucket/{year}/{month}/{day}.nc') as well as specifying formats for strings 
> (e.g. 'gs://bucket/{year:04d}/{month:02d}/{day:02d}.nc').
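
To make the cross-product behavior concrete, here is a minimal Python sketch of the partitioning described above. It is
an illustration only, not the tool's actual implementation:

```python
import itertools

target_path = "gs://ecmwf-output-test/era5/{year}/{month}/{day}-pressure-{pressure_level}.nc"
selection = {
    "year": ["2015", "2016"],
    "month": ["01"],
    "day": ["01", "15"],
    "pressure_level": ["500"],
}
partition_keys = ["year", "month", "day", "pressure_level"]

# One download job per element of the cross-product of the partitioned fields.
for values in itertools.product(*(selection[k] for k in partition_keys)):
    partition = dict(zip(partition_keys, values))
    print(target_path.format(**partition))
# gs://ecmwf-output-test/era5/2015/01/01-pressure-500.nc
# gs://ecmwf-output-test/era5/2015/01/15-pressure-500.nc
# ... and so on for 2016.
```

Each formatted path corresponds to one downloader invocation, whose `selection` has the partitioned fields fixed to
that tuple's values.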

### Creating a date-based directory hierarchy

The date-based directory hierarchy can be created using Python's standard string formatting.
Below are some examples of how to use `target_path` with Python's standard string formatting.
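
As a quick illustration of the mechanics: the `%%` in the configs below is `configparser`'s escape for a literal `%`,
so the effective format spec is `{date:%Y/%m/%d}`, which Python's `str.format` applies directly to a date value:

```python
from datetime import date

# The effective template after configparser unescapes '%%' to '%'.
target_path = "gs://ecmwf-output-test/era5/{date:%Y/%m/%d}.nc"
print(target_path.format(date=date(2017, 1, 1)))
# gs://ecmwf-output-test/era5/2017/01/01.nc
```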

<details>
<summary><strong>Examples</strong></summary>

Note that any parameters that are not relevant to the target path have been omitted.

```
[parameters]
target_path=gs://ecmwf-output-test/era5/{date:%%Y/%%m/%%d}.nc
partition_keys=
     date
[selection]
date=2017-01-01/to/2017-01-02
```

will create  
`gs://ecmwf-output-test/era5/2017/01/01.nc` and  
`gs://ecmwf-output-test/era5/2017/01/02.nc`.

```
[parameters]
target_path=gs://ecmwf-output-test/era5/{date:%%Y/%%m/%%d}-pressure-{pressure_level}.nc
partition_keys=
     date
     pressure_level
[selection]
pressure_level=
    500
date=2017-01-01/to/2017-01-02
```

will create  
`gs://ecmwf-output-test/era5/2017/01/01-pressure-500.nc` and   
`gs://ecmwf-output-test/era5/2017/01/02-pressure-500.nc`.

```
[parameters]
target_path=gs://ecmwf-output-test/pressure-{pressure_level}/era5/{date:%%Y/%%m/%%d}.nc
partition_keys=
     date
     pressure_level
[selection]
pressure_level=
    500
date=2017-01-01/to/2017-01-02
```

will create  
`gs://ecmwf-output-test/pressure-500/era5/2017/01/01.nc` and  
`gs://ecmwf-output-test/pressure-500/era5/2017/01/02.nc`.

```
[parameters]
target_path=gs://ecmwf-output-test/era5/{year:04d}/{month:02d}/{day:02d}-pressure-{pressure_level}.nc
partition_keys=
    year
    month
    day
    pressure_level
[selection]
pressure_level=
    500
year=
    2017
month=
    01
day=
    01
    02
```

will create  
`gs://ecmwf-output-test/era5/2017/01/01-pressure-500.nc` and  
`gs://ecmwf-output-test/era5/2017/01/02-pressure-500.nc`.

> **Note**: Replacing the `target_path` of the above example with
> `target_path=gs://ecmwf-output-test/era5/{year}/{month}/{day}-pressure-{pressure_level}.nc`
>
> will create
>
> `gs://ecmwf-output-test/era5/2017/1/1-pressure-500.nc` and  
> `gs://ecmwf-output-test/era5/2017/1/2-pressure-500.nc`.

In addition to the above, the table below presents further partitioning examples based on recent enhancements to the weather-dl tool, particularly around date and related keywords.

<table>
  <colgroup>
    <col style="width:15%">
    <col style="width:35%">
    <col style="width:25%">
    <col style="width:25%">
  </colgroup>
  <thead>
    <tr>
      <td><h5>Keyword</h5></td>
      <td><h5>Description</h5></td>
      <td><h5>Sample Config</h5></td>
      <td><h5>Output Partitions</h5></td>
    </tr>
    <tr>
      <td>date</td>
      <td>Specifies the calendar date(s) for which data is retrieved in an ECMWF MARS request.</td>
      <td>[parameters]<br/>target_path={date}.nc<br/>partition_keys=date<br/>[selection]<br/>date=2017-01-01/to/2017-01-02</td>
      <td>2017-01-01.nc<br/>2017-01-02.nc</td>
    </tr>
    <tr>
      <td>date [step] [time] [var]</td>
      <td>Along with specifying date in the ECMWF MARS request, users can also partition data using one or more of [forecast-step, initialization-time, or variable].</td>
      <td>
        <h5>Case-1</h5>[parameters]<br/>target_path={date}_{time}.nc<br/>partition_keys=<br/>date<br/>time<br/>[selection]<br/>date=2025-01-01/to/2025-01-08/by/5<br/>time=00/12<br/>
        <h5>Case-2</h5>[parameters]<br/>target_path={date}_{step}.nc<br/>partition_keys=<br/>date<br/>step<br/>[selection]<br/>date=2024-12-31/to/2024-01-26/by/-3<br/>step=0<br/>
      </td>
      <td>
        <h5>Case-1</h5>2025-01-01_00:00:00.nc<br/>2025-01-01_12:00:00.nc<br/>2025-01-06_00:00:00.nc<br/>2025-01-06_12:00:00.nc<br/>
        <h5>Case-2</h5>2024-12-31_0.nc<br/>2024-12-28_0.nc<br/>
      </td>
    </tr>
    <tr>
      <td>year, month, day</td>
      <td>Specifies dates in a decomposed form (separate fields) for ECMWF MARS requests, enabling flexible selection across ranges and combinations. Supports multiple <b>year</b> and <b>month</b> inputs, allowing users to define broad time spans without enumerating each period.</td>
      <td>
        <h5>Case-1</h5>[parameters]<br/>target_path={year}/{year}-{month:02d}.grb2<br/>partition_keys=<br/>year<br/>month<br/>[selection]<br/>year=2021<br/>month=1/to/2<br/>day=all<br/>
        <h5>Case-2: Full year</h5>[parameters]<br/>target_path=full_{year}.nc<br/>partition_keys=year<br/>[selection]<br/>year=2001/to/2002<br/>month=1/to/12<br/>day=all<br/>
        <h5>Case-3: Odd months</h5>[parameters]<br/>target_path=odd_{year}.nc<br/>partition_keys=year<br/>[selection]<br/>year=2001/to/2002<br/>month=1/to/12/by/2<br/>day=all<br/>
        <h5>Case-4: Specific days</h5>[parameters]<br/>target_path=misc_{year}.nc<br/>partition_keys=year<br/>[selection]<br/>year=2001/to/2002<br/>month=1/to/12<br/>day=1/5/10/15<br/>
      </td>
      <td>
        <h5>Case-1</h5>2021/2021-01.grb2<br/>2021/2021-02.grb2<br/>
        <h5>Case-2</h5>full_2001.nc<br/>full_2002.nc<br/>
        <h5>Case-3</h5>odd_2001.nc<br/>odd_2002.nc<br/>
        <h5>Case-4</h5>misc_2001.nc<br/>misc_2002.nc<br/>
      </td>
    </tr>
    <tr>
      <!-- https://github.com/google/weather-tools/pull/503 -->
      <td>year-month</td>
      <td>Added support for a year‑month key, allowing users to specify downloads using month granularity instead of ranged formats.</td>
      <td>[parameters]<br/>target_path={year-month}.gb<br/>partition_keys=year-month<br/>[selection]<br/>year-month=2024-11/to/2025-02</td>
      <td>2024-11.gb<br/>2024-12.gb<br/>2025-01.gb<br/>2025-02.gb</td>
    </tr>
    <tr>
      <!-- https://github.com/google/weather-tools/pull/525 -->
      <td>date_range</td>
      <td>Added support for specifying one or more date-range values, enabling users to download data across multiple date intervals in a single run.<br/><br/>Note: date_range must be specified in partition_keys.</td>
      <td>[parameters]<br/>target_path={date_range}.nc<br/>partition_keys=date_range<br/>[selection]<br/>date_range=<br/>2017-01-01/to/2017-01-10<br/>2017-01-21/to/2017-01-31</td>
      <td>2017-01-01_to_2017-01-10.nc<br/>2017-01-21_to_2017-01-31.nc</td>
    </tr>
    <tr>
      <!-- https://github.com/google/weather-tools/issues/262 -->
      <!-- https://github.com/google/weather-tools/issues/334 -->
      <!-- https://github.com/google/weather-tools/blob/main/configs/s2s_operational_forecast_example.cfg -->
      <!-- https://critique.corp.google.com/cl/748174885/depot/google3/research/weather/anthromet/download_configs/enstrophy/ifs-ext-reforecast-pressure-levels-inst-all-levs.cfg -->
      <td>hdate</td>
      <td>This parameter allows weather‑dl to explicitly specify historical target dates for downloads, giving users precise control over which past dates are retrieved.</td>
      <td>[parameters]<br/>target_path={date}.gb<br/>[selection]<br/>date=2020-01-02<br/>hdate=1/to/3</td>
      <td>2019-01-02<br/>2018-01-02<br/>2017-01-02</td>
    </tr>
    <tr>
      <!-- https://github.com/google/weather-tools/pull/528 -->
      <td>(no partition keys specified)</td>
      <td>Introduced support for creating a single output-file when no partition_keys are specified, ensuring that all data is written into one consolidated file.</td>
      <td>[parameters]<br/>target_path=data.nc<br/>[selection]<br/>date=2017-01-01/to/2017-01-05<br/>time=00/06/12/18</td>
      <td>data.nc</td>
    </tr>
  </thead>
</table>

</details>

### Subsections

Sometimes, we'd like to alternate between several sets of parameters when calling a client. For example, certain data
sources limit the number of API requests that can be made, enforcing a maximum per license. In these cases, the user can
specify a parameters subsection. The downloader will overwrite the base parameters with the key-value pairs in each
subsection, alternating evenly between the parameter sets across the partitions (see the sketch after the example below).

To specify a subsection, create a new section with the following naming pattern: `[parameters.<subsection-name>]`.
The `<subsection-name>` can be any string, but it's recommended to choose a name that describes the grouping of values in
the section.

Here's an example of this type of configuration:

```
[parameters]
dataset=ecmwf-mars-output
target_path=gs://ecmwf-downloads/hres-single-level/{}.nc
partition_keys=
    date
[parameters.deepmind]
api_key=KKKKK1
api_url=UUUUU1
[parameters.research]
api_key=KKKKK2
api_url=UUUUU2
[parameters.cloud]
api_key=KKKKK3
api_url=UUUUU3
```
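
A minimal sketch of the alternating behavior, assuming a simple round-robin pairing of partitions with parameter sets
(the names below are illustrative, not the tool's internals):

```python
import itertools

subsections = [
    {"api_key": "KKKKK1", "api_url": "UUUUU1"},  # [parameters.deepmind]
    {"api_key": "KKKKK2", "api_url": "UUUUU2"},  # [parameters.research]
    {"api_key": "KKKKK3", "api_url": "UUUUU3"},  # [parameters.cloud]
]
partitions = ["2017-01-01", "2017-01-02", "2017-01-03", "2017-01-04"]

# Cycle through the parameter sets so requests spread evenly across licenses.
for day, creds in zip(partitions, itertools.cycle(subsections)):
    print(day, "->", creds["api_url"])
# 2017-01-01 -> UUUUU1
# 2017-01-02 -> UUUUU2
# 2017-01-03 -> UUUUU3
# 2017-01-04 -> UUUUU1
```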

## `selection` Section

_Parameters used to select desired data_

These will be passed as request parameters to the specified API client. Selections are dependent on how each data
source's catalog is structured.

### Copernicus / CDS

**License**: By using a Copernicus / CDS dataset, users agree to the terms and conditions specified for that dataset, e.g. this [License](https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels?tab=download#manage-licences).

**Catalog**: [https://cds.climate.copernicus.eu/datasets](https://cds.climate.copernicus.eu/datasets)

Visit the following to register and acquire API credentials:
_[Install the CDS API key](https://cds.climate.copernicus.eu/how-to-api)_. Afterwards, please set
the `api_url` and `api_key` arguments in the `parameters` section of your configuration. Alternatively, one can set
these values as environment variables:

```shell
export CDSAPI_URL=$api_url
export CDSAPI_KEY=$api_key
```

For CDS parameter options, check out
the [Copernicus documentation](https://cds.climate.copernicus.eu/datasets).
See [this example](https://cds.climate.copernicus.eu/datasets/reanalysis-era5-pressure-levels?tab=overview)
for what kind of requests one can make.

### MARS

**License**: By using MARS datasets, users agree to the terms and conditions specified in the [License](https://www.ecmwf.int/en/forecasts/accessing-forecasts/licences-available) document.

**Catalog**: [https://apps.ecmwf.int/archive-catalogue/](https://apps.ecmwf.int/archive-catalogue/)

Visit the following to register and acquire API credentials:
_[Install ECMWF Key](https://confluence.ecmwf.int/display/WEBAPI/Access+MARS#AccessMARS-key)_. Afterwards, please set
the `api_url`, `api_key`, and `api_email` arguments in the `parameters` section of your configuration. Alternatively,
one can set these values as environment variables:

```shell
export MARSAPI_URL=$api_url
export MARSAPI_EMAIL=$api_email
export MARSAPI_KEY=$api_key
```

For MARS parameter options, first read up on
[MARS request syntax](https://confluence.ecmwf.int/display/WEBAPI/Brief+MARS+request+syntax). For a full range of what
data can be requested, please consult the [MARS catalog](https://apps.ecmwf.int/archive-catalogue/).
See [these examples](https://confluence.ecmwf.int/display/UDOC/MARS+example+requests)
to discover the kinds of requests that can be made.

> **NOTE**: MARS data is stored on tape drives. It takes longer for multiple workers to request data than a single
> worker. Thus, it's recommended _not_ to set a partition key when writing MARS data configurations.

================================================
FILE: Dockerfile
================================================

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
ARG py_version=3.8
FROM apache/beam_python${py_version}_sdk:2.40.0 as beam_sdk
FROM continuumio/miniconda3:v25.11.1

# Add the mamba solver for faster builds
RUN conda install -n base conda-libmamba-solver
RUN conda config --set solver libmamba

# Create conda env using environment.yml
ARG weather_tools_git_rev=main
RUN git clone https://github.com/google/weather-tools.git /weather
WORKDIR /weather
RUN git checkout "${weather_tools_git_rev}"
RUN rm -r /weather/weather_*/test_data/
RUN conda env create -f environment.yml --debug && \
    conda clean --all -f --yes

# Activate the conda env and update the PATH
ARG CONDA_ENV_NAME=weather-tools
RUN echo "source activate ${CONDA_ENV_NAME}" >> ~/.bashrc
ENV PATH /opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH

# Install gcloud alpha
RUN apt-get update -y
RUN gcloud components install alpha --quiet

# Copy files from official SDK image, including script/dependencies.
COPY --from=beam_sdk /opt/apache/beam /opt/apache/beam

# Set the entrypoint to Apache Beam SDK launcher.
ENTRYPOINT ["/opt/apache/beam/boot"]


================================================
FILE: Efficient-Requests.md
================================================
# Writing Efficient Data Requests

TODO([#26](https://github.com/google/weather-tools/issues/26)). In the meantime, please consult this ECMWF
documentation:
* [Web API Retrieval Efficiency](https://confluence.ecmwf.int/display/WEBAPI/Retrieval+efficiency)
* [Era 5 daily data retrieval efficiency](https://confluence.ecmwf.int/display/WEBAPI/ERA5+daily+retrieval+efficiency)


================================================
FILE: LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: MANIFEST.in
================================================
global-exclude *.nc
global-exclude *.gb *.grib
global-exclude *.bz2
global-exclude *.yaml *.yml
global-exclude *_test.py
prune .github
prune bin
prune docs
prune weather_*/test_data
exclude tox.ini


================================================
FILE: README.md
================================================
# weather-tools

Apache Beam pipelines to make weather data accessible and useful.

[![CI](https://github.com/google/weather-tools/actions/workflows/ci.yml/badge.svg)](https://github.com/google/weather-tools/actions/workflows/ci.yml)
[![Documentation Status](https://readthedocs.org/projects/weather-tools/badge/?version=latest)](https://weather-tools.readthedocs.io/en/latest/?badge=latest)

## Introduction

This project contributes a series of command-line tools to make common data engineering tasks easier for researchers in
climate and weather. These solutions were born out of the need to improve repeated work performed by research teams
across Alphabet.

The first tool created was the weather downloader (`weather-dl`). This makes it easier to ingest data from the European
Centre for Medium-Range Weather Forecasts (ECMWF). `weather-dl` enables users to describe very specifically what data they'd
like to ingest from ECMWF's catalogs. It also offers them control over how to parallelize requests, empowering users to
[retrieve data efficiently](Efficient-Requests.md). Downloads are driven from a
[configuration file](Configuration.md), which can be reviewed (and version-controlled) independently of pipeline or
analysis code.

We also provide two additional tools to aid climate and weather researchers: the weather mover (`weather-mv`) and the
weather splitter (`weather-sp`). These CLIs are still in their alpha stages of development. Yet, they have been used for
production workflows for several partner teams.

We created the weather mover (`weather-mv`) to load geospatial data from cloud buckets
into [Google BigQuery](https://cloud.google.com/bigquery). This enables rapid exploratory analysis and visualization of
weather data: From BigQuery, scientists can load arbitrary climate data fields into a Pandas or XArray dataframe via a
simple SQL query.
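
For example, once a table has been ingested with `weather-mv`, pulling a slice into Pandas takes a few lines with the
BigQuery client library. This is a minimal sketch; the project, dataset, table, and column names are hypothetical:

```python
from google.cloud import bigquery

client = bigquery.Client(project="my-project")  # hypothetical project
sql = """
    SELECT time, latitude, longitude, temperature  -- hypothetical columns
    FROM `my-project.weather.era5`                 -- hypothetical table
    WHERE latitude BETWEEN 40 AND 50
"""
# to_dataframe() requires the client library's pandas extras (e.g. db-dtypes).
df = client.query(sql).to_dataframe()
print(df.head())
```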

The weather splitter (`weather-sp`) helps normalize how archival weather data is stored in cloud buckets:
Whether you're trying to merge two datasets with overlapping variables — or, you simply need
to [open Grib data from XArray](https://github.com/ecmwf/cfgrib/issues/2), it's really useful to split datasets into
their component variables.

## Installing

It is currently recommended that you create a local python environment (with
[Anaconda](https://www.anaconda.com/products/individual)) and install the
sources as follows:

```shell
conda env create --name weather-tools --file=environment.yml
conda activate weather-tools
```

> Note: Due to its use of 3rd-party binary dependencies such as GDAL and MetView, `weather-tools`
> is transitioning from PyPI to Conda for its main release channel. The instructions above
> are a temporary workaround before our Conda-forge release.

From here, you can use the `weather-*` tools from your python environment. Currently, the following tools are available:

- [⛈ `weather-dl`](weather_dl/README.md) (_beta_) – Download weather data (namely, from ECMWF's API).
- [⛅️ `weather-mv`](weather_mv/README.md) (_alpha_) – Load weather data into analytics engines, like BigQuery.
- [🌪 `weather-sp`](weather_sp/README.md) (_alpha_) – Split weather data by arbitrary dimensions.

## Quickstart

In this tutorial, we will
download the [ERA5 pressure level dataset](https://cds.climate.copernicus.eu/datasets/reanalysis-era5-pressure-levels?tab=overview)
and ingest it into Google BigQuery using `weather-dl` and `weather-mv`, respectively.

### Prerequisites

1. [Register here](https://www.ecmwf.int/) and [here](https://cds.climate.copernicus.eu/) for a license from
   ECMWF's [Copernicus (CDS) API](https://cds.climate.copernicus.eu/api-how-to).
2. You must agree to a dataset's Terms of Use before downloading any data from it (e.g., accept the [terms & conditions](https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels?tab=download#manage-licences) of the ERA5 single-levels dataset).
3. Install your license by copying your API URL & key from [this page](https://cds.climate.copernicus.eu/how-to-api) to a new file `$HOME/.cdsapirc`.[^1] (A snippet for verifying this setup follows this list.) The file should look like this:
   ```
   url: https://cds.climate.copernicus.eu/api
   key: <YOUR_USER_ID>:<YOUR_API_KEY>
   ```
4. If you do not already have a Google Cloud project, create one by following
   [these steps](https://cloud.google.com/docs/get-started). If you are working on
   an existing project, make sure your user has the [BigQuery Admin role](https://cloud.google.com/bigquery/docs/access-control#bigquery.admin).
   To learn more about granting IAM roles to users in Google Cloud, visit the
   [official docs](https://cloud.google.com/iam/docs/granting-changing-revoking-access#grant-single-role).
5. Create an empty BigQuery Dataset. This can be done using
   the [Google Cloud Console](https://cloud.google.com/bigquery/docs/quickstarts/quickstart-cloud-console#create_a_dataset)
   or via the [`bq` CLI tool](https://cloud.google.com/bigquery/docs/quickstarts/quickstart-command-line). 
   For example:
   ```shell
   bq mk --project_id=$PROJECT_ID $DATASET_ID
   ```
6. Follow [these steps](https://cloud.google.com/storage/docs/creating-buckets) 
   to create a bucket for staging temporary files in [Google Cloud Storage](https://cloud.google.com/storage).
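
Before moving on, you can verify the CDS credentials from step 3: constructing a client reads `$HOME/.cdsapirc` on startup and fails fast if the file is missing or malformed. A minimal sketch:

```python
import cdsapi

# Raises an exception if $HOME/.cdsapirc is absent or incomplete.
client = cdsapi.Client()
print("CDS API endpoint:", client.url)
```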

### Steps

For the purpose of this tutorial, we will use your local machine to run the
data pipelines. Note that all `weather-tools` can also be run in [Cloud Dataflow](https://cloud.google.com/dataflow)
which is easier to scale and fully managed.

1. Use `weather-dl` to download the *ERA5 pressure level* dataset.
   ```bash
   weather-dl configs/era5_example_config_local_run.cfg \
      --local-run # Use the local machine
   ```

   > Recommendation: Pass the `-d, --dry-run` flag to any of these commands to preview the effects.

   **NOTE:** By default, local downloads are saved to the `./local_run` directory unless another file system is specified.
   The recommended output location for `weather-dl` is [Cloud Storage](https://cloud.google.com/storage).
   The source and destination of the download are configured using the `.cfg` configuration file which is passed to the command.
   To learn more about this configuration file's format and features,
   see [this reference](Configuration.md). To learn more about the `weather-dl` command, see the [`weather-dl` README](weather_dl/README.md).
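
   To make the partitioning concrete, here is an illustrative sketch (not `weather-dl`'s actual internals) of how the
   `target_path` template from `era5_example_config_local_run.cfg` expands into one file per combination of partition keys:

   ```python
   # Illustrative only: weather-dl formats target_path once per combination
   # of the partition_keys declared in the configuration file.
   import itertools

   target_path = "era5-{year:04d}{month:02d}{day:02d}-pressure-{pressure_level}.nc"
   for year, day in itertools.product((2016, 2017), (1, 15)):
       print(target_path.format(year=year, month=1, day=day, pressure_level=500))
   # era5-20160101-pressure-500.nc, era5-20160115-pressure-500.nc, ...
   ```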

2. *(optional)* Split your downloaded dataset up with `weather-sp`:

   ```shell
   weather-sp --input-pattern "./local_run/era5-*.nc" \
      --output-dir "split_data"
   ```

   Visit the `weather-sp` [docs](weather_sp/README.md) for more information.

3. Use `weather-mv` to ingest the downloaded data into BigQuery, in a structured format.

   ```bash
   # Or pass --uris "./split_data/**.nc" if you split the data with weather-sp.
   # --output_table: the path to the destination BigQuery table.
   # --temp_location: needed to stage temporary files before writing to BigQuery.
   weather-mv bigquery --uris "./local_run/**.nc" \
      --output_table "$PROJECT.$DATASET_ID.$TABLE_ID" \
      --temp_location "gs://$BUCKET/tmp" \
      --direct_num_workers 2
   ```

   See [these docs](weather_mv/README.md) for more about the `weather-mv` command.

That's it! Once the pipeline completes, you should be able to query the ingested 
dataset in [BigQuery SQL workspace](https://cloud.google.com/bigquery/docs/bigquery-web-ui)
and analyze it using [BigQuery ML](https://cloud.google.com/bigquery-ml/docs/introduction).

## Contributing

The weather tools are under active development, and contributions are welcome! Please check out
our [guide](CONTRIBUTING.md) to get started.

## License

This is not an official Google product.

```
Copyright 2021 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
```

[^1]: Note that you need to be logged in for the [CDS API page](https://cds.climate.copernicus.eu/api-how-to#install-the-cds-api-key) to actually show your user ID and API key. Otherwise, it will display a placeholder, which is confusing to some users.


================================================
FILE: Runners.md
================================================
# Choosing a Beam Runner

All tools use Apache Beam pipelines. By default, pipelines run locally using the `DirectRunner`. You can optionally
choose to run the pipelines on [Google Cloud Dataflow](https://cloud.google.com/dataflow) by selecting
the `DataflowRunner`.

When working with GCP, it's recommended you set the project ID up front with the command:

```shell
gcloud config set project <your-id>
```

## _Direct Runner options_:

* `--direct_num_workers`: The number of workers to use. We recommend 2 for local development.

Example run:

```shell
weather-mv -i gs://netcdf_file.nc \
  -o $PROJECT.$DATASET_ID.$TABLE_ID \
  -t gs://$BUCKET/tmp  \
  --direct_num_workers 2
```

For a full list of how to configure the direct runner, please review
[this page](https://beam.apache.org/documentation/runners/direct/).

## _Dataflow options_:

* `--runner`: The `PipelineRunner` to use. This field can be either `DirectRunner` or `DataflowRunner`.
  Default: `DirectRunner` (local mode)
* `--project`: The project ID for your Google Cloud Project. This is required if you want to run your pipeline using the
  Dataflow managed service (i.e. `DataflowRunner`).
* `--temp_location`: Cloud Storage path for temporary files. Must be a valid Cloud Storage URL, beginning with `gs://`.
* `--region`: Specifies a regional endpoint for deploying your Dataflow jobs. Default: `us-central1`.
* `--job_name`: The name of the Dataflow job being executed as it appears in Dataflow's jobs list and job details.

Example run:

```shell
weather-dl configs/seasonal_forecast_example_config.cfg \
  --runner DataflowRunner \
  --project $PROJECT \
  --region $REGION \
  --temp_location gs://$BUCKET/tmp/
```

For a full list of how to configure the Dataflow pipeline, please review
[this table](https://cloud.google.com/dataflow/docs/reference/pipeline-options).

## Monitoring

When running Dataflow, you
can [monitor jobs through UI](https://cloud.google.com/dataflow/docs/guides/using-monitoring-intf),
or [via Dataflow's CLI commands](https://cloud.google.com/dataflow/docs/guides/using-command-line-intf):

For example, to see all outstanding Dataflow jobs, simply run:

```shell
gcloud dataflow jobs list
```

To describe stats about a particular Dataflow job, run:

```shell
gcloud dataflow jobs describe $JOBID
```

In addition, Dataflow provides a series
of [Beta CLI commands](https://cloud.google.com/sdk/gcloud/reference/beta/dataflow).

These can be used to keep track of job metrics, like so:

```shell
JOBID=<enter job id here>
gcloud beta dataflow metrics list $JOBID --source=user
```

You can even [view logs via the beta commands](https://cloud.google.com/sdk/gcloud/reference/beta/dataflow/logs/list):

```shell
gcloud beta dataflow logs list $JOBID
```



================================================
FILE: Runtime-Container.md
================================================
# Runtime Containers

_How to build & publish custom Dataflow images to Google Container Registry (GCR)_

*Pre-requisites*: Install the gcloud CLI ([instructions are here](https://cloud.google.com/sdk/docs/install)).

Then, log in to your cloud account:

```shell
gcloud auth login
```

> Please follow all the instructions from the CLI. This will involve running an
> auth script on your local machine, which will open a browser window to log you
> in.

Last, make sure you have adequate permissions to use Google Cloud Build (see
[IAM options here](https://cloud.google.com/build/docs/iam-roles-permissions)).
[This documentation](https://cloud.google.com/build/docs/securing-builds/configure-access-to-resources)
will help you configure your project to use Cloud Build.

*Updating the image*: Please modify the `Dockerfile` in the root directory. Then, build and upload the image with Google
Cloud Build (updating the tag as appropriate):

```shell
export PROJECT=<your-project-here>
export REPO=weather-tools
export IMAGE_URI=gcr.io/$PROJECT/$REPO
export TAG="0.0.0" # Please increment on every update.
# from the project root...

# dev release
gcloud builds submit . --tag "$IMAGE_URI:dev"

# release:
gcloud builds submit . --tag "$IMAGE_URI:$TAG"  && gcloud builds submit weather_mv/ --tag "$IMAGE_URI:latest"
```


================================================
FILE: bin/install-branch
================================================
#!/usr/bin/env sh
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set the first argument as the name of the branch. If unset, default to "main".
pip install -e "git+http://github.com/google/weather-tools.git@${1:-main}#egg=google-weather-tools"


================================================
FILE: bin/post-push
================================================
#!/usr/bin/env sh
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
MAG="\033[95m"
BLD="\033[1m"
END="\033[0m"

# End script early on error
set -e

# In project root...
cd "$(dirname $0)/.."

flake8
pytype
pytest

echo "$MAG$BLD--- Presubmit: Success ---$END"
exit 0


================================================
FILE: ci3.8.yml
================================================
name: weather-tools
channels:
  - conda-forge
  - defaults
dependencies:
  - python=3.8.13
  - apache-beam=2.40.0
  - pytest=7.2.0
  - pytest-subtests=0.8.0
  - cfgrib=0.9.10.2
  - dask=2022.10.0
  - dataclasses=0.8
  - distributed=2022.10.0
  - eccodes=2.27.0
  - requests=2.28.1
  - netcdf4=1.6.1
  - rioxarray=0.13.4
  - xarray-beam=0.6.2
  - ecmwf-api-client=1.6.3
  - fsspec=2022.11.0
  - gcsfs=2022.11.0
  - gdal=3.5.1
  - pyproj=3.4.0
  - geojson=2.5.0=py_0
  - simplejson=3.17.6
  - metview-batch=5.17.0
  - numpy=1.22.4
  - pandas=1.5.1
  - pip=24.3.1
  - pygrib=2.1.4
  - xarray==2023.1.0
  - ruff==0.1.2
  - google-cloud-sdk=410.0.0
  - aria2=1.36.0
  - zarr=2.15.0
  - google-cloud-monitoring=2.22.2
  - pillow=10.4.0
  - cdsapi=0.7.5
  - pip:
    - cython==0.29.34
    - earthengine-api==0.1.329
    - pyparsing==3.1.4
    - .[test]


================================================
FILE: ci3.9.yml
================================================
name: weather-tools
channels:
  - conda-forge
  - defaults
dependencies:
  - python=3.9.13
  - apache-beam=2.40.0
  - pytest=7.2.0
  - pytest-subtests=0.8.0
  - cfgrib=0.9.10.2
  - dask=2022.10.0
  - dataclasses=0.8
  - distributed=2022.10.0
  - eccodes=2.27.0
  - requests=2.28.1
  - netcdf4=1.6.1
  - rioxarray=0.13.4
  - xarray-beam=0.6.2
  - ecmwf-api-client=1.6.3
  - fsspec=2022.11.0
  - gcsfs=2022.11.0
  - gdal=3.5.1
  - pyproj=3.4.0
  - geojson=2.5.0=py_0
  - simplejson=3.17.6
  - metview-batch=5.17.0
  - numpy=1.22.4
  - pandas=1.5.1
  - pip=24.3.1
  - pygrib=2.1.4
  - google-cloud-sdk=410.0.0
  - aria2=1.36.0
  - xarray==2023.1.0
  - ruff==0.1.2
  - zarr=2.15.0
  - google-cloud-monitoring=2.22.2
  - pillow=10.4.0
  - cdsapi=0.7.5
  - pip:
    - cython==0.29.34
    - earthengine-api==0.1.329
    - pyparsing==3.1.4
    - .[test]


================================================
FILE: configs/era5_example_config.cfg
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[parameters]
client=cds
dataset=reanalysis-era5-pressure-levels
target_path=gs://ecmwf-output-test/era5/{year:04d}/{month:02d}/{day:02d}-pressure-{pressure_level}.nc
partition_keys=
    year
    month
    day
    pressure_level
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[selection]
product_type=reanalysis
format=netcdf
variable=
    divergence
    fraction_of_cloud_cover
    geopotential
pressure_level=
    500
year=
    2015
    2016
    2017
month=
    01
day=
    01
    15
time=
    00:00
    06:00
    12:00
    18:00

================================================
FILE: configs/era5_example_config_local_run.cfg
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Run with
# $ weather-dl configs/era5_example_config_local_run.cfg --local-run
# This will create a folder '$CWD/local_run' with a manifest.json file and four data files
# era5-20160101-pressure-500.nc
# era5-20160115-pressure-500.nc
# era5-20170101-pressure-500.nc
# era5-20170115-pressure-500.nc
[parameters]
client=cds
dataset=reanalysis-era5-pressure-levels
target_path=era5-{year:04d}{month:02d}{day:02d}-pressure-{pressure_level}.nc
partition_keys=
    year
    month
    day
    pressure_level
[selection]
product_type=reanalysis
format=netcdf
variable=
    divergence
    fraction_of_cloud_cover
    geopotential
pressure_level=
    500
year=
    2016
    2017
month=
    01
day=
    01
    15
time=
    00:00
    12:00


================================================
FILE: configs/era5_example_config_local_run.json
================================================
{
  "parameters": {
    "client": "cds",
    "dataset": "reanalysis-era5-pressure-levels",
    "target_path": "era5-{}{}{}-pressure-{}.nc",
    "partition_keys": ["year","month","day","pressure_level"]
  },
  "selection": {
    "product_type": "ensemble_mean",
    "format": "netcdf",
    "variable": ["divergence","fraction_of_cloud_cover","geopotential"],
    "pressure_level": [500],
    "year": [2016, 2017],
    "month": [1],
    "day": [1, 15],
    "time": ["00:00", "12:00"]
  }
}


================================================
FILE: configs/era5_example_config_preproc.cfg
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[parameters]
client=cds
dataset=reanalysis-era5-pressure-levels
target_path=gs://ecmwf-downloads/test/o1280-{year:04d}-{month:02d}-{day:02d}.grib
partition_keys=
    year
    month
    day
[selection]
# These are the averages of an ensemble of analyses.
product_type=ensemble_mean
format=grib
grid=o1280
variable=
    relative_humidity
    temperature
    fraction_of_cloud_cover
    geopotential
    u_component_of_wind
    v_component_of_wind
pressure_level=
    500
year=
    2020
month=
    01
day=
    01
    02
time=
    00:00
    12:00


================================================
FILE: configs/era5_example_config_using_date.cfg
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[parameters]
client=cds
dataset=reanalysis-era5-pressure-levels
# This config creates a date-based directory hierarchy.
# In this case, the four files that will be created are
# gs://ecmwf-output-test/era5/2017/01/01-pressure-500.nc
# gs://ecmwf-output-test/era5/2017/01/02-pressure-500.nc
# gs://ecmwf-output-test/era5/2017/01/01-pressure-1000.nc
# gs://ecmwf-output-test/era5/2017/01/02-pressure-1000.nc
target_path=gs://ecmwf-output-test/era5/{date:%%Y/%%m/%%d}-pressure-{pressure_level}.nc
partition_keys=
     date
     pressure_level
[selection]
product_type=reanalysis
format=netcdf
variable=
    divergence
    fraction_of_cloud_cover
    geopotential
pressure_level=
    500
    1000
date=2017-01-01/to/2017-01-02
time=
    00:00
    06:00
    12:00
    18:00


================================================
FILE: configs/era5_example_monthly_soil.cfg
================================================
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[parameters]
client=cds
dataset=reanalysis-era5-complete
target_path=gs://ecmwf-output-test/era5/ERA5GRIB/HRES/Month/{year}/{year}{month:02d}_hres_soil.grb2
partition_keys=
    year
    month

[selection]
class=ea
stream=oper
expver=1
levtype=sfc
type=an
year=1979/to/2021
month=01/to/12
day=all
time=00/to/23/by/1
param=139.128/170.128/183.128/236.128/238.128/39.128/40.128/41.128/42.128/35.128/36.128/37.128/38.128


================================================
FILE: configs/mars_example_config.cfg
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[parameters]
client=mars
dataset=ecmwf-mars-output
target_path=gs://ecmwf-downloads/hres-single-level/{date:%%Y/%%m/%%d}.nc
partition_keys=
    date

[selection]
# example requests
# https://confluence.ecmwf.int/display/UDOC/MARS+example+requests

# hres fields
# https://www.ecmwf.int/en/forecasts/datasets/set-i

stream=oper
levtype=sfc
param=10fg6/10u/10v/100u/100v/crr/2t/2d/200u/200v/cp/dsrp/hcc/i10fg/lcc/lsp/lspf/lsrr/msl/ptype/sf/sp/ssr/tcrw/tclw/tcsw/tcw/tcwv/tp
padding=0
step=0/1/2/3/4/5/6/7/8/9/10/11/12/24/48/72/96/120/144/168/192/216/240
grid=0.125/0.125
expver=1
time=0000/1800
date=2017-01-01/to/2017-01-07
# these are weather forecasts
type=fc
class=od
expect=anymars 
format=netcdf


================================================
FILE: configs/mars_example_config.json
================================================
{
  "parameters": {
    "client": "mars",
    "dataset": "ecmwf-mars-output",
    "target_path": "gs://ecmwf-downloads/hres-single-level/{:%Y/%m/%d}.nc",
    "partition_keys": "date"
  },

  "selection": {
    "stream": "oper",
    "levtype": "sfc",
    "param": ["10fg6","10u","10v","100u","100vcrr","2t","2d","200u","200v","cp","dsrp","hcc","i10fg","lcc","lsp","lspf","lsrr","msl","ptype","sf","sp","ssr","tcrw","tclw","tcsw","tcw","tcwv","tp"],
    "padding": 0,
    "step": [0,1,2,3,4,5,6,7,8,9,10,11,12,24,48,72,96,120,144,168,192,216,240],
    "grid": [0.125,0.125],
    "expver": 1,
    "time": ["0000","1800"],
    "date": ["2017-01-01","2017-01-07"],
    "type": "fc",
    "class": "od",
    "expect": "anymars",
    "format": "netcdf"
  }
}


================================================
FILE: configs/multiple_multiple_licenses/era5_pressure500.cfg
================================================
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[parameters]
client=cds
dataset=reanalysis-era5-pressure-levels
target_path=gs://ecmwf-output-test/era5/{year:04d}/{month:02d}/{day:02d}-pressure-{pressure_level}.nc
partition_keys=
    year
    month
    day
    pressure_level
[parameters.a]
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[parameters.b]
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[parameters.c]
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[selection]
product_type=reanalysis
format=netcdf
variable=
    divergence
    fraction_of_cloud_cover
    geopotential
pressure_level=
    500
year=
    2015
    2016
    2017
month=
    01
day=
    01
    15
time=
    00:00
    06:00
    12:00
    18:00

================================================
FILE: configs/multiple_multiple_licenses/era5_pressure600.cfg
================================================
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[parameters]
client=cds
dataset=reanalysis-era5-pressure-levels
target_path=gs://ecmwf-output-test/era5/{year:04d}/{month:02d}/{day:02d}-pressure-{pressure_level}.nc
partition_keys=
    year
    month
    day
    pressure_level
[parameters.a]
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[parameters.b]
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[parameters.c]
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[selection]
product_type=reanalysis
format=netcdf
variable=
    divergence
    fraction_of_cloud_cover
    geopotential
pressure_level=
    600
year=
    2015
    2016
    2017
month=
    01
day=
    01
    15
time=
    00:00
    06:00
    12:00
    18:00

================================================
FILE: configs/multiple_multiple_licenses/era5_pressure700.cfg
================================================
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[parameters]
client=cds
dataset=reanalysis-era5-pressure-levels
target_path=gs://ecmwf-output-test/era5/{year:04d}/{month:02d}/{day:02d}-pressure-{pressure_level}.nc
partition_keys=
    year
    month
    day
    pressure_level
[parameters.a]
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[parameters.b]
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[parameters.c]
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[selection]
product_type=reanalysis
format=netcdf
variable=
    divergence
    fraction_of_cloud_cover
    geopotential
pressure_level=
    700
year=
    2015
    2016
    2017
month=
    01
day=
    01
    15
time=
    00:00
    06:00
    12:00
    18:00

================================================
FILE: configs/multiple_single_license/era5_pressure500.cfg
================================================
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[parameters]
client=cds
dataset=reanalysis-era5-pressure-levels
target_path=gs://ecmwf-output-test/era5/{year:04d}/{month:02d}/{day:02d}-pressure-{pressure_level}.nc
partition_keys=
    year
    month
    day
    pressure_level
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[selection]
product_type=reanalysis
format=netcdf
variable=
    divergence
    fraction_of_cloud_cover
    geopotential
pressure_level=
    500
year=
    2015
    2016
    2017
month=
    01
day=
    01
    15
time=
    00:00
    06:00
    12:00
    18:00

================================================
FILE: configs/multiple_single_license/era5_pressure600.cfg
================================================
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[parameters]
client=cds
dataset=reanalysis-era5-pressure-levels
target_path=gs://ecmwf-output-test/era5/{year:04d}/{month:02d}/{day:02d}-pressure-{pressure_level}.nc
partition_keys=
    year
    month
    day
    pressure_level
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[selection]
product_type=reanalysis
format=netcdf
variable=
    divergence
    fraction_of_cloud_cover
    geopotential
pressure_level=
    600
year=
    2015
    2016
    2017
month=
    01
day=
    01
    15
time=
    00:00
    06:00
    12:00
    18:00

================================================
FILE: configs/multiple_single_license/era5_pressure700.cfg
================================================
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[parameters]
client=cds
dataset=reanalysis-era5-pressure-levels
target_path=gs://ecmwf-output-test/era5/{year:04d}/{month:02d}/{day:02d}-pressure-{pressure_level}.nc
partition_keys=
    year
    month
    day
    pressure_level
api_url=https://cds.climate.copernicus.eu/api
# a fake key for tests
api_key=12345:1234567-ab12-34cd-9876-4o4fake90909
[selection]
product_type=reanalysis
format=netcdf
variable=
    divergence
    fraction_of_cloud_cover
    geopotential
pressure_level=
    700
year=
    2015
    2016
    2017
month=
    01
day=
    01
    15
time=
    00:00
    06:00
    12:00
    18:00

================================================
FILE: configs/s2s_operational_forecast_example.cfg
================================================
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[parameters]
client=ecpublic
dataset=s2s
target_path=gs://ecmwf-downloads/s2s/pressure-levels/{date}-all-levs.gb
# target_path=gs://ecmwf-downloads/s2s/pressure-levels/{date}-hc-{hdate}-all-levs.gb
partition_keys=
    date
#    hdate

[selection]
class=s2
date=2016-01-04/2016-01-07/2016-01-11/2016-01-14/2016-01-18/2016-01-21/2016-01-25
# hdate = <value>: the number of years to subtract.
# E.g., given input:
#           date = 2020-01-02
#           hdate = 1/to/6 or 1/2/3/4/5/6
# the code will expand this to:
#           hdate = 2019-01-02/2018-01-02/2017-01-02/2016-01-02/2015-01-02/2014-01-02
# Note: If 'hdate' is specified in the 'selection' section, then 'date' is required as a partition key.
hdate=1/to/20
expver=prod
levelist=10/50/100/200/300/500/700/850/925/1000
levtype=pl
model=glob
number=1/to/50
origin=ecmf
# All except q
param=130/131/132/135/156
step=0/to/1104/by/24
stream=enfo
time=00:00:00
type=pf

================================================
FILE: configs/seasonal_forecast_example_config.cfg
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[parameters]
client=cds
dataset=seasonal-original-single-levels
target_path=gs://ecmwf-output-test/seasonal-forecast/seasonal-forecast-{year:04d}-{month:02d}.nc
partition_keys=
    year
    month
[selection]
format=netcdf
originating_centre=ecmwf
variable=
    10m_u_component_of_wind
    10m_v_component_of_wind
year=
    2016
    2017
month=
    01
day=
    01
leadtime_hour=
    6
area=
  10
  -10
  -10
  10


================================================
FILE: configs/tigge_example_config.cfg
================================================
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[parameters]
client=ecpublic
dataset=tigge
target_path=gs://ecmwf-output-test/tigge/{}.gb
partition_keys=
    date
[selection]
class=ti
date=2020-01-01/to/2020-01-31
expver=prod
grid=0.25/0.25
levtype=sfc
number=1/to/50
origin=ecmf
param=167
step=0/to/360/by/6
time=00/12
type=pf

================================================
FILE: configs/yesterdays_surface_example.cfg
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[parameters]
client=mars
partition_keys=
    date
target_path=all-{}.an
[selection]
class   = od
type    = analysis
levtype = surface
date    = -1
time    = 00/06/12/18
param   = z/sp

================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: docs/Private-IP-Configuration.md
================================================
# Private IP Configuration 

_A Guide for Dataflow Pipeline Execution_

## Goals
In this document, we’ll describe how to use private IP addresses for the execution of a Dataflow pipeline.

## Background
When running a Dataflow pipeline, GCP spawns one or more new VM instances. By default, each VM instance has an External IP address. 

![VM Instance with External IP Address](_static/vm_instance_with_external_ip_address.png "VM Instance with External IP Address")

Since a billing account has a limited quota of External IP addresses, we can avoid this overhead by providing VPC parameters as CLI inputs to Dataflow.

The following table<font color="red">*</font> summarizes the required input parameters.

| Field             | Description                                                                                                                                                              |
|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| network           | The Compute Engine network for launching Compute Engine instances to run your pipeline. If not set, Google Cloud assumes that you intend to use a network named default. |
| subnetwork        | The Compute Engine subnetwork for launching Compute Engine instances to run your pipeline.                                                                               |
| no_use_public_ips | Command-line flag that sets use_public_ips to False. If the option is not explicitly enabled or disabled, the Dataflow workers use public IP addresses.                  |

(<font color="red">*</font> excerpt from [GCP pipeline-options](https://cloud.google.com/dataflow/docs/reference/pipeline-options))
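
If you build the pipeline programmatically instead, the same parameters map onto Apache Beam's standard pipeline options. A minimal sketch (the project, bucket, network, and subnetwork names are placeholders):

```python
from apache_beam.options.pipeline_options import PipelineOptions

options = PipelineOptions([
    "--runner=DataflowRunner",
    "--project=my-host-project",            # placeholder
    "--region=us-central1",
    "--temp_location=gs://my-bucket/tmp",   # placeholder
    "--no_use_public_ips",                  # sets use_public_ips to False
    "--network=dataflow",                   # the VPC network created in the steps below
    "--subnetwork=regions/us-central1/subnetworks/private",
])
```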

## Steps to Configure VPC
1. Configure VPC-Network & Subnetwork
2. Configure Firewall-Rule
3. Configure NAT & Router
4. Sample commands to trigger dataflow pipeline execution using above options

## Configure VPC-Network & Subnetwork
1. Open GCP’s Create VPC-Network page.
2. Provide name, description.
3. Select “Subnet creation mode” as Custom.
4. Provide name, description, region, IP address range & other details for the new subnet.
5. Select “Private Google Access” as On.
6. Complete VPC-Network creation by providing other required parameters. Refer to GCP’s [Create-VPC-Network](https://cloud.google.com/vpc/docs/create-modify-vpc-networks) documentation for more details.

![VPC Network Details](_static/vpc_network_details.png "VPC Network Details")

## Configure Firewall-Rule
1. Open GCP’s Create a firewall rule page.
2. Provide name, description. You may set “Logs” as Off.
3. For the “Network” drop-down, select the network that we created in the previous step.
4. Select “Direction of traffic” as Ingress & “Action on match” as Allow.
5. For the "Targets" drop-down, select "All instances in the network".
6. For the "Source IPv4 ranges", pass in the following: "0.0.0.0/0".
7. Select "Protocols and ports" as Allow all.
8. Complete Firewall-Rule creation by providing other necessary information. Refer to GCP’s [Configuring-Firewall](https://cloud.google.com/filestore/docs/configuring-firewall) documentation for more details.

![Firewall Rule Details](_static/firewall_rule_details.png "Firewall Rule Details")

## Configure NAT & Router
1. Open GCP’s Create a NAT gateway page.
2. Provide name, region.
3. For the “Network” drop-down, select the network that we created earlier.
4. For the “Router” drop-down, EITHER select pre-created router OR click on “create new router”.
<br> a. Complete router creation by providing name, description & region. Refer to GCP’s [Create-Router](https://cloud.google.com/network-connectivity/docs/router/how-to/create-router-vpc-on-premises-network) documentation for more details.
5. Complete NAT gateway creation by providing required details. Refer to GCP’s [Create-NAT-Gateway](https://cloud.google.com/nat/docs/set-up-manage-network-address-translation) documentation for more details.

![Router Details](_static/router_details.png "Router Details")

![NAT Gateways](_static/nat_gateways.png "NAT Gateways")

## Sample commands to trigger dataflow pipeline execution using above options

The following section shows how VPC parameters can be passed as CLI inputs to the `weather-mv` Dataflow pipeline.

```bash
weather-mv --uris "gs://$STORAGE_BUCKET/*.nc" \
           --output_table "$HOST_PROJECT_ID.$DATASET_ID.$TABLE_ID" \
           --temp_location "gs://$STORAGE_BUCKET/tmp" \
           --runner DataflowRunner \
           --project $HOST_PROJECT_ID \
           --region $REGION_NAME \
           --no_use_public_ips \
           --network=$NETWORK_NAME \
           --subnetwork=regions/$REGION_NAME/subnetworks/$SUBNETWORK_NAME
```

Replace the following:

- STORAGE_BUCKET: the storage bucket, e.g. bucket_58231
- HOST_PROJECT_ID: the host project ID, e.g. weather_tools
- DATASET_ID: the name of dataset, e.g. weather_mv_ds
- TABLE_ID: the name of table, e.g. tbl_2017_01 
- REGION_NAME: the regional endpoint of your Dataflow job, e.g. us-central1
- NETWORK_NAME: the name of your Compute Engine network, e.g. dataflow
  - Provide network_name same as what we created in Step-1.
- SUBNETWORK_NAME: the name of your Compute Engine subnetwork, e.g. private
  - Provide a subnetwork_name same as what we created in Step-1.

Alternatively, you may execute the following command:

```bash
weather-mv --uris "gs://$STORAGE_BUCKET/*.nc" \
           --output_table "$HOST_PROJECT_ID.$DATASET_ID.$TABLE_ID" \
           --temp_location "gs://$STORAGE_BUCKET/tmp" \
           --runner DataflowRunner \
           --project $HOST_PROJECT_ID \
           --region $REGION_NAME \
           --no_use_public_ips \
           --subnetwork=https://www.googleapis.com/compute/v1/projects/$HOST_PROJECT_ID/regions/$REGION_NAME/subnetworks/$SUBNETWORK_NAME
```


================================================
FILE: docs/_static/custom.css
================================================
p {
    text-align: justify;
}

body {
   min-width: 250px;
}

div.body {
    min-width: 250px;
}


================================================
FILE: docs/conf.py
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.

import os
import sys

sys.path.insert(0, os.path.abspath('../weather_dl'))
sys.path.insert(1, os.path.abspath('../weather_mv'))
sys.path.insert(2, os.path.abspath('../weather_sp'))


# -- Project information -----------------------------------------------------

project = 'weather-tools'
copyright = '2021 Google'
author = 'Anthromets'

# The full version, including alpha/beta/rc tags
release = '0.0'


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'myst_parser',
    'sphinx.ext.todo',
    'sphinx.ext.viewcode',
    'sphinx.ext.autodoc',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# https://stackoverflow.com/a/66295922/809705
autodoc_typehints = "description"


================================================
FILE: docs/download_pipeline.md
================================================
# download_pipeline package

## Submodules

### download_pipeline.clients module

```{eval-rst}
.. automodule:: weather_dl.download_pipeline.clients
   :members:
   :undoc-members:
   :show-inheritance:
```

### download_pipeline.manifest module

```{eval-rst}
.. automodule:: weather_dl.download_pipeline.manifest
   :members:
   :undoc-members:
   :show-inheritance:
```

### download_pipeline.parsers module

```{eval-rst}
.. automodule:: weather_dl.download_pipeline.parsers
   :members:
   :undoc-members:
   :show-inheritance:
```

### download_pipeline.pipeline module

```{eval-rst}
.. automodule:: weather_dl.download_pipeline.pipeline
   :members:
   :undoc-members:
   :show-inheritance:
```

### download_pipeline.stores module

```{eval-rst}
.. automodule:: weather_dl.download_pipeline.stores
   :members:
   :undoc-members:
   :show-inheritance:
```

## Module contents

```{eval-rst}
.. automodule:: download_pipeline
   :members:
   :undoc-members:
   :show-inheritance:
```

================================================
FILE: docs/index.md
================================================

```{include} README.md
```

## Contents

```{eval-rst}
.. toctree::
   :maxdepth: 2
   :titlesonly:
   :glob:
   :includehidden:

   weather_dl/README
   weather_mv/README
   weather_sp/README
   Configuration
   Efficient-Requests
   Runners
   Private-IP-Configuration
   Runtime-Container
   CONTRIBUTING
   modules
```




# Indices and tables
```{eval-rst}
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
```

================================================
FILE: docs/loader_pipeline.md
================================================
# loader_pipeline package

## Submodules

### loader_pipeline.netcdf_loader module

```{eval-rst}
.. automodule:: weather_mv.loader_pipeline
   :members:
   :undoc-members:
   :show-inheritance:
```

## Module contents

```{eval-rst}
.. automodule:: loader_pipeline
   :members:
   :undoc-members:
   :show-inheritance:
```

================================================
FILE: docs/make.bat
================================================
REM Copyright 2021 Google LLC
REM
REM Licensed under the Apache License, Version 2.0 (the "License");
REM you may not use this file except in compliance with the License.
REM You may obtain a copy of the License at
REM
REM     https://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

if "%1" == "" goto help

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd


================================================
FILE: docs/modules.md
================================================
# Modules

```{toctree}
:maxdepth: 4
download_pipeline
loader_pipeline
splitter_pipeline
```



================================================
FILE: docs/requirements.txt
================================================
# doc requirements
myst-parser==0.13.7
sphinx>=2.1
Jinja2<3.1

================================================
FILE: docs/splitter_pipeline.md
================================================
# splitter_pipeline package

## Submodules

### splitter_pipeline.file_splitters module


```{eval-rst}
.. automodule:: weather_sp.splitter_pipeline.file_splitters
   :members:
   :undoc-members:
   :show-inheritance:
```

### splitter_pipeline.pipeline module

```{eval-rst}
.. automodule:: weather_sp.splitter_pipeline.pipeline
   :members:
   :undoc-members:
   :show-inheritance:
```

## Module contents

```{eval-rst}
.. automodule:: splitter_pipeline
   :members:
   :undoc-members:
   :show-inheritance:
```

================================================
FILE: environment.yml
================================================
name: weather-tools
channels:
  - conda-forge
dependencies:
  - python=3.8.13
  - apache-beam=2.40.0
  - xarray-beam=0.6.2
  - xarray=2023.1.0
  - fsspec=2022.11.0
  - gcsfs=2022.11.0
  - rioxarray=0.13.4
  - gdal=3.5.1
  - pyproj=3.4.0
  - ecmwf-api-client=1.6.3
  - eccodes=2.27.0
  - cfgrib=0.9.10.2
  - pygrib=2.1.4
  - metview-batch=5.17.0
  - netcdf4=1.6.1
  - geojson=2.5.0=py_0
  - simplejson=3.17.6
  - numpy=1.22.4
  - pandas=1.5.1
  - google-cloud-sdk=410.0.0
  - aria2=1.36.0
  - pip=24.3.1
  - zarr=2.15.0
  - google-cloud-monitoring=2.22.2
  - pillow=10.4.0
  - cdsapi=0.7.5
  - pip:
    - cython==0.29.34
    - earthengine-api==0.1.329
    - firebase-admin==6.0.1
    - contourpy==1.1.1
    - google-crc32c==1.1.2
    - MarkupSafe==2.1.5
    - setuptools==70.3.0
    - pyparsing==3.1.4
    - .
    - ./weather_dl
    - ./weather_mv
    - ./weather_sp


================================================
FILE: pyproject.toml
================================================
[tool.ruff]
# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
select = ["E", "F", "W"]
ignore = []

# Allow autofix for all enabled rules (when `--fix`) is provided.
fixable = ["A", "B", "C", "D", "E", "F"]
unfixable = []

# Exclude a variety of commonly ignored directories.
exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".hg",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "venv",
]

# Same as Black.
line-length = 120

# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

# Assume Python 3.10.
target-version = "py310"

[tool.ruff.mccabe]
# Unlike Flake8, default to a complexity level of 10.
max-complexity = 10


================================================
FILE: setup.cfg
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[pytype]
inputs = weather_dl
         weather_mv
         weather_sp


================================================
FILE: setup.py
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from setuptools import find_packages, setup

beam_gcp_requirements = [
    "google-cloud-bigquery==2.34.4",
    "google-cloud-bigquery-storage==2.14.1",
    "google-cloud-bigtable==1.7.2",
    "google-cloud-core==1.7.3",
    "google-cloud-datastore==1.15.5",
    "google-cloud-dlp==3.8.0",
    "google-cloud-language==1.3.2",
    "google-cloud-pubsub==2.13.4",
    "google-cloud-pubsublite==1.4.2",
    "google-cloud-recommendations-ai==0.2.0",
    "google-cloud-spanner==1.19.3",
    "google-cloud-videointelligence==1.16.3",
    "google-cloud-vision==1.0.2",
    "apache-beam[gcp]==2.40.0",
]

weather_dl_requirements = [
    "cdsapi==0.7.5",
    "ecmwf-api-client",
    "numpy>=1.19.1",
    "pandas",
    "xarray",
    "requests>=2.24.0",
    "google-cloud-firestore",
    "firebase-admin",
    "urllib3==1.26.5",
]

weather_mv_requirements = [
    "dataclasses",
    "numpy",
    "pandas",
    "xarray",
    "cfgrib",
    "netcdf4",
    "geojson",
    "simplejson",
    "rioxarray",
    "rasterio",
    "earthengine-api>=0.1.263",
    "pyproj",  # requires separate binary installation!
    "gdal",  # requires separate binary installation!
    "xarray-beam==0.6.2",
    "gcsfs==2022.11.0",
    "zarr==2.15.0",
]

weather_sp_requirements = [
    "numpy>=1.20.3",
    "pygrib",
    "xarray",
    "scipy",
]

test_requirements = [
    "pytype==2021.11.29",
    "ruff==0.1.2",
    "pytest",
    "pytest-subtests",
    "netcdf4",
    "numpy",
    "xarray",
    "xarray-beam",
    "absl-py",
    "metview",
    "memray",
    "pytest-memray",
    "h5py",
    "pooch",
]

all_test_requirements = beam_gcp_requirements + weather_dl_requirements + \
                        weather_mv_requirements + weather_sp_requirements + \
                        test_requirements

setup(
    name='google-weather-tools',
    packages=find_packages(),
    author='Anthromets',
    author_email='anthromets-ecmwf@google.com',
    url='https://weather-tools.readthedocs.io/',
    description='Apache Beam pipelines to make weather data accessible and useful.',
    long_description=open('README.md', 'r', encoding='utf-8').read(),
    long_description_content_type='text/markdown',
    platforms=['darwin', 'linux'],
    license='Apache-2.0',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Console',
        'Intended Audience :: Science/Research',
        'Intended Audience :: Developers',
        'Intended Audience :: Information Technology',
        'License :: OSI Approved :: Apache Software License',
        'Operating System :: MacOS :: MacOS X',
        # 'Operating System :: Microsoft :: Windows',  # TODO(#64): Fully support Windows.
        'Operating System :: POSIX',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Topic :: Scientific/Engineering :: Atmospheric Science',

    ],
    python_requires='>=3.8, <3.10',
    install_requires=['apache-beam[gcp]==2.40.0', 'gcsfs==2022.11.0'],
    use_scm_version=True,
    setup_requires=['setuptools_scm'],
    scripts=['weather_dl/weather-dl', 'weather_mv/weather-mv', 'weather_sp/weather-sp'],
    tests_require=test_requirements,
    extras_require={
        'docs': ['tox', 'sphinx>=2.1', 'myst-parser', 'Jinja2<3.1'],
        'test': all_test_requirements,
        'dev': ['google-weather-tools[docs,test]'],
        'regrid': ['metview']
    },
    project_urls={
        'Issue Tracking': 'http://github.com/google/weather-tools/issues',
    },
)


================================================
FILE: tox.ini
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[tox]
envlist = py38,py39,lint,type

[testenv]
deps = .[test]
commands = pytest

[testenv:type]
deps = .[test]
commands = pytype

[testenv:lint]
deps =
    .[test]
    flake8
commands =
    flake8 weather_dl
    flake8 weather_mv
    flake8 weather_sp

[flake8]
max-line-length = 120

[gh-actions]
python =
    3.8: py38, lint, type
    3.9: py39


================================================
FILE: weather_dl/MANIFEST.in
================================================
global-exclude *_test.py
include README.md
exclude download_status*.py

================================================
FILE: weather_dl/README.md
================================================
# ⛈ `weather-dl` – Weather Downloader

Weather Downloader ingests weather data to cloud buckets, such
as [Google Cloud Storage](https://cloud.google.com/storage) (_beta_).

## Features

* **Flexible Pipelines**: `weather-dl` offers a high degree of control over what is downloaded via configuration files.
  Separate scripts need not be written to get new data or add parameters. For more, see the
  [configuration docs](../Configuration.md).
* **Efficient Parallelization**: The tool gives you full control over how downloads are sharded and parallelized (with
  good defaults). This lets you focus on the data and not the plumbing.
* **Hassle-Free Dev-Ops**. `weather-dl` and Dataflow make it easy to spin up VMs on your behalf with one command. No
  need to keep your local machine online all night to acquire data.
* **Robust Downloads**. If an error occurs when fetching a shard, Dataflow will automatically retry the download for
  you. Previously downloaded shards will be skipped by default, so you can re-run the tool without having to worry about
  duplication of work.

> Note: Currently, only ECMWF's MARS and CDS clients are supported. If you'd like to use `weather-dl` to work with other
> data sources, please [file an issue](https://github.com/google/weather-tools/issues) (or consider
> [making a contribution](../CONTRIBUTING.md)).

## Usage

```
usage: weather-dl [-h] [-f] [-d] [-l] [-m MANIFEST_LOCATION] [-n NUM_REQUESTS_PER_KEY] [-p PARTITION_CHUNKS]
                  [-s {in-order,fair}]
                  config [config ...]


Weather Downloader ingests weather data to cloud storage.

positional arguments:
  config                path/to/configs.cfg, containing client and data information. Can take multiple configs.
                        Accepts *.cfg and *.json files.
```

_Common options_:

* `-f, --force-download`: Force redownload of partitions that were previously downloaded.
* `-d, --dry-run`: Run pipeline steps without _actually_ downloading or writing to cloud storage.
* `-l, --local-run`: Run locally and download to the local hard drive. The data and manifest directory defaults
  to '<$CWD>/local_run', and the runner is set to `DirectRunner`. The only other relevant options are the
  config(s) and `--direct_num_workers`.
* `-m, --manifest-location MANIFEST_LOCATION`: Location of the manifest. By default, it will use Cloud Logging
  (stdout for direct runner). You can set the name of the manifest as the hostname of a URL with the 'cli' protocol.
  For example, `cli://manifest` will prefix all the manifest logs with '[manifest]'. In addition, users can specify
  either a Firestore collection URI (`fs://<my-collection>?projectId=<my-project-id>`), a
  BigQuery table (`bq://<project-id>.<dataset-name>.<table-name>`), or `noop://<name>`
  for an in-memory location.
* `-n, --num-requests-per-key`: Number of concurrent requests to make per API key. Default: make an educated guess per
  client & config. Please see the client documentation for more details.
* `-p, --partition-chunks`: Group shards into chunks of this size when computing the partitions. Specifically, this 
  controls how we chunk elements in a cartesian product, which affects parallelization of that step. Default: chunks of 
  1000 elements for 'in-order' scheduling. Chunks of 1 element for 'fair' scheduling. 
* `-s, --schedule {in-order,fair}`: When using multiple configs, decide how partitions are scheduled: 'in-order' implies 
  that partitions will be processed in sequential order of each config; 'fair' means that partitions from each config 
  will be interspersed evenly. Note: When using 'fair' scheduling, we recommend you set the '--partition-chunks' to a 
  much smaller number. Default: 'in-order'.
* `--log-level`: An integer to configure the log level. Default: 2 (INFO).
* `--use-local-code`: Supply local code to the Runner. Default: False.

> Note: 
>  * When using a BigQuery manifest, the tool will create the BQ table itself if it does not already exist.
>    Otherwise, it will use the existing table, though it may report errors in case of a schema mismatch.
>  * To run complex queries on the Firestore manifest, users may find it helpful to replicate Firestore to BigQuery 
>    using the automated process described in 
>    [this article](https://medium.com/@ammppp/automated-firestore-replication-to-bigquery-15915d518e38). 
>    By following the step-by-step instructions, users can easily set up the automated replication and then use BigQuery 
>    to perform advanced analysis on the data.

Invoke with `-h` or `--help` to see the full range of options.

For further information on how to write config files, please consult [this documentation](../Configuration.md).

_Usage Examples_:

Run locally, downloading to disk:

```bash
weather-dl configs/era5_example_config_local_run.cfg --local-run
```

Preview download with a dry run:

```bash
weather-dl configs/mars_example_config.cfg --dry-run
```

Using the DataflowRunner:

```bash
weather-dl configs/mars_example_config.cfg \
           --runner DataflowRunner \
           --project $PROJECT \
           --temp_location gs://$BUCKET/tmp  \
           --job_name $JOB_NAME
```

Using the DataflowRunner with local code for the pipeline:

```bash
weather-dl configs/mars_example_config.cfg \
           --runner DataflowRunner \
           --project $PROJECT \
           --temp_location gs://$BUCKET/tmp  \
           --job_name $JOB_NAME \
           --use-local-code
```

Using the DataflowRunner and specifying 3 requests per license:

```bash
weather-dl configs/mars_example_config.cfg \
           -n 3 \
           --runner DataflowRunner \
           --project $PROJECT \
           --temp_location gs://$BUCKET/tmp  \
           --job_name $JOB_NAME
```

For a full list of how to configure the Dataflow pipeline, please review
[this table](https://cloud.google.com/dataflow/docs/reference/pipeline-options).

## Monitoring

You can check the status of your ECMWF API jobs by visiting the client-specific job queue:

* [MARS](https://apps.ecmwf.int/mars-activity/)
* [Copernicus](https://cds.climate.copernicus.eu/requests?tab=all)

If you use Google Cloud Storage, we recommend the [`gcloud storage` CLI](https://cloud.google.com/sdk/gcloud/reference/storage)
to inspect the progress of your downloads. For example:

```shell
# Check that the file-sizes of your downloads look alright
gcloud storage du --readable-sizes gs://your-cloud-bucket/mars-data/*T00z.nc 
# See how many downloads have finished
gcloud storage du --readable-sizes gs://your-cloud-bucket/mars-data/*T00z.nc | wc -l
```


================================================
FILE: weather_dl/__init__.py
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: weather_dl/download_pipeline/__init__.py
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .pipeline import run, pipeline


def cli(extra=None):
    import sys
    pipeline(run(sys.argv + (extra or [])))
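
# Editor's note (an illustrative sketch, not part of the source): the
# `weather-dl` console script presumably forwards to this entry point, so the
# pipeline can also be driven programmatically, e.g.:
#
#   from weather_dl.download_pipeline import cli
#   cli(['--dry-run'])  # appends extra flags to sys.argv before parsing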


================================================
FILE: weather_dl/download_pipeline/clients.py
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ECMWF Downloader Clients."""

import abc
import collections
import contextlib
import datetime
import io
import json
import logging
import os
import time
import typing as t
import warnings
from urllib.parse import urljoin

from cdsapi import api as cds_api
import urllib3
from ecmwfapi import api

from .config import Config, optimize_selection_partition
from .manifest import Manifest, Stage
from .util import download_with_aria2, retry_with_exponential_backoff

warnings.simplefilter(
    "ignore", category=urllib3.connectionpool.InsecureRequestWarning)


class Client(abc.ABC):
    """Weather data provider client interface.

    Defines methods and properties required to efficiently interact with weather
    data providers.

    Attributes:
        config: A config that contains pipeline parameters, such as API keys.
        level: Default log level for the client.
    """

    def __init__(self, config: Config, level: int = logging.INFO) -> None:
        """Clients are initialized with the general CLI configuration."""
        self.config = config
        self.logger = logging.getLogger(f'{__name__}.{type(self).__name__}')
        self.logger.setLevel(level)

    @abc.abstractmethod
    def retrieve(self, dataset: str, selection: t.Dict, output: str, manifest: Manifest) -> None:
        """Download from data source."""
        pass

    @classmethod
    @abc.abstractmethod
    def num_requests_per_key(cls, dataset: str) -> int:
        """Specifies the number of workers to be used per api key for the dataset."""
        pass

    @property
    @abc.abstractmethod
    def license_url(self):
        """Specifies the License URL."""
        pass
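
# Editor's sketch (hypothetical, for illustration only): a minimal concrete
# `Client` implements the three abstract members; compare `FakeClient` at the
# bottom of this module for the in-repo example.
#
#   class EchoClient(Client):
#       def retrieve(self, dataset, selection, output, manifest) -> None:
#           with open(output, 'w') as f:
#               json.dump({dataset: selection}, f)
#
#       @classmethod
#       def num_requests_per_key(cls, dataset: str) -> int:
#           return 1
#
#       @property
#       def license_url(self):
#           return 'https://example.com/licence'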


class SplitCDSRequest:
    """Extended CDS class that separates the fetch and download stages."""
    def __init__(self, *args, **kwargs):
        self.__cds_client = cds_api.Client(*args, **kwargs)

    @retry_with_exponential_backoff
    def _download(self, url, path: str, size: int) -> None:
        self.__cds_client.info("Downloading %s to %s (%s)", url, path, cds_api.bytes_to_string(size))
        start = time.time()

        download_with_aria2(url, path)

        elapsed = time.time() - start
        if elapsed:
            self.__cds_client.info("Download rate %s/s", cds_api.bytes_to_string(size / elapsed))

    def fetch(self, request: t.Dict, dataset: str) -> t.Dict:
        result = self.__cds_client.retrieve(dataset, request)
        return {'href': result.location, 'size': result.content_length}

    def download(self, result: cds_api.Result, target: t.Optional[str] = None) -> None:
        if target:
            if os.path.exists(target):
                # Empty the target file, if it already exists, otherwise the
                # transfer below might be fooled into thinking we're resuming
                # an interrupted download.
                open(target, "w").close()

            self._download(result["href"], target, result["size"])


class CdsClient(Client):
    """A client to access weather data from the Cloud Data Store (CDS).

    Datasets on CDS can be found at:
      https://cds.climate.copernicus.eu/cdsapp#!/search?type=dataset

    The parameters section of the input `config` requires two values: `api_url` and
    `api_key`. Alternatively, these values can be set via the environment variables `CDSAPI_URL`
    and `CDSAPI_KEY`. They can be acquired from the following URL, which requires
    creating a free account: https://cds.climate.copernicus.eu/api-how-to

    The CDS global queues for data access have dynamic rate limits. These can be viewed
    live here: https://cds.climate.copernicus.eu/live/limits.

    Attributes:
        config: A config that contains pipeline parameters, such as API keys.
        level: Default log level for the client.
    """

    """Name patterns of datasets that are hosted internally on CDS servers."""
    cds_hosted_datasets = {'reanalysis-era'}

    def retrieve(self, dataset: str, selection: t.Dict, output: str, manifest: Manifest) -> None:
        c = CDSClientExtended(
            url=self.config.kwargs.get('api_url', os.environ.get('CDSAPI_URL')),
            key=self.config.kwargs.get('api_key', os.environ.get('CDSAPI_KEY')),
            debug_callback=self.logger.debug,
            info_callback=self.logger.info,
            warning_callback=self.logger.warning,
            error_callback=self.logger.error,
        )
        selection_ = optimize_selection_partition(selection)
        with StdoutLogger(self.logger, level=logging.DEBUG):
            manifest.set_stage(Stage.FETCH)
            precise_fetch_start_time = (
                datetime.datetime.utcnow()
                .replace(tzinfo=datetime.timezone.utc)
                .isoformat(timespec='seconds')
            )
            manifest.prev_stage_precise_start_time = precise_fetch_start_time
            result = c.fetch(selection_, dataset)
            manifest.set_stage(Stage.DOWNLOAD)
            precise_download_start_time = (
                datetime.datetime.utcnow()
                .replace(tzinfo=datetime.timezone.utc)
                .isoformat(timespec='seconds')
            )
            manifest.prev_stage_precise_start_time = precise_download_start_time
            c.download(result, target=output)

    @property
    def license_url(self):
        return 'https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf'

    @classmethod
    def num_requests_per_key(cls, dataset: str) -> int:
        """Number of requests per key from the CDS API.

        CDS has dynamic, data-specific limits, defined here:
          https://cds.climate.copernicus.eu/live/limits

        Typically, the reanalysis dataset allows for 3-5 simultaneous requests.
        For all standard CDS data (backed on disk drives), it's common that 2
        requests are allowed, though this is dynamically set, too.

        If the Beam pipeline encounters a user request limit error, please cancel
        all outstanding requests (per each user account) at the following link:
        https://cds.climate.copernicus.eu/cdsapp#!/yourrequests
        """
        # TODO(#15): Parse live CDS limits API to set data-specific limits.
        for internal_set in cls.cds_hosted_datasets:
            if dataset.startswith(internal_set):
                return 5
        return 2


class StdoutLogger(io.StringIO):
    """Special logger to redirect stdout to logs."""

    def __init__(self, logger_: logging.Logger, level: int = logging.INFO):
        super().__init__()
        self.logger = logger_
        self.level = level
        self._redirector = contextlib.redirect_stdout(self)

    def log(self, msg) -> None:
        self.logger.log(self.level, msg)

    def write(self, msg):
        if msg and not msg.isspace():
            self.logger.log(self.level, msg)

    def __enter__(self):
        self._redirector.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # let contextlib do any exception handling here
        self._redirector.__exit__(exc_type, exc_value, traceback)
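
# Editor's sketch (illustrative): capture a chatty third-party client's prints
# as structured log records for the duration of a block.
#
#   with StdoutLogger(logging.getLogger(__name__), level=logging.DEBUG):
#       print('captured and re-emitted as a DEBUG log record')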


class SplitMARSRequest(api.APIRequest):
    """Extended MARS APIRequest class that separates fetch and download stage."""
    @retry_with_exponential_backoff
    def _download(self, url, path: str, size: int) -> None:
        self.log(
            "Transferring %s into %s" % (self._bytename(size), path)
        )
        self.log("From %s" % (url,))

        download_with_aria2(url, path)

    def fetch(self, request: t.Dict, dataset: str) -> t.Dict:
        status = None

        self.connection.submit("%s/%s/requests" % (self.url, self.service), request)
        self.log("Request submitted")
        self.log("Request id: " + self.connection.last.get("name"))
        if self.connection.status != status:
            status = self.connection.status
            self.log("Request is %s" % (status,))

        while not self.connection.ready():
            if self.connection.status != status:
                status = self.connection.status
                self.log("Request is %s" % (status,))
            self.connection.wait()

        if self.connection.status != status:
            status = self.connection.status
            self.log("Request is %s" % (status,))

        result = self.connection.result()
        return result

    def download(self, result: t.Dict, target: t.Optional[str] = None) -> None:
        if target:
            if os.path.exists(target):
                # Empty the target file, if it already exists, otherwise the
                # transfer below might be fooled into thinking we're resuming
                # an interrupted download.
                open(target, "w").close()

            self._download(urljoin(self.url, result["href"]), target, result["size"])
        self.connection.cleanup()


class SplitRequestMixin:
    c = None

    def fetch(self, req: t.Dict, dataset: t.Optional[str] = None) -> t.Dict:
        return self.c.fetch(req, dataset)

    def download(self, res: t.Dict, target: str) -> None:
        self.c.download(res, target)


class CDSClientExtended(SplitRequestMixin):
    """Extended CDS Client class that separates fetch and download stage."""
    def __init__(self, *args, **kwargs):
        self.c = SplitCDSRequest(*args, **kwargs)


class MARSECMWFServiceExtended(api.ECMWFService, SplitRequestMixin):
    """Extended MARS ECMFService class that separates fetch and download stage."""
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.c = SplitMARSRequest(
            self.url,
            "services/%s" % (self.service,),
            email=self.email,
            key=self.key,
            log=self.log,
            verbose=self.verbose,
            quiet=self.quiet,
        )


class PublicECMWFServerExtended(api.ECMWFDataServer, SplitRequestMixin):
    def __init__(self, *args, dataset='', **kwargs):
        super().__init__(*args, **kwargs)
        self.c = SplitMARSRequest(
            self.url,
            "datasets/%s" % (dataset,),
            email=self.email,
            key=self.key,
            log=self.log,
            verbose=self.verbose,
        )


class MarsClient(Client):
    """A client to access data from the Meteorological Archival and Retrieval System (MARS).

    See https://www.ecmwf.int/en/forecasts/datasets for a summary of datasets available
    on MARS. Most notably, MARS provides access to ECMWF's Operational Archive:
    https://www.ecmwf.int/en/forecasts/dataset/operational-archive.

    The client config must contain three parameters to authenticate access to the MARS archive:
    `api_key`, `api_url`, and `api_email`. These can also be configured by setting the
    corresponding environment variables: `MARSAPI_KEY`, `MARSAPI_URL`, and `MARSAPI_EMAIL`.
    These credentials can be looked up after registering for an ECMWF account
    (https://apps.ecmwf.int/registration/) and visiting https://api.ecmwf.int/v1/key/.

    MARS server activity can be observed at https://apps.ecmwf.int/mars-activity/.

    Attributes:
        config: A config that contains pipeline parameters, such as API keys.
        level: Default log level for the client.
    """
    def retrieve(self, dataset: str, selection: t.Dict, output: str, manifest: Manifest) -> None:
        c = MARSECMWFServiceExtended(
            "mars",
            key=self.config.kwargs.get('api_key', os.environ.get("MARSAPI_KEY")),
            url=self.config.kwargs.get('api_url', os.environ.get("MARSAPI_URL")),
            email=self.config.kwargs.get('api_email', os.environ.get("MARSAPI_EMAIL")),
            log=self.logger.debug,
            verbose=True,
        )
        selection_ = optimize_selection_partition(selection)
        with StdoutLogger(self.logger, level=logging.DEBUG):
            manifest.set_stage(Stage.FETCH)
            precise_fetch_start_time = (
                datetime.datetime.utcnow()
                .replace(tzinfo=datetime.timezone.utc)
                .isoformat(timespec='seconds')
            )
            manifest.prev_stage_precise_start_time = precise_fetch_start_time
            result = c.fetch(req=selection_)
            manifest.set_stage(Stage.DOWNLOAD)
            precise_download_start_time = (
                datetime.datetime.utcnow()
                .replace(tzinfo=datetime.timezone.utc)
                .isoformat(timespec='seconds')
            )
            manifest.prev_stage_precise_start_time = precise_download_start_time
            c.download(result, target=output)

    @property
    def license_url(self):
        return 'https://apps.ecmwf.int/datasets/licences/general/'

    @classmethod
    def num_requests_per_key(cls, dataset: str) -> int:
        """Number of requests per key (or user) for the Mars API.

        Mars allows 2 active requests per user and 20 queued requests per user, as of Sept 27, 2021.
        To ensure we never hit a rate limit error during download, we only make use of the active
        requests.
        See: https://confluence.ecmwf.int/display/UDOC/Total+number+of+requests+a+user+can+submit+-+Web+API+FAQ

        Queued requests can _only_ be canceled manually from a web dashboard. If the
        `ERROR 101 (USER_QUEUED_LIMIT_EXCEEDED)` error occurs in the Beam pipeline, then go to
        http://apps.ecmwf.int/webmars/joblist/ and cancel queued jobs.
        """
        return 2


class ECMWFPublicClient(Client):
    """A client for ECMWF's public datasets, like TIGGE."""
    def retrieve(self, dataset: str, selection: t.Dict, output: str, manifest: Manifest) -> None:
        c = PublicECMWFServerExtended(
            url=self.config.kwargs.get('api_url', os.environ.get("MARSAPI_URL")),
            key=self.config.kwargs.get('api_key', os.environ.get("MARSAPI_KEY")),
            email=self.config.kwargs.get('api_email', os.environ.get("MARSAPI_EMAIL")),
            log=self.logger.debug,
            verbose=True,
            dataset=dataset,
        )
        selection_ = optimize_selection_partition(selection)
        with StdoutLogger(self.logger, level=logging.DEBUG):
            manifest.set_stage(Stage.FETCH)
            precise_fetch_start_time = (
                datetime.datetime.utcnow()
                .replace(tzinfo=datetime.timezone.utc)
                .isoformat(timespec='seconds')
            )
            manifest.prev_stage_precise_start_time = precise_fetch_start_time
            result = c.fetch(req=selection_)
            manifest.set_stage(Stage.DOWNLOAD)
            precise_download_start_time = (
                datetime.datetime.utcnow()
                .replace(tzinfo=datetime.timezone.utc)
                .isoformat(timespec='seconds')
            )
            manifest.prev_stage_precise_start_time = precise_download_start_time
            c.download(result, target=output)

    @classmethod
    def num_requests_per_key(cls, dataset: str) -> int:
        # Experimentally validated request limit.
        return 5

    @property
    def license_url(self):
        if not self.config.dataset:
            raise ValueError('must specify a dataset for this client!')
        return f'https://apps.ecmwf.int/datasets/data/{self.config.dataset.lower()}/licence/'


class FakeClient(Client):
    """A client that writes the selection arguments to the output file."""

    def retrieve(self, dataset: str, selection: t.Dict, output: str, manifest: Manifest) -> None:
        manifest.set_stage(Stage.RETRIEVE)
        precise_retrieve_start_time = (
            datetime.datetime.utcnow()
            .replace(tzinfo=datetime.timezone.utc)
            .isoformat(timespec='seconds')
        )
        manifest.prev_stage_precise_start_time = precise_retrieve_start_time
        self.logger.debug(f'Downloading {dataset} to {output}')
        with open(output, 'w') as f:
            json.dump({dataset: selection}, f)

    @property
    def license_url(self):
        return 'lorem ipsum'

    @classmethod
    def num_requests_per_key(cls, dataset: str) -> int:
        return 1


CLIENTS = collections.OrderedDict(
    cds=CdsClient,
    mars=MarsClient,
    ecpublic=ECMWFPublicClient,
    fake=FakeClient,
)
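
# Editor's note (illustrative): `CLIENTS` maps a config's `client` field to its
# implementation; the pipeline instantiates clients via `CLIENTS[name](config)`
# (see `Fetcher.fetch_data` in fetcher.py). For example:
#
#   client_cls = CLIENTS['cds']
#   limit = client_cls.num_requests_per_key('reanalysis-era5-single-levels')  # -> 5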


================================================
FILE: weather_dl/download_pipeline/clients_test.py
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from .clients import FakeClient, CdsClient, MarsClient


class MaxWorkersTest(unittest.TestCase):
    def test_cdsclient_internal(self):
        self.assertEqual(CdsClient.num_requests_per_key("reanalysis-era5-some-data"), 5)

    def test_cdsclient_mars_hosted(self):
        self.assertEqual(CdsClient.num_requests_per_key("reanalysis-carra-height-levels"), 2)

    def test_marsclient(self):
        self.assertEqual(MarsClient.num_requests_per_key("reanalysis-era5-some-data"), 2)

    def test_fakeclient(self):
        self.assertEqual(FakeClient.num_requests_per_key("reanalysis-era5-some-data"), 1)


if __name__ == '__main__':
    unittest.main()


================================================
FILE: weather_dl/download_pipeline/config.py
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import calendar
import copy
import dataclasses
import itertools
import typing as t

Values = t.Union[t.List['Values'], t.Dict[str, 'Values'], bool, int, float, str]  # pytype: disable=not-supported-yet


@dataclasses.dataclass
class Config:
    """Contains pipeline parameters.

    Attributes:
        config_name:
            Name of the config file.
        client:
            Name of the Weather-API-client. Supported clients are mentioned in the 'CLIENTS' variable.
        dataset (optional):
            Name of the target dataset. Allowed options are dictated by the client.
        partition_keys (optional):
            Choose the keys from the selection section to partition the data request.
            This will compute a cartesian cross product of the selected keys
            and assign each as their own download.
        target_path:
            Download artifact filename template. Can make use of Python's standard string formatting.
            It can contain format symbols to be replaced by partition keys;
            if this is used, the total number of format symbols must match the number of partition keys.
        subsection_name:
            Name of the particular subsection. 'default' if there is no subsection.
        force_download:
            Force redownload of partitions that were previously downloaded.
        user_id:
            Username from the environment variables.
        kwargs (optional):
            For representing subsections or any other parameters.
        selection:
            Contains parameters used to select desired data.
    """

    config_name: str = ""
    client: str = ""
    dataset: t.Optional[str] = ""
    target_path: str = ""
    partition_keys: t.Optional[t.List[str]] = dataclasses.field(default_factory=list)
    subsection_name: str = "default"
    force_download: bool = False
    user_id: str = "unknown"
    kwargs: t.Optional[t.Dict[str, Values]] = dataclasses.field(default_factory=dict)
    selection: t.Dict[str, Values] = dataclasses.field(default_factory=dict)

    @classmethod
    def from_dict(cls, config: t.Dict) -> 'Config':
        config_instance = cls()
        for section_key, section_value in config.items():
            if section_key == "parameters":
                for key, value in section_value.items():
                    if hasattr(config_instance, key):
                        setattr(config_instance, key, value)
                    else:
                        config_instance.kwargs[key] = value
            if section_key == "selection":
                config_instance.selection = section_value
        return config_instance
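
# Editor's sketch (illustrative): building a Config from parsed *.cfg sections.
# Parameter keys that are not dataclass fields fall through to `kwargs`:
#
#   cfg = Config.from_dict({
#       'parameters': {'client': 'cds', 'target_path': 'gs://bucket/{}.nc',
#                      'api_key': 'secret'},  # not a field -> stored in kwargs
#       'selection': {'year': ['2020'], 'month': ['01']},
#   })
#   assert cfg.client == 'cds' and cfg.kwargs['api_key'] == 'secret'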


def optimize_selection_partition(selection: t.Dict) -> t.Dict:
    """Compute right-hand-side values for the selection section of a single partition.

    Used to support custom syntax and optimizations, such as 'all'.
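
    Illustrative example (editor's addition): with day='all', the
    year/month/day keys are replaced by an explicit list of dates::

        optimize_selection_partition({'year': '2020', 'month': '2', 'day': 'all'})
        # -> {'date': ['2020-02-01', ..., '2020-02-29']}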
    """
    selection_ = copy.deepcopy(selection)

    if 'date_range' in selection_.keys():
        selection_['date'] = selection_['date_range'][0]
        del selection_['date_range']

    if 'day' in selection_.keys() and selection_['day'] == 'all':
        years, months = selection_['year'], selection_['month']

        multiples_error = "When using day='all' in selection, '/' is not allowed in {type}."

        if isinstance(years, str):
            years = [years]

        if isinstance(months, str):
            months = [months]

        date_ranges = []

        # Generating dates for every year-month.
        for year, month in itertools.product(years, months):

            if isinstance(year, str):
                assert '/' not in year, multiples_error.format(type='year')

            if isinstance(month, str):
                assert '/' not in month, multiples_error.format(type='month')

            year, month = int(year), int(month)

            _, n_days_in_month = calendar.monthrange(year, month)

            date_range = [f'{year:04d}-{month:02d}-{day:02d}' for day in range(1, n_days_in_month + 1)]

            date_ranges.extend(date_range)

        selection_['date'] = date_ranges
        del selection_['day']
        del selection_['month']
        del selection_['year']

    return selection_


================================================
FILE: weather_dl/download_pipeline/config_test.py
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import calendar
import itertools
import os
import unittest

import weather_dl
from .config import optimize_selection_partition
from .pipeline import run


class ConfigTest(unittest.TestCase):

    def setUp(self):
        self.data_dir = f'{next(iter(weather_dl.__path__))}/../configs'

    def test_process_config_files(self):
        for filename in os.listdir(self.data_dir):
            if filename.startswith('.'):
                continue
            # only files, no directories
            if os.path.isdir(os.path.join(self.data_dir, filename)):
                continue
            with self.subTest(filename=filename):
                config = os.path.join(self.data_dir, filename)
                try:
                    run(["weather-dl", config, "--dry-run"])
                except:  # noqa: E722
                    self.fail(f'Config {filename!r} is incorrect.')

    def test_process_multi_config_files(self):
        for filename in os.listdir(self.data_dir):
            if filename.startswith('.'):
                continue
            # only directories, no files
            if not os.path.isdir(os.path.join(self.data_dir, filename)):
                continue
            with self.subTest(dirname=filename):
                configs_dir = os.path.join(self.data_dir, filename)
                configs = [os.path.join(configs_dir, c) for c in os.listdir(configs_dir)]
                try:
                    run(["weather-dl"] + configs + ["--dry-run"])
                except:  # noqa: E722
                    self.fail(f'Configs {filename!r} is incorrect.')


class SelectionSyntaxTest(unittest.TestCase):

    def test_all_days__invalid_year(self):
        selection_with_multiple_years = {'year': '2020/2021', 'month': '2', 'day': 'all'}
        with self.assertRaisesRegex(AssertionError, "When using day='all' in selection, '/' is not allowed in year."):
            optimize_selection_partition(selection_with_multiple_years)

        selection_with_multiple_years = {'year': ['2020/2021'], 'month': '2', 'day': 'all'}
        with self.assertRaisesRegex(AssertionError, "When using day='all' in selection, '/' is not allowed in year."):
            optimize_selection_partition(selection_with_multiple_years)

    def test_all_days__invalid_month(self):
        selection_with_multiple_years = {'year': '2020', 'month': '1/2/3', 'day': 'all'}
        with self.assertRaisesRegex(AssertionError, "When using day='all' in selection, '/' is not allowed in month."):
            optimize_selection_partition(selection_with_multiple_years)

        selection_with_multiple_years = {'year': '2020', 'month': ['1/2/3'], 'day': 'all'}
        with self.assertRaisesRegex(AssertionError, "When using day='all' in selection, '/' is not allowed in month."):
            optimize_selection_partition(selection_with_multiple_years)

    def test_date_range(self):
        selection_with_date_range = {'date_range': ['2017-01-01/to/2017-01-10']}
        actual = optimize_selection_partition(selection_with_date_range)
        self.assertEqual(actual['date'], selection_with_date_range['date_range'][0])

    def test_with_year_month_as_string(self):
        selection_with_multiple_months = {'year': '2017', 'month': '12', 'day':'all'}
        actual = optimize_selection_partition(selection_with_multiple_months)

        expected = []
        _, n_days_in_month = calendar.monthrange(2017, 12)
        expected = [f'2017-12-{day:02d}' for day in range(1, n_days_in_month + 1)]

        self.assertEqual(actual['date'], expected)
        self.assertNotIn('day', actual)
        self.assertNotIn('month', actual)
        self.assertNotIn('year', actual)

    def test_with_multiple_months(self):
        selection_with_multiple_months = {'year': ['2017'], 'month': ['2', '4', '6', '8'], 'day':'all'}
        actual = optimize_selection_partition(selection_with_multiple_months)

        expected = []
        for y, m in itertools.product(selection_with_multiple_months['year'], selection_with_multiple_months['month']):
            y, m = int(y), int(m)
            _, n_days_in_month = calendar.monthrange(y, m)
            date_range = [f'{y:04d}-{m:02d}-{day:02d}' for day in range(1, n_days_in_month + 1)]
            expected.extend(date_range)

        self.assertEqual(actual['date'], expected)
        self.assertNotIn('day', actual)
        self.assertNotIn('month', actual)
        self.assertNotIn('year', actual)

    def test_with_multiple_years(self):
        selection_with_multiple_years = {'year': ['2017', '2018'], 'month': ['2', '4', '6', '8'], 'day':'all'}
        actual = optimize_selection_partition(selection_with_multiple_years)

        expected = []
        for y, m in itertools.product(selection_with_multiple_years['year'], selection_with_multiple_years['month']):
            y, m = int(y), int(m)
            _, n_days_in_month = calendar.monthrange(y, m)
            date_range = [f'{y:04d}-{m:02d}-{day:02d}' for day in range(1, n_days_in_month + 1)]
            expected.extend(date_range)

        self.assertEqual(actual['date'], expected)
        self.assertNotIn('day', actual)
        self.assertNotIn('month', actual)
        self.assertNotIn('year', actual)


================================================
FILE: weather_dl/download_pipeline/fetcher.py
================================================
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import dataclasses
import datetime
import logging
import tempfile
import typing as t

import apache_beam as beam

from .clients import CLIENTS, Client
from .config import Config
from .manifest import Manifest, NoOpManifest, Location, Stage
from .parsers import prepare_target_name
from .partition import skip_partition
from .stores import Store, FSStore
from .util import copy, retry_with_exponential_backoff

logger = logging.getLogger(__name__)


@dataclasses.dataclass
class Fetcher(beam.DoFn):
    """Executes client download requests.

    Given a sequence of configs (keyed by subsection parameters and number of allowed
    requests), this will execute retrievals via each client. The keyed structure, the
    result of a `beam.GroupBy` operation, ensures that all licenses and requests
    are utilized without conflict.

    Attributes:
        client_name: The name of the download client to construct per each request.
        manifest: A manifest to keep track of the status of requests
        store: To manage where downloads are persisted.
    """

    client_name: str
    manifest: Manifest = NoOpManifest(Location('noop://in-memory'))
    store: t.Optional[Store] = None
    log_level: t.Optional[int] = logging.INFO

    def __post_init__(self):
        if self.store is None:
            self.store = FSStore()

    @retry_with_exponential_backoff
    def retrieve(self, client: Client, dataset: str, selection: t.Dict, dest: str) -> None:
        """Retrieve from download client, with retries."""
        client.retrieve(dataset, selection, dest, self.manifest)

    def fetch_data(self, config: Config, *, worker_name: str = 'default') -> None:
        """Download data from a client to a temp file, then upload to Cloud Storage."""
        if not config:
            return

        if skip_partition(config, self.store, self.manifest):
            return

        client = CLIENTS[self.client_name](config, self.log_level)
        target = prepare_target_name(config)

        with tempfile.NamedTemporaryFile() as temp:
            logger.info(f'[{worker_name}] Fetching data for {target!r}.')
            with self.manifest.transact(config.config_name, config.dataset, config.selection, target, config.user_id):
                self.retrieve(client, config.dataset, config.selection, temp.name)

                self.manifest.set_stage(Stage.UPLOAD)
                precise_upload_start_time = (
                    datetime.datetime.utcnow()
                    .replace(tzinfo=datetime.timezone.utc)
                    .isoformat(timespec='seconds')
                )
                self.manifest.prev_stage_precise_start_time = precise_upload_start_time
                logger.info(f'[{worker_name}] Uploading to store for {target!r}.')

                # In dry-run mode (the 'fake' client), we aren't required to upload a file.
                if self.client_name != "fake":
                    copy(temp.name, target)

                logger.info(f'[{worker_name}] Upload to store complete for {target!r}.')

    def process(self, element) -> None:
        """Execute download requests one-by-one.

        Args:
            element: A `Tuple[Tuple[str, int], Iterator[Config]]` produced by
                the upstream `beam.GroupBy` of partitions.
        """
        (subsection, request_idx), partitions = element
        worker_name = f'{subsection}.{request_idx}'

        logger.info(f'[{worker_name}] Starting requests...')
        logger.debug(f'[{worker_name}] Partitions: {partitions!r}.')

        for partition in partitions:
            beam.metrics.Metrics.counter('Fetcher', subsection).inc()
            self.fetch_data(partition, worker_name=worker_name)
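
# Editor's note (illustrative): elements arriving at `Fetcher.process` are
# shaped like
#
#   (('default', 0), [config_a, config_b, ...])
#
# i.e. a (subsection_name, request_index) key from the upstream `beam.GroupBy`,
# paired with the partitions assigned to that worker slot.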


================================================
FILE: weather_dl/download_pipeline/fetcher_test.py
================================================
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import io
import json
import os
import tempfile
import unittest
from unittest.mock import patch

from .config import Config
from .fetcher import Fetcher
from .manifest import MockManifest, Location
from .stores import InMemoryStore


class FetchDataTest(unittest.TestCase):

    def setUp(self) -> None:
        self.dummy_manifest = MockManifest(Location('dummy-manifest'))

    @patch('weather_dl.download_pipeline.clients.CDSClientExtended.download')
    @patch('weather_dl.download_pipeline.clients.CDSClientExtended.fetch')
    def test_fetch_data(self, mock_fetch, mock_download):
        with tempfile.TemporaryDirectory() as tmpdir:
            config = Config.from_dict({
                'parameters': {
                    'dataset': 'reanalysis-era5-pressure-levels',
                    'partition_keys': ['year', 'month'],
                    'target_path': os.path.join(tmpdir, 'download-{:02d}-{:02d}.nc'),
                    'api_url': 'https://api-url.com/v1/',
                    'api_key': '12345',
                },
                'selection': {
                    'features': ['pressure'],
                    'month': ['12'],
                    'year': ['01']
                }
            })

            fetcher = Fetcher('cds', self.dummy_manifest, InMemoryStore())
            fetcher.fetch_data(config)

            self.assertTrue(os.path.exists(os.path.join(tmpdir, 'download-01-12.nc')))

            mock_fetch.assert_called_with(
                config.selection,
                'reanalysis-era5-pressure-levels',
            )

    @patch('weather_dl.download_pipeline.clients.CDSClientExtended.download')
    @patch('weather_dl.download_pipeline.clients.CDSClientExtended.fetch')
    def test_fetch_data__manifest__returns_success(self, mock_fetch, mock_download):
        with tempfile.TemporaryDirectory() as tmpdir:
            config = Config.from_dict({
                'parameters': {
                    'dataset': 'reanalysis-era5-pressure-levels',
                    'partition_keys': ['year', 'month'],
                    'target_path': os.path.join(tmpdir, 'download-{:02d}-{:02d}.nc'),
                    'api_url': 'https://api-url.com/v1/',
                    'api_key': '12345',
                },
                'selection': {
                    'features': ['pressure'],
                    'month': ['12'],
                    'year': ['01']
                }
            })

            fetcher = Fetcher('cds', self.dummy_manifest, InMemoryStore())
            fetcher.fetch_data(config)

            self.assertDictContainsSubset(dict(
                selection=json.dumps(config.selection),
                location=os.path.join(tmpdir, 'download-01-12.nc'),
                stage='upload',
                status='success',
                error=None,
                username='unknown',
            ), list(self.dummy_manifest.records.values())[0])

    @patch('weather_dl.download_pipeline.clients.CDSClientExtended.fetch')
    def test_fetch_data__manifest__records_retrieve_failure(self, mock_fetch):
        with tempfile.TemporaryDirectory() as tmpdir:
            config = Config.from_dict({
                'parameters': {
                    'dataset': 'reanalysis-era5-pressure-levels',
                    'partition_keys': ['year', 'month'],
                    'target_path': os.path.join(tmpdir, 'download-{:02d}-{:02d}.nc'),
                    'api_url': 'https://api-url.com/v1/',
                    'api_key': '12345',
                },
                'selection': {
                    'features': ['pressure'],
                    'month': ['12'],
                    'year': ['01']
                }
            })

            error = IOError("We don't have enough permissions to download this.")
            mock_fetch.side_effect = error

            with self.assertRaises(IOError) as e:
                fetcher = Fetcher('cds', self.dummy_manifest, InMemoryStore())
                fetcher.fetch_data(config)

            actual = list(self.dummy_manifest.records.values())[0]

            self.assertDictContainsSubset(dict(
                selection=json.dumps(config.selection),
                location=os.path.join(tmpdir, 'download-01-12.nc'),
                stage='fetch',
                status='failure',
                username='unknown',
            ), actual)

            self.assertIn(error.args[0], actual['error'])
            self.assertIn(error.args[0], e.exception.args[0])

    @patch('weather_dl.download_pipeline.clients.CDSClientExtended.fetch')
    def test_fetch_data__manifest__records_gcs_failure(self, mock_fetch):
        with tempfile.TemporaryDirectory() as tmpdir:
            config = Config.from_dict({
                'parameters': {
                    'dataset': 'reanalysis-era5-pressure-levels',
                    'partition_keys': ['year', 'month'],
                    'target_path': os.path.join(tmpdir, 'download-{:02d}-{:02d}.nc'),
                    'api_url': 'https://api-url.com/v1/',
                    'api_key': '12345',
                },
                'selection': {
                    'features': ['pressure'],
                    'month': ['12'],
                    'year': ['01']
                }
            })

            error = IOError("Can't open gcs file.")
            mock_fetch.side_effect = error

            with self.assertRaises(IOError) as e:
                fetcher = Fetcher('cds', self.dummy_manifest, InMemoryStore())
                fetcher.fetch_data(config)

            actual = list(self.dummy_manifest.records.values())[0]

            self.assertDictContainsSubset(dict(
                selection=json.dumps(config.selection),
                location=os.path.join(tmpdir, 'download-01-12.nc'),
                stage='fetch',
                status='failure',
                username='unknown',
            ), actual)

            self.assertIn(error.args[0], actual['error'])
            self.assertIn(error.args[0], e.exception.args[0])

    @patch('weather_dl.download_pipeline.stores.InMemoryStore.open', return_value=io.StringIO())
    @patch('weather_dl.download_pipeline.clients.CDSClientExtended.fetch')
    def test_fetch_data__skips_existing_download(self, mock_fetch, mock_gcs_file):
        with tempfile.TemporaryDirectory() as tmpdir:
            config = Config.from_dict({
                'parameters': {
                    'dataset': 'reanalysis-era5-pressure-levels',
                    'partition_keys': ['year', 'month'],
                    'target_path': os.path.join(tmpdir, 'download-{:02d}-{:02d}.nc'),
                    'api_url': 'https://api-url.com/v1/',
                    'api_key': '12345',
                },
                'selection': {
                    'features': ['pressure'],
                    'month': ['12'],
                    'year': ['01']
                }
            })

            # target file already exists in store...
            store = InMemoryStore()
            store.store[os.path.join(tmpdir, 'download-01-12.nc')] = ''

            fetcher = Fetcher('cds', self.dummy_manifest, store)
            fetcher.fetch_data(config)

            self.assertFalse(mock_gcs_file.called)
            self.assertFalse(mock_fetch.called)


================================================
FILE: weather_dl/download_pipeline/manifest.py
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Client interface for connecting to a manifest."""

import abc
import collections
import dataclasses
import datetime
import enum
import json
import logging
import os
import pandas as pd
import threading
import time
import traceback
import typing as t
from urllib.parse import urlparse, parse_qsl

from .util import (
    to_json_serializable_type,
    fetch_geo_polygon,
    get_file_size,
    get_wait_interval,
    generate_md5_hash,
    retry_with_exponential_backoff,
    GLOBAL_COVERAGE_AREA
)

import firebase_admin
from firebase_admin import firestore
from google.cloud import bigquery
from google.cloud.firestore_v1 import DocumentReference
from google.cloud.firestore_v1.types import WriteResult

"""An implementation-dependent Manifest URI."""
Location = t.NewType('Location', str)

logger = logging.getLogger(__name__)


class ManifestException(Exception):
    """Errors that occur in Manifest Clients."""
    pass


class Stage(enum.Enum):
    """A request can be either in one of the following stages at a time:

    fetch : This represents request is currently in fetch stage i.e. request placed on the client's server
        & waiting for some result before starting download (eg. MARS client).
    download : This represents request is currently in download stage i.e. data is being downloading from client's
        server to the worker's local file system.
    upload : This represents request is currently in upload stage i.e. data is getting uploaded from worker's local
        file system to target location (GCS path).
    retrieve : In case of clients where there is no proper separation of fetch & download stages (eg. CDS client),
        request will be in the retrieve stage i.e. fetch + download.
    """
    RETRIEVE = 'retrieve'
    FETCH = 'fetch'
    DOWNLOAD = 'download'
    UPLOAD = 'upload'


class Status(enum.Enum):
    """Depicts the request's state status:

    scheduled : A request partition is created & scheduled for processing.
        Note: Its corresponding state can be None only.
    in-progress : This represents the request state is currently in-progress (i.e. running).
        The next status would be "success" or "failure".
    success : This represents the request state execution completed successfully without any error.
    failure : This represents the request state execution failed.
    """
    SCHEDULED = 'scheduled'
    IN_PROGRESS = 'in-progress'
    SUCCESS = 'success'
    FAILURE = 'failure'


@dataclasses.dataclass
class DownloadStatus:
    """Data recorded in `Manifest`s reflecting the status of a download."""

    """The name of the config file associated with the request."""
    config_name: str = ""

    """Represents the dataset field of the configuration."""
    dataset: t.Optional[str] = ""

    """Copy of selection section of the configuration."""
    selection: t.Dict = dataclasses.field(default_factory=dict)

    """Location of the downloaded data."""
    location: str = ""

    """Represents area covered by the shard."""
    area: str = ""

    """Current stage of request : 'fetch', 'download', 'retrieve', 'upload' or None."""
    stage: t.Optional[Stage] = None

    """Download status: 'scheduled', 'in-progress', 'success', or 'failure'."""
    status: t.Optional[Status] = None

    """Cause of error, if any."""
    error: t.Optional[str] = ""

    """Identifier for the user running the download."""
    username: str = ""

    """Shard size in GB."""
    size: t.Optional[float] = 0

    """A UTC datetime when download was scheduled."""
    scheduled_time: t.Optional[str] = ""

    """A UTC datetime when the retrieve stage starts."""
    retrieve_start_time: t.Optional[str] = ""

    """A UTC datetime when the retrieve state ends."""
    retrieve_end_time: t.Optional[str] = ""

    """A UTC datetime when the fetch state starts."""
    fetch_start_time: t.Optional[str] = ""

    """A UTC datetime when the fetch state ends."""
    fetch_end_time: t.Optional[str] = ""

    """A UTC datetime when the download state starts."""
    download_start_time: t.Optional[str] = ""

    """A UTC datetime when the download state ends."""
    download_end_time: t.Optional[str] = ""

    """A UTC datetime when the upload state starts."""
    upload_start_time: t.Optional[str] = ""

    """A UTC datetime when the upload state ends."""
    upload_end_time: t.Optional[str] = ""

    @classmethod
    def from_dict(cls, download_status: t.Dict) -> 'DownloadStatus':
        """Instantiate DownloadStatus dataclass from dict."""
        download_status_instance = cls()
        for key, value in download_status.items():
            if key == 'status':
                setattr(download_status_instance, key, Status(value))
            elif key == 'stage' and value is not None:
                setattr(download_status_instance, key, Stage(value))
            else:
                setattr(download_status_instance, key, value)
        return download_status_instance

    @classmethod
    def to_dict(cls, instance) -> t.Dict:
        """Return the fields of a dataclass instance as a manifest ingestible
        dictionary mapping of field names to field values."""
        download_status_dict = {}
        for field in dataclasses.fields(instance):
            key = field.name
            value = getattr(instance, field.name)
            if isinstance(value, Status) or isinstance(value, Stage):
                download_status_dict[key] = value.value
            elif isinstance(value, pd.Timestamp):
                download_status_dict[key] = value.isoformat()
            elif key == 'selection' and value is not None:
                download_status_dict[key] = json.dumps(value)
            else:
                download_status_dict[key] = value
        return download_status_dict
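
# Editor's illustrative sketch (not part of the original module): enum fields
# round-trip through `to_dict()`/`from_dict()` as their string values, while
# `selection` is JSON-encoded on the way out but *not* decoded on the way in:
#
#   status = DownloadStatus(config_name='era5.cfg', selection={'year': 2020},
#                           stage=Stage.UPLOAD, status=Status.SUCCESS)
#   row = DownloadStatus.to_dict(status)    # row['stage'] == 'upload',
#                                           # row['selection'] == '{"year": 2020}'
#   back = DownloadStatus.from_dict(row)    # back.stage is Stage.UPLOAD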


@dataclasses.dataclass
class Manifest(abc.ABC):
    """Abstract manifest of download statuses.

    Update download statuses to some storage medium.

    This class lets one indicate that a download is `scheduled` or in a transaction process.
    In the event of a transaction, a download will be updated with an `in-progress`, `success`
    or `failure` status (with accompanying metadata).

    Example:
        ```
        my_manifest = parse_manifest(Location('fs://some-firestore-collection'), pipeline_options)

        # Schedule data for download
        my_manifest.schedule({'some': 'metadata'}, 'path/to/downloaded/file', 'my-username')

        # ...

        # Initiate a transaction – it will record that the download is `in-progress`
        with my_manifest.transact({'some': 'metadata'}, 'path/to/downloaded/file', 'my-username') as tx:
            # download logic here
            pass

            # ...

            # On error, the transaction will record the download as a `failure` before propagating
            # the error. Otherwise, it will record the download as a `success`.
        ```

    Attributes:
        location: An implementation-specific manifest URI.
        status: The current `DownloadStatus` of the Manifest.
    """

    location: Location
    # To reduce the impact of _read() and _update() calls
    # on the start time of the stage.
    prev_stage_precise_start_time: t.Optional[str] = None
    status: t.Optional[DownloadStatus] = None

    # This is overridden in subclass.
    def __post_init__(self):
        """Initialize the manifest."""
        pass

    def schedule(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None:
        """Indicate that a job has been scheduled for download.

        'scheduled' jobs occur before 'in-progress', 'success' or 'failure'.
        """
        scheduled_time = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat(timespec='seconds')
        self.status = DownloadStatus(
                config_name=config_name,
                dataset=dataset if dataset else None,
                selection=selection,
                location=location,
                area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)),
                username=user,
                stage=None,
                status=Status.SCHEDULED,
                error=None,
                size=None,
                scheduled_time=scheduled_time,
                retrieve_start_time=None,
                retrieve_end_time=None,
                fetch_start_time=None,
                fetch_end_time=None,
                download_start_time=None,
                download_end_time=None,
                upload_start_time=None,
                upload_end_time=None,
            )
        self._update(self.status)

    def skip(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None:
        """Updates the manifest to mark the shards that were skipped in the current job
        as 'upload' stage and 'success' status, indicating that they have already been downloaded.
        """
        old_status = self._read(location)
        # The manifest needs to be updated for a skipped shard if its entry is not present, or
        # if the stage is not 'upload', or if the stage is 'upload' but the status is not 'success'.
        if old_status.location != location or old_status.stage != Stage.UPLOAD or old_status.status != Status.SUCCESS:
            current_utc_time = (
                datetime.datetime.utcnow()
                .replace(tzinfo=datetime.timezone.utc)
                .isoformat(timespec='seconds')
            )

            size = get_file_size(location)

            status = DownloadStatus(
                    config_name=config_name,
                    dataset=dataset if dataset else None,
                    selection=selection,
                    location=location,
                    area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)),
                    username=user,
                    stage=Stage.UPLOAD,
                    status=Status.SUCCESS,
                    error=None,
                    size=size,
                    scheduled_time=None,
                    retrieve_start_time=None,
                    retrieve_end_time=None,
                    fetch_start_time=None,
                    fetch_end_time=None,
                    download_start_time=None,
                    download_end_time=None,
                    upload_start_time=current_utc_time,
                    upload_end_time=current_utc_time,
                )
            self._update(status)
            logger.info(f'Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}.')

    def _set_for_transaction(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None:
        """Reset Manifest state in preparation for a new transaction."""
        self.status = dataclasses.replace(self._read(location))
        self.status.config_name = config_name
        self.status.dataset = dataset if dataset else None
        self.status.selection = selection
        self.status.location = location
        self.status.username = user

    def __enter__(self) -> None:
        pass

    def __exit__(self, exc_type, exc_inst, exc_tb) -> None:
        """Record end status of a transaction as either 'success' or 'failure'."""
        if exc_type is None:
            status = Status.SUCCESS
            error = None
        else:
            status = Status.FAILURE
            # For explanation, see https://docs.python.org/3/library/traceback.html#traceback.format_exception
            error = '\n'.join(traceback.format_exception(exc_type, exc_inst, exc_tb))

        new_status = dataclasses.replace(self.status)
        new_status.error = error
        new_status.status = status
        current_utc_time = (
            datetime.datetime.utcnow()
            .replace(tzinfo=datetime.timezone.utc)
            .isoformat(timespec='seconds')
        )

        # This is necessary for setting the precise start time of the previous stage
        # and end time of the final stage, as well as handling the case of Status.FAILURE.
        if new_status.stage == Stage.FETCH:
            new_status.fetch_start_time = self.prev_stage_precise_start_time
            new_status.fetch_end_time = current_utc_time
        elif new_status.stage == Stage.RETRIEVE:
            new_status.retrieve_start_time = self.prev_stage_precise_start_time
            new_status.retrieve_end_time = current_utc_time
        elif new_status.stage == Stage.DOWNLOAD:
            new_status.download_start_time = self.prev_stage_precise_start_time
            new_status.download_end_time = current_utc_time
        else:
            new_status.upload_start_time = self.prev_stage_precise_start_time
            new_status.upload_end_time = current_utc_time

        new_status.size = get_file_size(new_status.location)

        self.status = new_status

        self._update(self.status)

    def transact(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> 'Manifest':
        """Create a download transaction."""
        self._set_for_transaction(config_name, dataset, selection, location, user)
        return self

    def set_stage(self, stage: Stage) -> None:
        """Sets the current stage in manifest."""
        prev_stage = self.status.stage
        new_status = dataclasses.replace(self.status)
        new_status.stage = stage
        new_status.status = Status.IN_PROGRESS
        current_utc_time = (
            datetime.datetime.utcnow()
            .replace(tzinfo=datetime.timezone.utc)
            .isoformat(timespec='seconds')
        )

        if stage == Stage.FETCH:
            new_status.fetch_start_time = current_utc_time
            new_status.fetch_end_time = None
            new_status.download_start_time = None
            new_status.download_end_time = None
        elif stage == Stage.RETRIEVE:
            new_status.retrieve_start_time = current_utc_time
        elif stage == Stage.DOWNLOAD:
            new_status.fetch_start_time = self.prev_stage_precise_start_time
            new_status.fetch_end_time = current_utc_time
            new_status.download_start_time = current_utc_time
        else:
            if prev_stage == Stage.DOWNLOAD:
                new_status.download_start_time = self.prev_stage_precise_start_time
                new_status.download_end_time = current_utc_time
            else:
                new_status.retrieve_start_time = self.prev_stage_precise_start_time
                new_status.retrieve_end_time = current_utc_time
            new_status.upload_start_time = current_utc_time

        self.status = new_status
        self._update(self.status)
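
    # Editor's illustrative sketch (not part of the original module): a worker
    # typically drives the manifest like this (MARS-style clients go through
    # FETCH -> DOWNLOAD -> UPLOAD; CDS-style clients go through RETRIEVE -> UPLOAD):
    #
    #   with manifest.transact(cfg_name, dataset, selection, loc, user):
    #       manifest.set_stage(Stage.FETCH)      # or Stage.RETRIEVE
    #       ...                                  # place the request on the server
    #       manifest.set_stage(Stage.DOWNLOAD)   # server -> worker's local disk
    #       ...
    #       manifest.set_stage(Stage.UPLOAD)     # local disk -> GCS
    #   # __exit__ records 'success', or 'failure' with a traceback on error.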

    @abc.abstractmethod
    def _read(self, location: str) -> DownloadStatus:
        pass

    @abc.abstractmethod
    def _update(self, download_status: DownloadStatus) -> None:
        pass


class ConsoleManifest(Manifest):

    def __post_init__(self):
        self.name = urlparse(self.location).hostname

    def _read(self, location: str) -> DownloadStatus:
        return DownloadStatus()

    def _update(self, download_status: DownloadStatus) -> None:
        logger.info(f'[{self.name}] {DownloadStatus.to_dict(download_status)!r}')


class LocalManifest(Manifest):
    """Writes a JSON representation of the manifest to local file."""

    _lock = threading.Lock()

    def __init__(self, location: Location) -> None:
        super().__init__(Location(os.path.join(location, 'manifest.json')))
        if location and not os.path.exists(location):
            os.makedirs(location)

        # If the file is empty, it should start out as an empty JSON object.
        if not os.path.exists(self.location) or os.path.getsize(self.location) == 0:
            with open(self.location, 'w') as file:
                json.dump({}, file)

    def _read(self, location: str) -> DownloadStatus:
        """Reads the JSON data from a manifest."""
        assert os.path.exists(self.location), f'{self.location} must exist!'
        with LocalManifest._lock:
            with open(self.location, 'r') as file:
                manifest = json.load(file)
                return DownloadStatus.from_dict(manifest.get(location, {}))

    def _update(self, download_status: DownloadStatus) -> None:
        """Writes the JSON data to a manifest."""
        assert os.path.exists(self.location), f'{self.location} must exist!'
        with LocalManifest._lock:
            with open(self.location, 'r') as file:
                manifest = json.load(file)

            status = DownloadStatus.to_dict(download_status)
            manifest[status['location']] = status

            with open(self.location, 'w') as file:
                json.dump(manifest, file)
                logger.debug('Manifest updated.')
                logger.debug(download_status)
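
    # Editor's illustrative sketch (not part of the original module): after a few
    # updates, `<location>/manifest.json` holds one JSON object keyed by download
    # location, e.g. (paths and values are hypothetical):
    #
    #   {"gs://bucket/era5/2020-01.nc":
    #       {"config_name": "era5.cfg", "stage": "upload", "status": "success", ...}}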


class BQManifest(Manifest):
    """Writes a JSON representation of the manifest to BQ file.

    This is an append-only implementation, the latest value in the manifest
    represents the current state of a download.
    """
    def __init__(self, location: Location) -> None:
        super().__init__(Location(location[5:]))
        TABLE_SCHEMA = [
            bigquery.SchemaField('config_name', 'STRING', mode='REQUIRED',
                                 description="The name of the config file associated with the request."),
            bigquery.SchemaField('dataset', 'STRING', mode='NULLABLE',
                                 description="Represents the dataset field of the configuration."),
            bigquery.SchemaField('selection', 'JSON', mode='REQUIRED',
                                 description="Copy of selection section of the configuration."),
            bigquery.SchemaField('location', 'STRING', mode='REQUIRED',
                                 description="Location of the downloaded data."),
            bigquery.SchemaField('area', 'STRING', mode='NULLABLE',
                                 description="Represents area covered by the shard. "
                                 "ST_GeogFromGeoJson(area): To convert a GeoJSON geometry object into a "
                                 "GEOGRAPHY value. "
                                 "ST_COVERS(geography_expression, ST_GEOGPOINT(longitude, latitude)): To check "
                                 "if a point lies in the given area or not."),
            bigquery.SchemaField('stage', 'STRING', mode='NULLABLE',
                                 description="Current stage of request : 'fetch', 'download', 'retrieve', 'upload' "
                                 "or None."),
            bigquery.SchemaField('status', 'STRING', mode='REQUIRED',
                                 description="Download status: 'scheduled', 'in-progress', 'success', or 'failure'."),
            bigquery.SchemaField('error', 'STRING', mode='NULLABLE',
                                 description="Cause of error, if any."),
            bigquery.SchemaField('username', 'STRING', mode='REQUIRED',
                                 description="Identifier for the user running the download."),
            bigquery.SchemaField('size', 'FLOAT', mode='NULLABLE',
                                 description="Shard size in GB."),
            bigquery.SchemaField('scheduled_time', 'TIMESTAMP', mode='NULLABLE',
                                 description="A UTC datetime when download was scheduled."),
            bigquery.SchemaField('retrieve_start_time', 'TIMESTAMP', mode='NULLABLE',
                                 description="A UTC datetime when the retrieve stage starts."),
            bigquery.SchemaField('retrieve_end_time', 'TIMESTAMP', mode='NULLABLE',
                                 description="A UTC datetime when the retrieve stage ends."),
            bigquery.SchemaField('fetch_start_time', 'TIMESTAMP', mode='NULLABLE',
                                 description="A UTC datetime when the fetch stage starts."),
            bigquery.SchemaField('fetch_end_time', 'TIMESTAMP', mode='NULLABLE',
                                 description="A UTC datetime when the fetch stage ends."),
            bigquery.SchemaField('download_start_time', 'TIMESTAMP', mode='NULLABLE',
                                 description="A UTC datetime when the download stage starts."),
            bigquery.SchemaField('download_end_time', 'TIMESTAMP', mode='NULLABLE',
                                 description="A UTC datetime when the download stage ends."),
            bigquery.SchemaField('upload_start_time', 'TIMESTAMP', mode='NULLABLE',
                                 description="A UTC datetime when the upload stage starts."),
            bigquery.SchemaField('upload_end_time', 'TIMESTAMP', mode='NULLABLE',
                                 description="A UTC datetime when the upload stage ends."),
        ]
        table = bigquery.Table(self.location, schema=TABLE_SCHEMA)
        with bigquery.Client() as client:
            client.create_table(table, exists_ok=True)

    def _read(self, location: str) -> DownloadStatus:
        """Reads the JSON data from a manifest."""
        with bigquery.Client() as client:
            select_statement = f"SELECT * FROM {self.location} WHERE location = @location"

            # Build the QueryJobConfig object with the parameters.
            job_config = bigquery.QueryJobConfig()
            job_config.query_parameters = [bigquery.ScalarQueryParameter('location', 'STRING', location)]

            # Execute the merge statement with the parameters.
            query_job = client.query(select_statement, job_config=job_config)

            # Wait for the query to execute.
            result = query_job.result()
            row = {}
            if result.total_rows > 0:
                records = result.to_dataframe().to_dict('records')
                row = {n: to_json_serializable_type(v) for n, v in records[0].items()}
            return DownloadStatus.from_dict(row)

    # Added retry here to handle the concurrency issue in BigQuery.
    # Eg: 400 Resources exceeded during query execution: Too many DML statements outstanding
    # against table <table-name>, limit is 20
    @retry_with_exponential_backoff
    def _update(self, download_status: DownloadStatus) -> None:
        """Writes the JSON data to a manifest."""
        with bigquery.Client() as client:
            status = DownloadStatus.to_dict(download_status)
            table = client.get_table(self.location)
            columns = [field.name for field in table.schema]
            parameter_type_mapping = {field.name: field.field_type for field in table.schema}

            update_dml = [f"{col} = @{col}" for col in columns]
            insert_dml = [f"@{col}" for col in columns]
            params = {col: status[col] for col in columns}

            # Build the merge statement as a string with parameter placeholders.
            merge_statement = f"""
                MERGE {self.location} T
                USING (
                SELECT
                    @location as location
                ) S
                ON T.location = S.location
                WHEN MATCHED THEN
                UPDATE SET
                    {', '.join(update_dml)}
                WHEN NOT MATCHED THEN
                INSERT
                    ({", ".join(columns)})
                VALUES
                    ({', '.join(insert_dml)})
            """

            logger.debug(merge_statement)

            # Build the QueryJobConfig object with the parameters.
            job_config = bigquery.QueryJobConfig()
            job_config.query_parameters = [bigquery.ScalarQueryParameter(col, parameter_type_mapping[col], value)
                                           for col, value in params.items()]

            # Execute the merge statement with the parameters.
            query_job = client.query(merge_statement, job_config=job_config)

            # Wait for the query to execute.
            query_job.result()

            logger.debug('Manifest updated.')
            logger.debug(download_status)
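
    # Editor's illustrative sketch (not part of the original module): since the
    # MERGE keeps a single row per location, the table can be queried directly for
    # monitoring, e.g. (the table name `project.dataset.manifest` is hypothetical):
    #
    #   SELECT config_name, COUNT(*) AS failures
    #   FROM `project.dataset.manifest`
    #   WHERE status = 'failure'
    #   GROUP BY config_name;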


class FirestoreManifest(Manifest):
    """A Firestore Manifest.
    This Manifest implementation stores DownloadStatuses in a Firebase document store.
    The document hierarchy for the manifest is as follows:
      [manifest  <or manifest name, configurable from CLI>]
      ├── doc_id (md5 hash of the path) { 'selection': {...}, 'location': ..., 'username': ... }
      └── etc...
    Where `[<name>]` indicates a collection and `<name> {...}` indicates a document.
    """

    def _get_db(self) -> firestore.firestore.Client:
        """Acquire a firestore client, initializing the firebase app if necessary.
        Will attempt to get the db client five times. If it's still unsuccessful, a
        `ManifestException` will be raised.
        """
        db = None
        attempts = 0

        while db is None:
            try:
                db = firestore.client()
            except ValueError as e:
                # The above call will fail with a value error when the firebase app is not initialized.
                # Initialize the app here, and try again.
                firebase_admin.initialize_app(options=self.get_firestore_config())
                logger.info('Initialized Firebase App.')

                if attempts > 4:
                    raise ManifestException('Exceeded number of retries to get firestore client.') from e

                # Only back off when acquiring the client failed.
                time.sleep(get_wait_interval(attempts))
                attempts += 1

        return db

    def _read(self, location: str) -> DownloadStatus:
        """Reads the JSON data from a manifest."""

        doc_id = generate_md5_hash(location)

        # Update document with download status
        download_doc_ref = (
            self.root_document_for_store(doc_id)
        )

        result = download_doc_ref.get()
        row = {}
        if result.exists:
            records = result.to_dict()
            row = {n: to_json_serializable_type(v) for n, v in records.items()}
        return DownloadStatus.from_dict(row)

    def _update(self, download_status: DownloadStatus) -> None:
        """Update or create a download status record."""
        logger.debug('Updating Firestore Manifest.')

        status = DownloadStatus.to_dict(download_status)
        doc_id = generate_md5_hash(status['location'])

        # Update document with download status
        download_doc_ref = (
            self.root_document_for_store(doc_id)
        )

        result: WriteResult = download_doc_ref.set(status)

        logger.debug(f'Firestore manifest updated. '
                     f'update_time={result.update_time}, '
                     f'filename={download_status.location}.')

    def root_document_for_store(self, store_scheme: str) -> DocumentReference:
        """Get the manifest document for the given id under the user-configured root collection."""
        # Get user-defined collection for manifest.
        root_collection = self.get_firestore_config().get('collection', 'manifest')
        return self._get_db().collection(root_collection).document(store_scheme)

    def get_firestore_config(self) -> t.Dict:
        """Parse firestore Location format: 'fs://<collection-name>?projectId=<project-id>'
        Users must specify a 'projectId' query parameter in the firestore location. If this argument
        isn't passed in, users must set the `GOOGLE_CLOUD_PROJECT` environment variable.
        Users may specify options to `firebase_admin.initialize_app()` via query arguments in the URL.
        For more information about what options are available, consult this documentation:
        https://firebase.google.com/docs/reference/admin/python/firebase_admin#initialize_app
            Note: each query key-value pair may only appear once. If there are duplicates, the last pair
            will be used.
        Optionally, users may configure these options via the `FIREBASE_CONFIG` environment variable,
        which is typically a path/to/a/file.json.
        Examples:
            >>> location = Location("fs://my-collection?projectId=my-project-id&storageBucket=foo")
            >>> FirestoreManifest(location).get_firestore_config()
            {'collection': 'my-collection', 'projectId': 'my-project-id', 'storageBucket': 'foo'}
        Raises:
            ValueError: If query parameters are malformed.
            AssertionError: If the 'projectId' query parameter is not set.
        """
        parsed = urlparse(self.location)
        query_params = {}
        if parsed.query:
            query_params = dict(parse_qsl(parsed.query, strict_parsing=True))
        return {'collection': parsed.netloc, **query_params}


class MockManifest(Manifest):
    """In-memory mock manifest."""

    def __init__(self, location: Location) -> None:
        super().__init__(location)
        self.records = {}

    def _read(self, location: str) -> DownloadStatus:
        manifest = self.records
        return DownloadStatus.from_dict(manifest.get(location, {}))

    def _update(self, download_status: DownloadStatus) -> None:
        status = DownloadStatus.to_dict(download_status)
        self.records.update({status.get('location'): status})
        logger.debug('Manifest updated.')
        logger.debug(download_status)


class NoOpManifest(Manifest):
    """A manifest that performs no operations."""

    def _read(self, location: str) -> DownloadStatus:
        return DownloadStatus()

    def _update(self, download_status: DownloadStatus) -> None:
        pass


"""Exposed manifest implementations.

Users can choose their preferred manifest implementation via the protocol of the Manifest Location.
The protocol corresponds to the keys of this ordered dictionary.

If no protocol is specified, we assume the user wants to write to the local file system.
If no key is found, the `NoOpManifest` option will be chosen. See `parsers:parse_manifest`.
"""
MANIFESTS = collections.OrderedDict({
    'cli': ConsoleManifest,
    'fs': FirestoreManifest,
    'bq': BQManifest,
    '': LocalManifest,
})
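
# Editor's illustrative sketch (not part of the original module): example Location
# strings and the implementations they select (see `parsers:parse_manifest`):
#
#   'cli://console'                          -> ConsoleManifest
#   'fs://my-collection?projectId=my-proj'   -> FirestoreManifest
#   'bq://my-project.my_dataset.manifest'    -> BQManifest
#   '/path/to/local/dir'                     -> LocalManifest
#   'xyz://anything'                         -> NoOpManifest (fallback)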

if __name__ == '__main__':
    # Execute doc tests
    import doctest

    doctest.testmod()


================================================
FILE: weather_dl/download_pipeline/manifest_test.py
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import random
import string
import tempfile
import typing as t
import unittest

from .manifest import LocalManifest, Location, DownloadStatus, Status, Stage


def rand_str(max_len=32):
    return ''.join([random.choice(string.printable) for _ in range(random.randint(0, max_len))])


def make_download_status(location: t.Optional[str] = None) -> DownloadStatus:
    return DownloadStatus(
        selection={},
        location=rand_str() if location is None else location,
        status=random.choice([Status.SCHEDULED, Status.IN_PROGRESS,
                              Status.SUCCESS, Status.FAILURE]),
        error=random.choice([None] + [rand_str(100) for _ in range(4)]),
        username=random.choice(['user', 'alice', 'bob', 'root']),
        stage=random.choice([Stage.FETCH, Stage.DOWNLOAD,
                             Stage.UPLOAD, Stage.RETRIEVE, None]),
        size=0.039322572,
        scheduled_time="2023-02-07T17:15:26+00:00",
        retrieve_start_time=None,
        retrieve_end_time=None,
        fetch_start_time="2023-02-07T17:15:29+00:00",
        fetch_end_time="2023-02-07T17:21:37+00:00",
        download_start_time="2023-02-07T17:21:39+00:00",
        download_end_time="2023-02-07T17:21:56+00:00",
        upload_start_time="2023-02-07T17:21:59+00:00",
        upload_end_time="2023-02-07T17:22:03+00:00"
    )


class LocalManifestTest(unittest.TestCase):
    NUM_RUNS = 128

    def test_empty_manifest_is_valid_json(self):
        with tempfile.TemporaryDirectory() as dir_:
            manifest = LocalManifest(Location(dir_))
            with open(manifest.location) as file:
                self.is_valid_json(file)

    def test_does_not_overwrite_existing_manifest(self):
        with tempfile.TemporaryDirectory() as dir_:
            with open(f'{dir_}/manifest.json', 'w') as file:
                json.dump({'foo': 'bar'}, file)

            manifest = LocalManifest(Location(dir_))

            with open(manifest.location, 'r') as file:
                manifest = json.load(file)
                self.assertIn('foo', manifest)
                self.assertEqual(manifest['foo'], 'bar')

    def test_writes_valid_json(self):
        with tempfile.TemporaryDirectory() as dir_:
            manifest = LocalManifest(Location(dir_))
            for _ in range(self.NUM_RUNS):
                status = make_download_status()
                manifest._update(status)
                with open(manifest.location) as file:
                    self.is_valid_json(file)

    def test_overwrites_existing_statuses(self):
        locations = ['a', 'b', 'c']
        with tempfile.TemporaryDirectory() as dir_:
            manifest = LocalManifest(Location(dir_))
            for i in range(self.NUM_RUNS):
                status = make_download_status(location=locations[i % 3])
                manifest._update(status)
                with open(manifest.location) as file:
                    self.is_valid_json(file)

            with open(manifest.location) as file:
                manifest = json.load(file)
            self.assertEqual(set(locations), set(manifest.keys()))

    def is_valid_json(self, file: t.IO) -> None:
        """Fails test on error decoding JSON."""
        try:
            json.dumps(json.load(file))
        except json.JSONDecodeError:
            self.fail('JSON is invalid.')


================================================
FILE: weather_dl/download_pipeline/parsers.py
================================================
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Parsers for ECMWF download configuration."""

import ast
import configparser
import copy as cp
import datetime
import json
import string
import textwrap
import typing as t
import numpy as np
from collections import OrderedDict
from dateutil.relativedelta import relativedelta
from urllib.parse import urlparse

from .clients import CLIENTS
from .config import Config
from .manifest import MANIFESTS, Manifest, Location, NoOpManifest


def date(candidate: str) -> datetime.date:
    """Converts ECMWF-format date strings into a `datetime.date`.

    Accepted absolute date formats:
    - YYYY-MM-DD
    - YYYYMMDD
    - YYYY-DDD, where DDD refers to the day of the year

    Relative dates are accepted as a negative day offset from today, e.g. '-1' is yesterday.

    For example:
    - 2021-10-31
    - 19700101
    - 1950-007

    See https://confluence.ecmwf.int/pages/viewpage.action?pageId=118817289 for date format spec.
    Note: Name of month is not supported.
    """
    converted = None

    # Parse relative day value.
    if candidate.startswith('-'):
        return datetime.date.today() + datetime.timedelta(days=int(candidate))

    accepted_formats = ["%Y-%m-%d", "%Y%m%d", "%Y-%j"]

    for fmt in accepted_formats:
        try:
            converted = datetime.datetime.strptime(candidate, fmt).date()
            break
        except ValueError:
            pass

    if converted is None:
        raise ValueError(
            f"Not a valid date: '{candidate}'. Please use valid relative or absolute format."
        )

    return converted
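
# Editor's illustrative sketch (not part of the original module):
#
#   date('2021-10-31')  # -> datetime.date(2021, 10, 31)
#   date('19700101')    # -> datetime.date(1970, 1, 1)
#   date('1950-007')    # -> datetime.date(1950, 1, 7), day 7 of the year
#   date('-1')          # -> yesterday, i.e. today + timedelta(days=-1)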


def time(candidate: str) -> datetime.time:
    """Converts ECMWF-format time strings into a `datetime.time`.

    Accepted time formats:
    - HH:MM
    - HHMM
    - HH

    For example:
    - 18:00
    - 1820
    - 18

    Note: If MM is omitted it defaults to 00.
    """
    converted = None

    accepted_formats = ["%H", "%H:%M", "%H%M"]

    for fmt in accepted_formats:
        try:
            converted = datetime.datetime.strptime(candidate, fmt).time()
            break
        except ValueError:
            pass

    if converted is None:
        raise ValueError(
            f"Not a valid time: '{candidate}'. Please use valid format."
        )

    return converted


def day_month_year(candidate: t.Any) -> int:
    """Converts day, month and year strings into 'int'."""
    try:
        if isinstance(candidate, str) or isinstance(candidate, int):
            return int(candidate)
        raise ValueError('must be a str or int.')
    except ValueError as e:
        raise ValueError(
            f"Not a valid day, month, or year value: {candidate}. Please use valid value."
        ) from e


def date_range_converter(candidate: str) -> str:
    """Replace / with _ to avoid directory creation."""
    return candidate.replace('/', '_')


def parse_literal(candidate: t.Any) -> t.Any:
    try:
        # Support parsing ints with leading zeros, e.g. '01'
        if isinstance(candidate, str) and candidate.isdigit():
            return int(candidate)
        return ast.literal_eval(candidate)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return candidate


def validate(key: str, value: int) -> None:
    """Validates value based on the key."""
    if key == "day":
        assert 1 <= value <= 31, "Day value must be between 1 to 31."
    if key == "month":
        assert 1 <= value <= 12, "Month value must be between 1 to 12."


def typecast(key: str, value: t.Any) -> t.Any:
    """Type the value to its appropriate datatype."""
    SWITCHER = {
        'date': date,
        'time': time,
        'day': day_month_year,
        'month': day_month_year,
        'year': day_month_year,
        'date_range': date_range_converter,
    }
    converted = SWITCHER.get(key, parse_literal)(value)
    validate(key, converted)
    return converted
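
# Editor's illustrative sketch (not part of the original module):
#
#   typecast('date', '-1')    # -> yesterday, via date()
#   typecast('time', '18')    # -> datetime.time(18, 0), via time()
#   typecast('day', '07')     # -> 7, via day_month_year(); validate() checks 1..31
#   typecast('level', '500')  # -> 500, via the parse_literal() fallback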


def _read_config_file(file: t.IO) -> t.Dict:
    """Reads `*.json` or `*.cfg` files."""
    try:
        return json.load(file)
    except json.JSONDecodeError:
        pass

    file.seek(0)

    try:
        config = configparser.ConfigParser()
        config.read_file(file)
        config = {s: dict(config.items(s)) for s in config.sections()}
        return config
    except configparser.ParsingError:
        return {}


def parse_config(file: t.IO) -> t.Dict:
    """Parses a `*.json` or `*.cfg` file into a configuration dictionary."""
    config = _read_config_file(file)
    config_by_section = {s: _parse_lists(v, s) for s, v in config.items()}
    config_with_nesting = parse_subsections(config_by_section)
    return config_with_nesting
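
# Editor's illustrative sketch (not part of the original module): a minimal `*.cfg`
# input such as (section and field names are hypothetical)
#
#   [parameters]
#   client=cds
#   [selection]
#   year=
#       2019
#       2020
#
# is read section-by-section; `_parse_lists` expands multi-line blocks like `year`
# into lists, and `parse_subsections` nests any `parameters.<name>` sections under
# 'parameters'.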


def parse_manifest(location: Location, pipeline_opts: t.Dict) -> Manifest:
    """Constructs a manifest object by parsing the location."""
    project_id__exists = 'project' in pipeline_opts
    project_id__not_set = 'projectId' not in location

    # If the firestore location doesn't specify which project (and, the pipeline
    # knows which project)...
    if location.startswith('fs://') and project_id__not_set and project_id__exists:
        # ...Set the project query param in the Firestore URI.
        start_char = '&' if '?' in location else '?'
        project = pipeline_opts.get('project')
        location += f'{start_char}projectId={project}'

    parsed = urlparse(location)
    return MANIFESTS.get(parsed.scheme, NoOpManifest)(location)
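
# Editor's illustrative sketch (not part of the original module): when the pipeline
# options carry a project, e.g. {'project': 'my-proj'}, then
#
#   parse_manifest(Location('fs://manifest'), {'project': 'my-proj'})
#
# behaves like FirestoreManifest('fs://manifest?projectId=my-proj'); locations with
# an unrecognized scheme fall back to NoOpManifest.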


def _splitlines(block: str) -> t.List[str]:
    """Converts a multi-line block into a list of strings."""
    return [line.strip() for line in block.strip().splitlines()]


def mars_range_value(token: str, key: str) -> t.Union[datetime.date, int, float]:
    """Converts a range token into either a date, int, or float."""
    # TODO(b/175432034): Recognize time values
    try:
        
SYMBOL INDEX (1312 symbols across 96 files)

FILE: weather_dl/download_pipeline/__init__.py
  function cli (line 18) | def cli(extra=[]):

FILE: weather_dl/download_pipeline/clients.py
  class Client (line 41) | class Client(abc.ABC):
    method __init__ (line 52) | def __init__(self, config: Config, level: int = logging.INFO) -> None:
    method retrieve (line 59) | def retrieve(self, dataset: str, selection: t.Dict, output: str, manif...
    method num_requests_per_key (line 65) | def num_requests_per_key(cls, dataset: str) -> int:
    method license_url (line 71) | def license_url(self):
  class SplitCDSRequest (line 76) | class SplitCDSRequest():
    method __init__ (line 78) | def __init__(self, *args, **kwargs):
    method _download (line 82) | def _download(self, url, path: str, size: int) -> None:
    method fetch (line 92) | def fetch(self, request: t.Dict, dataset: str) -> t.Dict:
    method download (line 96) | def download(self, result: cds_api.Result, target: t.Optional[str] = N...
  class CdsClient (line 107) | class CdsClient(Client):
    method retrieve (line 129) | def retrieve(self, dataset: str, selection: t.Dict, output: str, manif...
    method license_url (line 158) | def license_url(self):
    method num_requests_per_key (line 162) | def num_requests_per_key(cls, dataset: str) -> int:
  class StdoutLogger (line 183) | class StdoutLogger(io.StringIO):
    method __init__ (line 186) | def __init__(self, logger_: logging.Logger, level: int = logging.INFO):
    method log (line 192) | def log(self, msg) -> None:
    method write (line 195) | def write(self, msg):
    method __enter__ (line 199) | def __enter__(self):
    method __exit__ (line 203) | def __exit__(self, exc_type, exc_value, traceback):
  class SplitMARSRequest (line 208) | class SplitMARSRequest(api.APIRequest):
    method _download (line 211) | def _download(self, url, path: str, size: int) -> None:
    method fetch (line 219) | def fetch(self, request: t.Dict, dataset: str) -> t.Dict:
    method download (line 242) | def download(self, result: t.Dict, target: t.Optional[str] = None) -> ...
  class SplitRequestMixin (line 254) | class SplitRequestMixin:
    method fetch (line 257) | def fetch(self, req: t.Dict, dataset: t.Optional[str] = None) -> t.Dict:
    method download (line 260) | def download(self, res: t.Dict, target: str) -> None:
  class CDSClientExtended (line 264) | class CDSClientExtended(SplitRequestMixin):
    method __init__ (line 266) | def __init__(self, *args, **kwargs):
  class MARSECMWFServiceExtended (line 270) | class MARSECMWFServiceExtended(api.ECMWFService, SplitRequestMixin):
    method __init__ (line 272) | def __init__(self, *args, **kwargs):
  class PublicECMWFServerExtended (line 285) | class PublicECMWFServerExtended(api.ECMWFDataServer, SplitRequestMixin):
    method __init__ (line 286) | def __init__(self, *args, dataset='', **kwargs):
  class MarsClient (line 298) | class MarsClient(Client):
    method retrieve (line 317) | def retrieve(self, dataset: str, selection: t.Dict, output: str, manif...
    method license_url (line 346) | def license_url(self):
    method num_requests_per_key (line 350) | def num_requests_per_key(cls, dataset: str) -> int:
  class ECMWFPublicClient (line 365) | class ECMWFPublicClient(Client):
    method retrieve (line 367) | def retrieve(self, dataset: str, selection: t.Dict, output: str, manif...
    method num_requests_per_key (line 396) | def num_requests_per_key(cls, dataset: str) -> int:
    method license_url (line 401) | def license_url(self):
  class FakeClient (line 407) | class FakeClient(Client):
    method retrieve (line 410) | def retrieve(self, dataset: str, selection: t.Dict, output: str, manif...
    method license_url (line 423) | def license_url(self):
    method num_requests_per_key (line 427) | def num_requests_per_key(cls, dataset: str) -> int:

FILE: weather_dl/download_pipeline/clients_test.py
  class MaxWorkersTest (line 20) | class MaxWorkersTest(unittest.TestCase):
    method test_cdsclient_internal (line 21) | def test_cdsclient_internal(self):
    method test_cdsclient_mars_hosted (line 24) | def test_cdsclient_mars_hosted(self):
    method test_marsclient (line 27) | def test_marsclient(self):
    method test_fakeclient (line 30) | def test_fakeclient(self):

FILE: weather_dl/download_pipeline/config.py
  class Config (line 24) | class Config:
    method from_dict (line 66) | def from_dict(cls, config: t.Dict) -> 'Config':
  function optimize_selection_partition (line 80) | def optimize_selection_partition(selection: t.Dict) -> t.Dict:

FILE: weather_dl/download_pipeline/config_test.py
  class ConfigTest (line 24) | class ConfigTest(unittest.TestCase):
    method setUp (line 26) | def setUp(self):
    method test_process_config_files (line 29) | def test_process_config_files(self):
    method test_process_multi_config_files (line 43) | def test_process_multi_config_files(self):
  class SelectionSyntaxTest (line 63) | class SelectionSyntaxTest(unittest.TestCase):
    method test_all_days__invalid_year (line 65) | def test_all_days__invalid_year(self):
    method test_all_days__invalid_month (line 74) | def test_all_days__invalid_month(self):
    method test_date_range (line 83) | def test_date_range(self):
    method test_with_year_month_as_string (line 88) | def test_with_year_month_as_string(self):
    method test_with_multiple_months (line 101) | def test_with_multiple_months(self):
    method test_with_multiple_years (line 117) | def test_with_multiple_years(self):

FILE: weather_dl/download_pipeline/fetcher.py
  class Fetcher (line 34) | class Fetcher(beam.DoFn):
    method __post_init__ (line 53) | def __post_init__(self):
    method retrieve (line 58) | def retrieve(self, client: Client, dataset: str, selection: t.Dict, de...
    method fetch_data (line 62) | def fetch_data(self, config: Config, *, worker_name: str = 'default') ...
    method process (line 93) | def process(self, element) -> None:

FILE: weather_dl/download_pipeline/fetcher_test.py
  class FetchDataTest (line 28) | class FetchDataTest(unittest.TestCase):
    method setUp (line 30) | def setUp(self) -> None:
    method test_fetch_data (line 35) | def test_fetch_data(self, mock_fetch, mock_download):
    method test_fetch_data__manifest__returns_success (line 64) | def test_fetch_data__manifest__returns_success(self, mock_fetch, mock_...
    method test_fetch_data__manifest__records_retrieve_failure (line 94) | def test_fetch_data__manifest__records_retrieve_failure(self, mock_fet...
    method test_fetch_data__manifest__records_gcs_failure (line 132) | def test_fetch_data__manifest__records_gcs_failure(self, mock_fetch):
    method test_fetch_data__skips_existing_download (line 171) | def test_fetch_data__skips_existing_download(self, mock_fetch, mock_gc...

FILE: weather_dl/download_pipeline/manifest.py
  class ManifestException (line 53) | class ManifestException(Exception):
  class Stage (line 58) | class Stage(enum.Enum):
  class Status (line 76) | class Status(enum.Enum):
  class DownloadStatus (line 93) | class DownloadStatus():
    method from_dict (line 154) | def from_dict(cls, download_status: t.Dict) -> 'DownloadStatus':
    method to_dict (line 167) | def to_dict(cls, instance) -> t.Dict:
  class Manifest (line 186) | class Manifest(abc.ABC):
    method __post_init__ (line 227) | def __post_init__(self):
    method schedule (line 231) | def schedule(self, config_name: str, dataset: str, selection: t.Dict, ...
    method skip (line 260) | def skip(self, config_name: str, dataset: str, selection: t.Dict, loca...
    method _set_for_transaction (line 300) | def _set_for_transaction(self, config_name: str, dataset: str, selecti...
    method __enter__ (line 309) | def __enter__(self) -> None:
    method __exit__ (line 312) | def __exit__(self, exc_type, exc_inst, exc_tb) -> None:
    method transact (line 352) | def transact(self, config_name: str, dataset: str, selection: t.Dict, ...
    method set_stage (line 357) | def set_stage(self, stage: Stage) -> None:
    method _read (line 393) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 397) | def _update(self, download_status: DownloadStatus) -> None:
  class ConsoleManifest (line 401) | class ConsoleManifest(Manifest):
    method __post_init__ (line 403) | def __post_init__(self):
    method _read (line 406) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 409) | def _update(self, download_status: DownloadStatus) -> None:
  class LocalManifest (line 413) | class LocalManifest(Manifest):
    method __init__ (line 418) | def __init__(self, location: Location) -> None:
    method _read (line 428) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 436) | def _update(self, download_status: DownloadStatus) -> None:
  class BQManifest (line 452) | class BQManifest(Manifest):
    method __init__ (line 458) | def __init__(self, location: Location) -> None:
    method _read (line 509) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 533) | def _update(self, download_status: DownloadStatus) -> None:
  class FirestoreManifest (line 580) | class FirestoreManifest(Manifest):
    method _get_db (line 590) | def _get_db(self) -> firestore.firestore.Client:
    method _read (line 616) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 633) | def _update(self, download_status: DownloadStatus) -> None:
    method root_document_for_store (line 651) | def root_document_for_store(self, store_scheme: str) -> DocumentRefere...
    method get_firestore_config (line 657) | def get_firestore_config(self) -> t.Dict:
  class MockManifest (line 683) | class MockManifest(Manifest):
    method __init__ (line 686) | def __init__(self, location: Location) -> None:
    method _read (line 690) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 694) | def _update(self, download_status: DownloadStatus) -> None:
  class NoOpManifest (line 701) | class NoOpManifest(Manifest):
    method _read (line 704) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 707) | def _update(self, download_status: DownloadStatus) -> None:

FILE: weather_dl/download_pipeline/manifest_test.py
  function rand_str (line 25) | def rand_str(max_len=32):
  function make_download_status (line 29) | def make_download_status(location: t.Optional[str] = None) -> DownloadSt...
  class LocalManifestTest (line 52) | class LocalManifestTest(unittest.TestCase):
    method test_empty_manifest_is_valid_json (line 55) | def test_empty_manifest_is_valid_json(self):
    method does_not_overwrite_existing_manifest (line 61) | def does_not_overwrite_existing_manifest(self):
    method test_writes_valid_json (line 73) | def test_writes_valid_json(self):
    method test_overwrites_existing_statuses (line 82) | def test_overwrites_existing_statuses(self):
    method is_valid_json (line 96) | def is_valid_json(self, file: t.IO) -> None:

FILE: weather_dl/download_pipeline/parsers.py
  function date (line 34) | def date(candidate: str) -> datetime.date:
  function time (line 73) | def time(candidate: str) -> datetime.time:
  function day_month_year (line 107) | def day_month_year(candidate: t.Any) -> int:
  function date_range_converter (line 119) | def date_range_converter(candidate: str) -> str:
  function parse_literal (line 124) | def parse_literal(candidate: t.Any) -> t.Any:
  function validate (line 134) | def validate(key: str, value: int) -> None:
  function typecast (line 142) | def typecast(key: str, value: t.Any) -> t.Any:
  function _read_config_file (line 157) | def _read_config_file(file: t.IO) -> t.Dict:
  function parse_config (line 175) | def parse_config(file: t.IO) -> t.Dict:
  function parse_manifest (line 183) | def parse_manifest(location: Location, pipeline_opts: t.Dict) -> Manifest:
  function _splitlines (line 200) | def _splitlines(block: str) -> t.List[str]:
  function mars_range_value (line 205) | def mars_range_value(token: str, key: str) -> t.Union[datetime.date, int...
  function mars_increment_value (line 225) | def mars_increment_value(token: str) -> t.Union[int, float]:
  function parse_mars_syntax (line 238) | def parse_mars_syntax(block: str, key: str) -> t.List[str]:
  function date_range (line 322) | def date_range(start: datetime.date, end: datetime.date, increment: int ...
  function _parse_lists (line 328) | def _parse_lists(config: dict, section: str = '') -> t.Dict:
  function _number_of_replacements (line 343) | def _number_of_replacements(s: t.Text):
  function parse_subsections (line 351) | def parse_subsections(config: t.Dict) -> t.Dict:
  function require (line 372) | def require(condition: bool, message: str, error_type: t.Type[Exception]...
  function process_config (line 378) | def process_config(file: t.IO, config_name: str) -> Config:
  function prepare_target_name (line 482) | def prepare_target_name(config: Config) -> str:
  function get_subsections (line 490) | def get_subsections(config: Config) -> t.List[t.Tuple[str, t.Dict]]:
  function all_equal (line 515) | def all_equal(iterator):
  function validate_all_configs (line 524) | def validate_all_configs(configs: t.List[Config]) -> None:
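
parse_mars_syntax above expands ECMWF MARS range notation ('start/to/end' with an optional '/by/step') into explicit value lists; the tests in parsers_test.py below cover int, float, zero-padded, date, and reversed variants. A hedged sketch of the integer case only, to illustrate the convention rather than the repository's implementation:

    def expand_mars_int_range(block: str) -> list:
        # Illustration only: 'start/to/end/by/step' -> list of strings.
        # The real parse_mars_syntax also handles dates, floats, padding,
        # and reversed ranges (see parsers_test.py below).
        tokens = block.split("/")
        if len(tokens) >= 3 and tokens[1] == "to":
            start, end = int(tokens[0]), int(tokens[2])
            step = int(tokens[4]) if len(tokens) == 5 and tokens[3] == "by" else 1
            return [str(v) for v in range(start, end + 1, step)]
        return tokens  # plain '/'-separated list

    assert expand_mars_int_range("500/to/700/by/100") == ["500", "600", "700"]
    assert expand_mars_int_range("1/2/3") == ["1", "2", "3"]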

FILE: weather_dl/download_pipeline/parsers_test.py
  class DateTest (line 30) | class DateTest(unittest.TestCase):
    method test_parses_relative_date (line 32) | def test_parses_relative_date(self):
    method test_parses_kebob_date (line 36) | def test_parses_kebob_date(self):
    method test_parses_smooshed_date (line 40) | def test_parses_smooshed_date(self):
    method test_parses_year_and_day_of_year (line 44) | def test_parses_year_and_day_of_year(self):
    method test_throws_error (line 49) | def test_throws_error(self):
  class ParseConfigTest (line 63) | class ParseConfigTest(unittest.TestCase):
    method _assert_no_newlines_in_section (line 65) | def _assert_no_newlines_in_section(self, dictionary) -> None:
    method test_json (line 69) | def test_json(self):
    method test_bad_json (line 74) | def test_bad_json(self):
    method test_json_produces_lists (line 79) | def test_json_produces_lists(self):
    method test_json_parses_mars_list (line 85) | def test_json_parses_mars_list(self):
    method test_json_parses_mars_int_range (line 91) | def test_json_parses_mars_int_range(self):
    method test_json_parses_mars_int_range_padded (line 97) | def test_json_parses_mars_int_range_padded(self):
    method test_json_parses_mars_int_range_incremented (line 103) | def test_json_parses_mars_int_range_incremented(self):
    method test_json_parses_mars_float_range (line 109) | def test_json_parses_mars_float_range(self):
    method test_json_parses_mars_float_range_incremented (line 115) | def test_json_parses_mars_float_range_incremented(self):
    method test_json_parses_mars_float_range_incremented_by_float (line 121) | def test_json_parses_mars_float_range_incremented_by_float(self):
    method test_json_parses_mars_date_range (line 127) | def test_json_parses_mars_date_range(self):
    method test_json_parses_mars_relative_date_range (line 133) | def test_json_parses_mars_relative_date_range(self):
    method test_json_parses_mars_date_range_incremented (line 146) | def test_json_parses_mars_date_range_incremented(self):
    method test_json_raises_syntax_error_missing_right (line 152) | def test_json_raises_syntax_error_missing_right(self):
    method test_json_raises_syntax_error_missing_left (line 157) | def test_json_raises_syntax_error_missing_left(self):
    method test_json_raises_syntax_error_missing_increment (line 162) | def test_json_raises_syntax_error_missing_increment(self):
    method test_json_raises_syntax_error_no_range (line 167) | def test_json_raises_syntax_error_no_range(self):
    method test_json_raises_value_error_date_types (line 172) | def test_json_raises_value_error_date_types(self):
    method test_json_raises_value_error_float_types (line 180) | def test_json_raises_value_error_float_types(self):
    method test_json_raises_value_error_int_types (line 185) | def test_json_raises_value_error_int_types(self):
    method test_json_parses_accidental_extra_whitespace (line 190) | def test_json_parses_accidental_extra_whitespace(self):
    method test_json_parses_parameter_subsections (line 196) | def test_json_parses_parameter_subsections(self):
    method test_cfg (line 209) | def test_cfg(self):
    method test_bad_cfg (line 219) | def test_bad_cfg(self):
    method test_cfg_produces_lists (line 228) | def test_cfg_produces_lists(self):
    method test_cfg_parses_mars_list (line 244) | def test_cfg_parses_mars_list(self):
    method test_cfg_parses_mars_int_range (line 256) | def test_cfg_parses_mars_int_range(self):
    method test_cfg_parses_mars_int_range_padded (line 268) | def test_cfg_parses_mars_int_range_padded(self):
    method test_cfg_parses_mars_int_range_incremented (line 280) | def test_cfg_parses_mars_int_range_incremented(self):
    method test_cfg_parses_mars_int_reverse_range_incremented (line 292) | def test_cfg_parses_mars_int_reverse_range_incremented(self):
    method test_cfg_parses_mars_float_range (line 304) | def test_cfg_parses_mars_float_range(self):
    method test_cfg_parses_mars_float_range_incremented (line 316) | def test_cfg_parses_mars_float_range_incremented(self):
    method test_cfg_parses_mars_float_range_incremented_by_float (line 328) | def test_cfg_parses_mars_float_range_incremented_by_float(self):
    method test_cfg_parses_mars_float_reverse_range_incremented_by_float (line 340) | def test_cfg_parses_mars_float_reverse_range_incremented_by_float(self):
    method test_cfg_parses_mars_date_range (line 352) | def test_cfg_parses_mars_date_range(self):
    method test_cfg_parses_mars_relative_date_range (line 364) | def test_cfg_parses_mars_relative_date_range(self):
    method test_cfg_parses_mars_relative_date_reverse_range (line 383) | def test_cfg_parses_mars_relative_date_reverse_range(self):
    method test_cfg_parses_mars_date_range_incremented (line 403) | def test_cfg_parses_mars_date_range_incremented(self):
    method test_cfg_parses_mars_date_reverse_range_incremented (line 415) | def test_cfg_parses_mars_date_reverse_range_incremented(self):
    method test_cfg_raises_syntax_error_missing_right (line 428) | def test_cfg_raises_syntax_error_missing_right(self):
    method test_cfg_raises_syntax_error_missing_left (line 439) | def test_cfg_raises_syntax_error_missing_left(self):
    method test_cfg_raises_syntax_error_missing_increment (line 450) | def test_cfg_raises_syntax_error_missing_increment(self):
    method test_cfg_raises_syntax_error_no_range (line 461) | def test_cfg_raises_syntax_error_no_range(self):
    method test_cfg_raises_value_error_date_types (line 472) | def test_cfg_raises_value_error_date_types(self):
    method test_cfg_raises_value_error_float_types (line 486) | def test_cfg_raises_value_error_float_types(self):
    method test_cfg_raises_value_error_int_types (line 497) | def test_cfg_raises_value_error_int_types(self):
    method test_cfg_parses_accidental_extra_whitespace (line 508) | def test_cfg_parses_accidental_extra_whitespace(self):
    method test_json_parse_year_mon_one (line 521) | def test_json_parse_year_mon_one(self):
    method test_json_parse_year_mon_one_by_one (line 530) | def test_json_parse_year_mon_one_by_one(self):
    method test_json_parse_year_mon_one_by_two (line 539) | def test_json_parse_year_mon_one_by_two(self):
    method test_json_parse_year_mon_six (line 548) | def test_json_parse_year_mon_six(self):
    method test_json_parse_year_mon_six_by_one (line 557) | def test_json_parse_year_mon_six_by_one(self):
    method test_json_parse_year_mon_six_by_three (line 566) | def test_json_parse_year_mon_six_by_three(self):
    method test_json_parse_year_mon_six_rev (line 575) | def test_json_parse_year_mon_six_rev(self):
    method test_json_parse_year_mon_six_by_one_rev (line 584) | def test_json_parse_year_mon_six_by_one_rev(self):
    method test_json_parse_year_mon_six_by_three_rev (line 593) | def test_json_parse_year_mon_six_by_three_rev(self):
    method test_json_parse_year_mon_eighteen (line 602) | def test_json_parse_year_mon_eighteen(self):
    method test_json_parse_year_mon_eighteen_by_two (line 613) | def test_json_parse_year_mon_eighteen_by_two(self):
    method test_json_parse_year_mon_eighteen_by_six (line 622) | def test_json_parse_year_mon_eighteen_by_six(self):
    method test_cfg_parses_parameter_subsections (line 631) | def test_cfg_parses_parameter_subsections(self):
  class HelpersTest (line 652) | class HelpersTest(unittest.TestCase):
    method test_number_of_replacements (line 659) | def test_number_of_replacements(self):
  class SubsectionsTest (line 666) | class SubsectionsTest(unittest.TestCase):
    method test_parses_config_subsections (line 667) | def test_parses_config_subsections(self):
  class ApiKeyCountingTest (line 675) | class ApiKeyCountingTest(unittest.TestCase):
    method test_no_keys (line 676) | def test_no_keys(self):
    method test_api_keys (line 683) | def test_api_keys(self):
  class ProcessConfigTest (line 696) | class ProcessConfigTest(unittest.TestCase):
    method test_parse_config (line 698) | def test_parse_config(self):
    method test_require_params_section (line 709) | def test_require_params_section(self):
    method test_accepts_parameters_section (line 723) | def test_accepts_parameters_section(self):
    method test_requires_target_path_param (line 737) | def test_requires_target_path_param(self):
    method test_requires_target_template_param_not_present (line 753) | def test_requires_target_template_param_not_present(self):
    method test_accepts_target_path_param (line 769) | def test_accepts_target_path_param(self):
    method test_requires_partition_keys_to_match_sections (line 785) | def test_requires_partition_keys_to_match_sections(self):
    method test_accepts_partition_keys_matching_sections (line 818) | def test_accepts_partition_keys_matching_sections(self):
    method test_accepts_partition_keys_not_present (line 847) | def test_accepts_partition_keys_not_present(self):
    method test_treats_partition_keys_as_list (line 873) | def test_treats_partition_keys_as_list(self):
    method test_mismatched_template_partition_keys (line 891) | def test_mismatched_template_partition_keys(self):
    method test_append_date_dirs_raise_error (line 924) | def test_append_date_dirs_raise_error(self):
    method test_target_filename_raise_error (line 948) | def test_target_filename_raise_error(self):
    method test_client_not_set (line 971) | def test_client_not_set(self):
    method test_client_invalid (line 991) | def test_client_invalid(self):
    method test_partition_cannot_include_all (line 1012) | def test_partition_cannot_include_all(self):
    method test_partition_key_contains_date_range (line 1034) | def test_partition_key_contains_date_range(self):
    method test_partition_key_contains_date__in_case_of_hdate (line 1052) | def test_partition_key_contains_date__in_case_of_hdate(self):
    method test_singleton_partitions_are_converted_to_lists (line 1070) | def test_singleton_partitions_are_converted_to_lists(self):
  class PrepareTargetNameTest (line 1090) | class PrepareTargetNameTest(unittest.TestCase):
    method setUp (line 1191) | def setUp(self) -> None:
    method test_target_name (line 1194) | def test_target_name(self):

FILE: weather_dl/download_pipeline/partition.py
  class PartitionConfig (line 36) | class PartitionConfig(beam.PTransform):
    method expand (line 66) | def expand(self, configs):
  function _create_partition_config (line 118) | def _create_partition_config(option: t.Tuple, config: Config) -> Config:
  function skip_partition (line 149) | def skip_partition(config: Config, store: Store, manifest: Manifest) -> ...
  function prepare_partition_index (line 164) | def prepare_partition_index(config: Config,
  function prepare_partitions_from_index (line 194) | def prepare_partitions_from_index(config: Config, indexes: t.List[Index]...
  function new_downloads_only (line 210) | def new_downloads_only(candidate: Config, store: t.Optional[Store] = None,
  function assemble_config (line 221) | def assemble_config(partition: Partition, manifest: Manifest) -> Config:
  function cycle_iters (line 254) | def cycle_iters(iters: t.List[t.Iterator], take: int = 1) -> t.Iterator:
  function prepare_fair_partition_index (line 275) | def prepare_fair_partition_index(configs: t.List[Config],
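
cycle_iters and prepare_fair_partition_index implement fair scheduling across multiple configs: partitions are drawn round-robin from each config's iterator so no single config monopolizes the shared licenses. A standalone sketch of that round-robin idea (not the repository's cycle_iters itself):

    import itertools
    import typing as t

    def round_robin(iters: t.List[t.Iterator], take: int = 1) -> t.Iterator:
        # Yield up to `take` items from each iterator in turn until all are empty.
        pending = list(iters)
        while pending:
            alive = []
            for it in pending:
                chunk = list(itertools.islice(it, take))
                yield from chunk
                if len(chunk) == take:  # iterator may still have items
                    alive.append(it)
            pending = alive

    a, b = iter([1, 2, 3]), iter("xy")
    assert list(round_robin([a, b])) == [1, "x", 2, "y", 3]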

FILE: weather_dl/download_pipeline/partition_test.py
  class OddFilesDoNotExistStore (line 31) | class OddFilesDoNotExistStore(InMemoryStore):
    method __init__ (line 32) | def __init__(self):
    method exists (line 36) | def exists(self, filename: str) -> bool:
  class PreparePartitionTest (line 42) | class PreparePartitionTest(unittest.TestCase):
    method setUp (line 44) | def setUp(self) -> None:
    method create_partition_configs (line 47) | def create_partition_configs(self, configs,
    method test_partition_single_key (line 59) | def test_partition_single_key(self):
    method test_partition_multi_key (line 80) | def test_partition_multi_key(self):
    method test_partition_multi_key_single_values (line 103) | def test_partition_multi_key_single_values(self):
    method test_partition_multi_params_multi_key (line 123) | def test_partition_multi_params_multi_key(self):
    method test_prepare_partition_records_download_status_to_manifest (line 168) | def test_prepare_partition_records_download_status_to_manifest(self):
    method test_prepare_partition_records_download_status_to_manifest_for_already_downloaded_shard (line 201) | def test_prepare_partition_records_download_status_to_manifest_for_alr...
    method test_prepare_partition_update_download_status_for_downloaded_shard_missing_upload_entry (line 238) | def test_prepare_partition_update_download_status_for_downloaded_shard...
    method test_prepare_partition_update_manifest_for_failed_upload_status_of_downloaded_shard (line 280) | def test_prepare_partition_update_manifest_for_failed_upload_status_of...
    method test_skip_partitions__never_unbalances_licenses (line 323) | def test_skip_partitions__never_unbalances_licenses(self):
    method test_multi_config_partition_single_key (line 352) | def test_multi_config_partition_single_key(self):
    method test_multi_config_partition_single_key_fair_schedule (line 376) | def test_multi_config_partition_single_key_fair_schedule(self):
    method test_multi_config_partition_multi_params_multi_key (line 400) | def test_multi_config_partition_multi_params_multi_key(self):
    method test_multi_config_partition_multi_params_fair_schedule (line 449) | def test_multi_config_partition_multi_params_fair_schedule(self):
    method test_multi_config_partition_multi_params_keys_and_requests_with_fair_schedule (line 499) | def test_multi_config_partition_multi_params_keys_and_requests_with_fa...
    method test_hdate_partition_single_key (line 551) | def test_hdate_partition_single_key(self):
    method test_partition_with_no_keys (line 572) | def test_partition_with_no_keys(self):
  class SkipPartitionsTest (line 589) | class SkipPartitionsTest(unittest.TestCase):
    method setUp (line 591) | def setUp(self) -> None:
    method test_skip_partition_missing_force_download (line 595) | def test_skip_partition_missing_force_download(self):
    method test_skip_partition_force_download_true (line 613) | def test_skip_partition_force_download_true(self):
    method test_skip_partition_force_download_false (line 632) | def test_skip_partition_force_download_false(self):

FILE: weather_dl/download_pipeline/pipeline.py
  function configure_logger (line 51) | def configure_logger(verbosity: int) -> None:
  class PipelineArgs (line 65) | class PipelineArgs:
  function pipeline (line 86) | def pipeline(args: PipelineArgs) -> None:
  function run (line 135) | def run(argv: t.List[str], save_main_session: bool = True) -> PipelineArgs:
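
run() parses argv into PipelineArgs and pipeline() executes them on Beam. A hedged invocation sketch; the exact argv contract is an assumption, with flag names inferred from pipeline_test.py below (test_dry_run, test_local_run) and the config path taken from this repository's configs/ directory:

    # Assumed programmatic entry point; verify flags against weather_dl/README.md.
    from weather_dl.download_pipeline.pipeline import run

    if __name__ == "__main__":
        run(["configs/era5_example_config_local_run.cfg", "--local-run", "--dry-run"])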

FILE: weather_dl/download_pipeline/pipeline_test.py
  function default_args (line 72) | def default_args(parameters: t.Optional[t.Dict] = None, selection: t.Opt...
  class ParsePipelineArgs (line 94) | class ParsePipelineArgs(unittest.TestCase):
    method assert_pipeline (line 97) | def assert_pipeline(self, args, expected):
    method test_happy_path (line 111) | def test_happy_path(self):
    method test_force_download (line 114) | def test_force_download(self):
    method test_dry_run (line 120) | def test_dry_run(self):
    method test_local_run (line 130) | def test_local_run(self):
    method test_update_manifest (line 140) | def test_update_manifest(self):
    method test_user_specified_num_requests_per_key (line 146) | def test_user_specified_num_requests_per_key(self):
    method test_check_skip_in_dry_run_raise_error_if_dry_run_flag_is_absent (line 154) | def test_check_skip_in_dry_run_raise_error_if_dry_run_flag_is_absent(s...

FILE: weather_dl/download_pipeline/stores.py
  class Store (line 27) | class Store(abc.ABC):
    method open (line 34) | def open(self, filename: str, mode: str = 'r') -> t.IO:
    method exists (line 38) | def exists(self, filename: str) -> bool:
  class InMemoryStore (line 42) | class InMemoryStore(Store):
    method __init__ (line 45) | def __init__(self):
    method open (line 48) | def open(self, filename: str, mode: str = 'r') -> t.IO:
    method exists (line 57) | def exists(self, filename: str) -> bool:
  class TempFileStore (line 62) | class TempFileStore(Store):
    method __init__ (line 65) | def __init__(self, directory: t.Optional[str] = None) -> None:
    method open (line 71) | def open(self, filename: str, mode: str = 'r') -> t.IO:
    method exists (line 75) | def exists(self, filename: str) -> bool:
  class LocalFileStore (line 80) | class LocalFileStore(Store):
    method __init__ (line 83) | def __init__(self, directory: t.Optional[str] = None) -> None:
    method open (line 89) | def open(self, filename: str, mode: str = 'r') -> t.IO:
    method exists (line 93) | def exists(self, filename: str) -> bool:
  class FSStore (line 98) | class FSStore(Store):
    method open (line 102) | def open(self, filename: str, mode: str = 'r') -> t.IO:
    method exists (line 121) | def exists(self, filename: str) -> bool:
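
Store is a deliberately small seam: just open() and exists(), with InMemoryStore for tests, TempFileStore/LocalFileStore for local disk, and FSStore for Beam filesystems. A minimal sketch of a dict-backed test double honoring the same two-method surface (the real InMemoryStore may differ in detail):

    import io
    import typing as t

    class _WriteBack(io.BytesIO):
        # Flushes its contents into the owning store when closed.
        def __init__(self, files: t.Dict[str, bytes], name: str) -> None:
            super().__init__()
            self._files, self._name = files, name

        def close(self) -> None:
            self._files[self._name] = self.getvalue()
            super().close()

    class DictStore:
        def __init__(self) -> None:
            self.files: t.Dict[str, bytes] = {}

        def open(self, filename: str, mode: str = "r") -> t.IO:
            if "w" in mode:
                return _WriteBack(self.files, filename)
            return io.BytesIO(self.files[filename])

        def exists(self, filename: str) -> bool:
            return filename in self.files

    store = DictStore()
    with store.open("a.grib", "wb") as f:
        f.write(b"\x00\x01")
    assert store.exists("a.grib")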

FILE: weather_dl/download_pipeline/stores_test.py
  class FSStoreTest (line 21) | class FSStoreTest(unittest.TestCase):
    method test_writes (line 23) | def test_writes(self):
    method test_reads (line 31) | def test_reads(self):
    method test_reads__default_argument (line 39) | def test_reads__default_argument(self):
    method test_asserts_bad_mode__both (line 47) | def test_asserts_bad_mode__both(self):
    method test_asserts_bad_mode__neither (line 54) | def test_asserts_bad_mode__neither(self):

FILE: weather_dl/download_pipeline/util.py
  function _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter (line 42) | def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(...
  class _FakeClock (line 53) | class _FakeClock:
    method sleep (line 54) | def sleep(self, value):
  function retry_with_exponential_backoff (line 58) | def retry_with_exponential_backoff(fun):
  function ichunked (line 73) | def ichunked(iterable: t.Iterable, n: int) -> t.Iterator[t.Iterable]:
  function copy (line 87) | def copy(src: str, dst: str) -> None:
  function to_json_serializable_type (line 105) | def to_json_serializable_type(value: t.Any) -> t.Any:
  function fetch_geo_polygon (line 148) | def fetch_geo_polygon(area: t.Union[list, str]) -> str:
  function get_file_size (line 179) | def get_file_size(path: str) -> float:
  function get_wait_interval (line 187) | def get_wait_interval(num_retries: int = 0) -> float:
  function generate_md5_hash (line 194) | def generate_md5_hash(input: str) -> str:
  function download_with_aria2 (line 199) | def download_with_aria2(url: str, path: str) -> None:
  function generate_hdate (line 213) | def generate_hdate(date: str, subtract_year: str) -> str:
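
ichunked splits an iterable into lazy chunks of at most n items. A sketch of that contract (the repository's version yields iterables; lists are used here for clarity):

    import itertools
    import typing as t

    def ichunked_sketch(iterable: t.Iterable, n: int) -> t.Iterator[t.List]:
        it = iter(iterable)
        while True:
            chunk = list(itertools.islice(it, n))
            if not chunk:
                return
            yield chunk

    assert list(ichunked_sketch(range(7), 3)) == [[0, 1, 2], [3, 4, 5], [6]]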

FILE: weather_dl/download_pipeline/util_test.py
  class IChunksTests (line 7) | class IChunksTests(unittest.TestCase):
    method setUp (line 8) | def setUp(self) -> None:
    method test_even_chunks (line 11) | def test_even_chunks(self):
    method test_odd_chunks (line 24) | def test_odd_chunks(self):
  class TestFetchGeoPolygon (line 36) | class TestFetchGeoPolygon(unittest.TestCase):
    method test_valid_area (line 37) | def test_valid_area(self):
    method test_valid_string_area_value (line 46) | def test_valid_string_area_value(self):
    method test_invalid_string_area_value (line 55) | def test_invalid_string_area_value(self):
    method test_invalid_latitude_south (line 61) | def test_invalid_latitude_south(self):
    method test_invalid_latitude_north (line 67) | def test_invalid_latitude_north(self):
    method test_invalid_longitude_west (line 73) | def test_invalid_longitude_west(self):
    method test_invalid_longitude_east (line 79) | def test_invalid_longitude_east(self):
  class TestGenerateHdate (line 86) | class TestGenerateHdate(unittest.TestCase):
    method test_valid_hdate (line 87) | def test_valid_hdate(self):

FILE: weather_dl_v2/cli/app/cli_config.py
  class CliConfig (line 25) | class CliConfig:
    method BASE_URI (line 30) | def BASE_URI(self) -> str:
    method from_dict (line 39) | def from_dict(cls, config: t.Dict):
  function get_config (line 54) | def get_config():

FILE: weather_dl_v2/cli/app/main.py
  function ping (line 38) | def ping():

FILE: weather_dl_v2/cli/app/services/download_service.py
  class DownloadService (line 27) | class DownloadService(abc.ABC):
    method _list_all_downloads (line 30) | def _list_all_downloads(self):
    method _list_all_downloads_by_filter (line 34) | def _list_all_downloads_by_filter(self, filter_dict: dict):
    method _get_download_by_config (line 38) | def _get_download_by_config(self, config_name: str):
    method _show_config_content (line 42) | def _show_config_content(self, config_name: str):
    method _add_new_download (line 46) | def _add_new_download(
    method _remove_download (line 52) | def _remove_download(self, config_name: str):
    method _refetch_config_partitions (line 56) | def _refetch_config_partitions(self, config_name: str, licenses: t.Lis...
  class DownloadServiceNetwork (line 60) | class DownloadServiceNetwork(DownloadService):
    method __init__ (line 62) | def __init__(self):
    method _list_all_downloads (line 65) | def _list_all_downloads(self):
    method _list_all_downloads_by_filter (line 70) | def _list_all_downloads_by_filter(self, filter_dict: dict):
    method _get_download_by_config (line 77) | def _get_download_by_config(self, config_name: str):
    method _show_config_content (line 83) | def _show_config_content(self, config_name: str):
    method _add_new_download (line 89) | def _add_new_download(
    method _remove_download (line 105) | def _remove_download(self, config_name: str):
    method _refetch_config_partitions (line 110) | def _refetch_config_partitions(self, config_name: str, licenses: t.Lis...
  class DownloadServiceMock (line 119) | class DownloadServiceMock(DownloadService):
  function get_download_service (line 123) | def get_download_service(test: bool = False):
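
The CLI services all follow the same factory arrangement: an abstract base, a Network implementation that talks to the server, a Mock for tests, and a get_*_service(test) factory that picks between them. A sketch with hypothetical names:

    import abc

    class GreeterService(abc.ABC):
        @abc.abstractmethod
        def greet(self) -> str: ...

    class GreeterServiceNetwork(GreeterService):
        def greet(self) -> str:
            return "hello from the server"

    class GreeterServiceMock(GreeterService):
        def greet(self) -> str:
            return "hello from the mock"

    def get_greeter_service(test: bool = False) -> GreeterService:
        # Mirrors get_download_service / get_license_service / get_queue_service.
        return GreeterServiceMock() if test else GreeterServiceNetwork()

    assert get_greeter_service(test=True).greet() == "hello from the mock"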

FILE: weather_dl_v2/cli/app/services/license_service.py
  class LicenseService (line 26) | class LicenseService(abc.ABC):
    method _get_all_license (line 29) | def _get_all_license(self):
    method _get_all_license_by_client_name (line 33) | def _get_all_license_by_client_name(self, client_name: str):
    method _get_license_by_license_id (line 37) | def _get_license_by_license_id(self, license_id: str):
    method _add_license (line 41) | def _add_license(self, license_dict: dict):
    method _remove_license (line 45) | def _remove_license(self, license_id: str):
    method _update_license (line 49) | def _update_license(self, license_id: str, license_dict: dict):
    method _redeploy_license_by_license_id (line 53) | def _redeploy_license_by_license_id(self, license_id: str):
    method _redeploy_licenses_by_client (line 57) | def _redeploy_licenses_by_client(self, client_name: str):
  class LicenseServiceNetwork (line 61) | class LicenseServiceNetwork(LicenseService):
    method __init__ (line 63) | def __init__(self):
    method _get_all_license (line 66) | def _get_all_license(self):
    method _get_all_license_by_client_name (line 71) | def _get_all_license_by_client_name(self, client_name: str):
    method _get_license_by_license_id (line 78) | def _get_license_by_license_id(self, license_id: str):
    method _add_license (line 84) | def _add_license(self, license_dict: dict):
    method _remove_license (line 91) | def _remove_license(self, license_id: str):
    method _update_license (line 97) | def _update_license(self, license_id: str, license_dict: dict):
    method _redeploy_license_by_license_id (line 104) | def _redeploy_license_by_license_id(self, license_id: str):
    method _redeploy_licenses_by_client (line 111) | def _redeploy_licenses_by_client(self, client_name: str):
  class LicenseServiceMock (line 119) | class LicenseServiceMock(LicenseService):
  function get_license_service (line 123) | def get_license_service(test: bool = False):

FILE: weather_dl_v2/cli/app/services/network_service.py
  class NetworkService (line 26) | class NetworkService:
    method parse_response (line 28) | def parse_response(self, response: requests.Response):
    method get (line 43) | def get(self, uri, header, query=None, payload=None):
    method post (line 53) | def post(self, uri, header, query=None, payload=None, file=None):
    method put (line 65) | def put(self, uri, header, query=None, payload=None, file=None):
    method delete (line 77) | def delete(self, uri, header, query=None):
    method patch (line 87) | def patch(self, uri, header, query=None):
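
NetworkService funnels every HTTP verb through a single parse_response so the CLI reports errors uniformly. A hedged sketch of the GET path using the requests library (parse_response's exact behavior is assumed):

    import json
    import requests

    def get_sketch(uri: str, header: dict, query: dict = None) -> str:
        # Issue the request, then route the response through one parser.
        response = requests.get(uri, params=query, headers=header)
        if response.status_code != 200:
            return f"[{response.status_code}] {response.text}"
        return json.dumps(response.json(), indent=2)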

FILE: weather_dl_v2/cli/app/services/queue_service.py
  class QueueService (line 28) | class QueueService(abc.ABC):
    method _get_all_license_queues (line 31) | def _get_all_license_queues(self):
    method _get_license_queue_by_client_name (line 35) | def _get_license_queue_by_client_name(self, client_name: str):
    method _get_queue_by_license (line 39) | def _get_queue_by_license(self, license_id: str):
    method _edit_license_queue (line 43) | def _edit_license_queue(self, license_id: str, priority_list: t.List[s...
    method _edit_config_absolute_priority (line 47) | def _edit_config_absolute_priority(
    method _save_queue_to_file (line 53) | def _save_queue_to_file(self, license_id: str, dir: str):
  class QueueServiceNetwork (line 57) | class QueueServiceNetwork(QueueService):
    method __init__ (line 59) | def __init__(self):
    method _get_all_license_queues (line 62) | def _get_all_license_queues(self):
    method _get_license_queue_by_client_name (line 67) | def _get_license_queue_by_client_name(self, client_name: str):
    method _get_queue_by_license (line 74) | def _get_queue_by_license(self, license_id: str):
    method _edit_license_queue (line 79) | def _edit_license_queue(self, license_id: str, priority_list: t.List[s...
    method _edit_config_absolute_priority (line 86) | def _edit_config_absolute_priority(
    method _save_queue_to_file (line 95) | def _save_queue_to_file(self, license_id: str, dir_path: str) -> str:
  class QueueServiceMock (line 116) | class QueueServiceMock(QueueService):
  function get_queue_service (line 120) | def get_queue_service(test: bool = False):

FILE: weather_dl_v2/cli/app/subcommands/config.py
  class ConfigValidator (line 29) | class ConfigValidator(Validator):
  function update_cli (line 34) | def update_cli():
  function show_server_ip (line 48) | def show_server_ip():
  function update_server_ip (line 53) | def update_server_ip(

FILE: weather_dl_v2/cli/app/subcommands/download.py
  class DowloadFilterValidator (line 27) | class DowloadFilterValidator(Validator):
  function get_downloads (line 38) | def get_downloads(
  function submit_download (line 65) | def submit_download(
  function get_download_by_config (line 92) | def get_download_by_config(
  function show_config (line 99) | def show_config(
  function remove_download (line 106) | def remove_download(
  function refetch_config (line 118) | def refetch_config(

FILE: weather_dl_v2/cli/app/subcommands/license.py
  class LicenseValidator (line 25) | class LicenseValidator(Validator):
  function get_all_license (line 35) | def get_all_license(
  function get_license (line 57) | def get_license(license: Annotated[str, typer.Argument(help="License ID....
  function add_license (line 62) | def add_license(
  function remove_license (line 86) | def remove_license(
  function update_license (line 96) | def update_license(
  function redeploy_license (line 125) | def redeploy_license(

FILE: weather_dl_v2/cli/app/subcommands/queue.py
  class QueueValidator (line 25) | class QueueValidator(Validator):
  function get_all_license_queue (line 33) | def get_all_license_queue(
  function get_license_queue (line 55) | def get_license_queue(license: Annotated[str, typer.Argument(help="Licen...
  function modify_license_queue (line 64) | def modify_license_queue(

FILE: weather_dl_v2/cli/app/utils.py
  function timeit (line 32) | def timeit(func):
  function confirm_action (line 43) | def confirm_action(message: str = "Are you sure you want to continue?"):
  function order_dict_fields (line 47) | def order_dict_fields(dictionary, key_order):
  function as_table (line 51) | def as_table(response: str, key_order=None):
  class Loader (line 82) | class Loader:
    method __init__ (line 84) | def __init__(self, desc="Loading...", end="", timeout=0.1):
    method start (line 101) | def start(self):
    method _animate (line 105) | def _animate(self):
    method __enter__ (line 112) | def __enter__(self):
    method stop (line 115) | def stop(self):
    method __exit__ (line 120) | def __exit__(self, exc_type, exc_value, tb):
  class Validator (line 126) | class Validator(abc.ABC):
    method validate (line 129) | def validate(
    method validate_json (line 151) | def validate_json(self, file_path, allow_missing: bool = False):
    method _validate_keys (line 167) | def _validate_keys(self, data_set: set, valid_set: set, allow_missing:...
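
Loader is a context-manager spinner (see start/_animate/stop above) wrapped around slow network calls, and Validator centralizes key checking for JSON inputs. A minimal, self-contained spinner in the same spirit as Loader (the real class's desc/end/timeout handling may differ):

    import itertools
    import threading
    import time

    class SpinnerSketch:
        def __init__(self, desc: str = "Loading...", timeout: float = 0.1) -> None:
            self.desc, self.timeout = desc, timeout
            self._done = threading.Event()
            self._thread = threading.Thread(target=self._animate, daemon=True)

        def _animate(self) -> None:
            for frame in itertools.cycle("|/-\\"):
                if self._done.is_set():
                    break
                print(f"\r{self.desc} {frame}", end="", flush=True)
                time.sleep(self.timeout)

        def __enter__(self) -> "SpinnerSketch":
            self._thread.start()
            return self

        def __exit__(self, exc_type, exc_value, tb) -> None:
            self._done.set()
            self._thread.join()
            print()

    with SpinnerSketch("Contacting server"):
        time.sleep(0.5)  # stand-in for a slow request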

FILE: weather_dl_v2/downloader_kubernetes/downloader.py
  function download (line 28) | def download(url: str, path: str) -> None:
  function main (line 62) | def main(

FILE: weather_dl_v2/downloader_kubernetes/downloader_config.py
  class DownloaderConfig (line 28) | class DownloaderConfig:
    method from_dict (line 33) | def from_dict(cls, config: t.Dict):
  function get_config (line 48) | def get_config():

FILE: weather_dl_v2/downloader_kubernetes/manifest.py
  class ManifestException (line 48) | class ManifestException(Exception):
  class Stage (line 54) | class Stage(enum.Enum):
  class Status (line 73) | class Status(enum.Enum):
  class DownloadStatus (line 91) | class DownloadStatus:
    method from_dict (line 152) | def from_dict(cls, download_status: t.Dict) -> "DownloadStatus":
    method to_dict (line 165) | def to_dict(cls, instance) -> t.Dict:
  class Manifest (line 184) | class Manifest(abc.ABC):
    method __post_init__ (line 224) | def __post_init__(self):
    method schedule (line 228) | def schedule(
    method skip (line 268) | def skip(
    method _set_for_transaction (line 321) | def _set_for_transaction(
    method __enter__ (line 337) | def __enter__(self) -> None:
    method __exit__ (line 340) | def __exit__(self, exc_type, exc_inst, exc_tb) -> None:
    method transact (line 381) | def transact(
    method set_stage (line 393) | def set_stage(self, stage: Stage) -> None:
    method _read (line 415) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 419) | def _update(self, download_status: DownloadStatus) -> None:
  class FirestoreManifest (line 423) | class FirestoreManifest(Manifest):
    method _get_db (line 433) | def _get_db(self) -> firestore.firestore.Client:
    method _read (line 464) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 479) | def _update(self, download_status: DownloadStatus) -> None:
    method root_document_for_store (line 497) | def root_document_for_store(self, store_scheme: str) -> DocumentRefere...

FILE: weather_dl_v2/downloader_kubernetes/util.py
  function _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter (line 40) | def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(
  class _FakeClock (line 53) | class _FakeClock:
    method sleep (line 55) | def sleep(self, value):
  function retry_with_exponential_backoff (line 59) | def retry_with_exponential_backoff(fun):
  function ichunked (line 74) | def ichunked(iterable: t.Iterable, n: int) -> t.Iterator[t.Iterable]:
  function copy (line 88) | def copy(src: str, dst: str) -> None:
  function to_json_serializable_type (line 100) | def to_json_serializable_type(value: t.Any) -> t.Any:
  function fetch_geo_polygon (line 147) | def fetch_geo_polygon(area: t.Union[list, str]) -> str:
  function get_file_size (line 178) | def get_file_size(path: str) -> float:
  function get_wait_interval (line 188) | def get_wait_interval(num_retries: int = 0) -> float:
  function generate_md5_hash (line 195) | def generate_md5_hash(input: str) -> str:
  function download_with_aria2 (line 200) | def download_with_aria2(url: str, path: str) -> None:
  function download_with_wget (line 229) | def download_with_wget(url: str, path: str) -> None:

FILE: weather_dl_v2/fastapi-server/config_processing/config.py
  class Config (line 26) | class Config:
    method from_dict (line 68) | def from_dict(cls, config: t.Dict) -> "Config":
  function optimize_selection_partition (line 82) | def optimize_selection_partition(selection: t.Dict) -> t.Dict:

FILE: weather_dl_v2/fastapi-server/config_processing/manifest.py
  class ManifestException (line 52) | class ManifestException(Exception):
  class Stage (line 58) | class Stage(enum.Enum):
  class Status (line 77) | class Status(enum.Enum):
  class DownloadStatus (line 95) | class DownloadStatus:
    method from_dict (line 156) | def from_dict(cls, download_status: t.Dict) -> "DownloadStatus":
    method to_dict (line 169) | def to_dict(cls, instance) -> t.Dict:
  class Manifest (line 188) | class Manifest(abc.ABC):
    method __post_init__ (line 227) | def __post_init__(self):
    method schedule (line 231) | def schedule(
    method skip (line 271) | def skip(
    method _set_for_transaction (line 324) | def _set_for_transaction(
    method __enter__ (line 340) | def __enter__(self) -> None:
    method __exit__ (line 343) | def __exit__(self, exc_type, exc_inst, exc_tb) -> None:
    method transact (line 383) | def transact(
    method set_stage (line 395) | def set_stage(self, stage: Stage) -> None:
    method _read (line 428) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 432) | def _update(self, download_status: DownloadStatus) -> None:
  class FirestoreManifest (line 436) | class FirestoreManifest(Manifest, Database):
    method _get_db (line 446) | def _get_db(self) -> firestore.firestore.Client:
    method _read (line 477) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 492) | def _update(self, download_status: DownloadStatus) -> None:
    method root_document_for_store (line 510) | def root_document_for_store(self, store_scheme: str) -> DocumentRefere...

FILE: weather_dl_v2/fastapi-server/config_processing/parsers.py
  function date (line 34) | def date(candidate: str) -> datetime.date:
  function time (line 73) | def time(candidate: str) -> datetime.time:
  function day_month_year (line 105) | def day_month_year(candidate: t.Any) -> int:
  function date_range_converter (line 117) | def date_range_converter(candidate: str) -> str:
  function parse_literal (line 122) | def parse_literal(candidate: t.Any) -> t.Any:
  function validate (line 132) | def validate(key: str, value: int) -> None:
  function typecast (line 140) | def typecast(key: str, value: t.Any) -> t.Any:
  function _read_config_file (line 155) | def _read_config_file(file: t.IO) -> t.Dict:
  function parse_config (line 173) | def parse_config(file: t.IO) -> t.Dict:
  function _splitlines (line 181) | def _splitlines(block: str) -> t.List[str]:
  function mars_range_value (line 186) | def mars_range_value(token: str, key: str) -> t.Union[datetime.date, int...
  function mars_increment_value (line 207) | def mars_increment_value(token: str) -> t.Union[int, float]:
  function parse_mars_syntax (line 220) | def parse_mars_syntax(block: str, key: str) -> t.List[str]:
  function date_range (line 311) | def date_range(
  function _parse_lists (line 322) | def _parse_lists(config: dict, section: str = "") -> t.Dict:
  function _number_of_replacements (line 337) | def _number_of_replacements(s: t.Text):
  function parse_subsections (line 345) | def parse_subsections(config: t.Dict) -> t.Dict:
  function require (line 366) | def require(
  function process_config (line 374) | def process_config(file: t.IO, config_name: str) -> Config:
  function prepare_target_name (line 504) | def prepare_target_name(config: Config) -> str:
  function get_subsections (line 514) | def get_subsections(config: Config) -> t.List[t.Tuple[str, t.Dict]]:

FILE: weather_dl_v2/fastapi-server/config_processing/partition.py
  class PartitionConfig (line 32) | class PartitionConfig:
    method _create_partition_config (line 49) | def _create_partition_config(self, option: t.Tuple) -> Config:
    method skip_partition (line 79) | def skip_partition(self, config: Config) -> bool:
    method prepare_partitions (line 99) | def prepare_partitions(self) -> t.Iterator[Config]:
    method new_downloads_only (line 116) | def new_downloads_only(self, candidate: Config) -> bool:
    method update_manifest_collection (line 124) | def update_manifest_collection(self, partition: Config) -> Config:

FILE: weather_dl_v2/fastapi-server/config_processing/pipeline.py
  function _do_partitions (line 32) | def _do_partitions(partition_obj: PartitionConfig):
  function start_processing_config (line 40) | async def start_processing_config(config_file, licenses, force_download,...

FILE: weather_dl_v2/fastapi-server/config_processing/stores.py
  class Store (line 27) | class Store(abc.ABC):
    method open (line 34) | def open(self, filename: str, mode: str = "r") -> t.IO:
    method exists (line 38) | def exists(self, filename: str) -> bool:
  class InMemoryStore (line 42) | class InMemoryStore(Store):
    method __init__ (line 45) | def __init__(self):
    method open (line 48) | def open(self, filename: str, mode: str = "r") -> t.IO:
    method exists (line 57) | def exists(self, filename: str) -> bool:
  class TempFileStore (line 62) | class TempFileStore(Store):
    method __init__ (line 65) | def __init__(self, directory: t.Optional[str] = None) -> None:
    method open (line 71) | def open(self, filename: str, mode: str = "r") -> t.IO:
    method exists (line 75) | def exists(self, filename: str) -> bool:
  class LocalFileStore (line 80) | class LocalFileStore(Store):
    method __init__ (line 83) | def __init__(self, directory: t.Optional[str] = None) -> None:
    method open (line 89) | def open(self, filename: str, mode: str = "r") -> t.IO:
    method exists (line 93) | def exists(self, filename: str) -> bool:
  class FSStore (line 98) | class FSStore(Store):
    method open (line 101) | def open(self, filename: str, mode: str = "r") -> t.IO:
    method exists (line 120) | def exists(self, filename: str) -> bool:

FILE: weather_dl_v2/fastapi-server/config_processing/util.py
  function _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter (line 43) | def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(
  class _FakeClock (line 56) | class _FakeClock:
    method sleep (line 58) | def sleep(self, value):
  function retry_with_exponential_backoff (line 62) | def retry_with_exponential_backoff(fun):
  function ichunked (line 77) | def ichunked(iterable: t.Iterable, n: int) -> t.Iterator[t.Iterable]:
  function copy (line 91) | def copy(src: str, dst: str) -> None:
  function to_json_serializable_type (line 103) | def to_json_serializable_type(value: t.Any) -> t.Any:
  function fetch_geo_polygon (line 150) | def fetch_geo_polygon(area: t.Union[list, str]) -> str:
  function get_file_size (line 181) | def get_file_size(path: str) -> float:
  function get_wait_interval (line 191) | def get_wait_interval(num_retries: int = 0) -> float:
  function generate_md5_hash (line 198) | def generate_md5_hash(input: str) -> str:
  function download_with_aria2 (line 203) | def download_with_aria2(url: str, path: str) -> None:
  function generate_hdate (line 231) | def generate_hdate(date: str, subtract_year: str) -> str:

FILE: weather_dl_v2/fastapi-server/database/download_handler.py
  function get_download_handler (line 27) | def get_download_handler():
  function get_mock_download_handler (line 31) | def get_mock_download_handler():
  class DownloadHandler (line 35) | class DownloadHandler(abc.ABC):
    method _start_download (line 38) | async def _start_download(self, config_name: str, client_name: str) ->...
    method _stop_download (line 42) | async def _stop_download(self, config_name: str) -> None:
    method _mark_partitioning_status (line 46) | async def _mark_partitioning_status(self, config_name: str, status: st...
    method _check_download_exists (line 50) | async def _check_download_exists(self, config_name: str) -> bool:
    method _get_downloads (line 54) | async def _get_downloads(self, client_name: str) -> list:
    method _get_download_by_config_name (line 58) | async def _get_download_by_config_name(self, config_name: str):
  class DownloadHandlerMock (line 62) | class DownloadHandlerMock(DownloadHandler):
    method __init__ (line 64) | def __init__(self):
    method _start_download (line 67) | async def _start_download(self, config_name: str, client_name: str) ->...
    method _stop_download (line 72) | async def _stop_download(self, config_name: str) -> None:
    method _mark_partitioning_status (line 77) | async def _mark_partitioning_status(self, config_name: str, status: st...
    method _check_download_exists (line 82) | async def _check_download_exists(self, config_name: str) -> bool:
    method _get_downloads (line 90) | async def _get_downloads(self, client_name: str) -> list:
    method _get_download_by_config_name (line 93) | async def _get_download_by_config_name(self, config_name: str):
  class DownloadHandlerFirestore (line 99) | class DownloadHandlerFirestore(DownloadHandler):
    method __init__ (line 101) | def __init__(self, db: firestore.firestore.Client):
    method _start_download (line 105) | async def _start_download(self, config_name: str, client_name: str) ->...
    method _stop_download (line 116) | async def _stop_download(self, config_name: str) -> None:
    method _mark_partitioning_status (line 124) | async def _mark_partitioning_status(self, config_name: str, status: st...
    method _check_download_exists (line 134) | async def _check_download_exists(self, config_name: str) -> bool:
    method _get_downloads (line 140) | async def _get_downloads(self, client_name: str) -> list:
    method _get_download_by_config_name (line 153) | async def _get_download_by_config_name(self, config_name: str):
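
Each database handler comes in the same three parts: an async abstract base, a Mock for tests, and a Firestore-backed implementation, selected via get_download_handler / get_mock_download_handler. A reduced async sketch of that arrangement (names are illustrative):

    import abc
    import asyncio

    class ExistsHandler(abc.ABC):
        @abc.abstractmethod
        async def _check_exists(self, config_name: str) -> bool: ...

    class ExistsHandlerMock(ExistsHandler):
        # Stand-in for DownloadHandlerMock: answers from an in-memory set.
        def __init__(self) -> None:
            self._known = {"dummy_config"}

        async def _check_exists(self, config_name: str) -> bool:
            return config_name in self._known

    async def main() -> None:
        handler = ExistsHandlerMock()
        assert await handler._check_exists("dummy_config")
        assert not await handler._check_exists("missing_config")

    asyncio.run(main())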

FILE: weather_dl_v2/fastapi-server/database/license_handler.py
  function get_license_handler (line 28) | def get_license_handler():
  function get_mock_license_handler (line 32) | def get_mock_license_handler():
  class LicenseHandler (line 36) | class LicenseHandler(abc.ABC):
    method _add_license (line 39) | async def _add_license(self, license_dict: dict) -> str:
    method _delete_license (line 43) | async def _delete_license(self, license_id: str) -> None:
    method _check_license_exists (line 47) | async def _check_license_exists(self, license_id: str) -> bool:
    method _get_license_by_license_id (line 51) | async def _get_license_by_license_id(self, license_id: str) -> dict:
    method _get_license_by_client_name (line 55) | async def _get_license_by_client_name(self, client_name: str) -> list:
    method _get_licenses (line 59) | async def _get_licenses(self) -> list:
    method _update_license (line 63) | async def _update_license(self, license_id: str, license_dict: dict) -...
    method _get_license_without_deployment (line 67) | async def _get_license_without_deployment(self) -> list:
    method _mark_license_status (line 71) | async def _mark_license_status(self, license_id: str, status: str) -> ...
  class LicenseHandlerMock (line 74) | class LicenseHandlerMock(LicenseHandler):
    method __init__ (line 76) | def __init__(self):
    method _add_license (line 79) | async def _add_license(self, license_dict: dict) -> str:
    method _delete_license (line 84) | async def _delete_license(self, license_id: str) -> None:
    method _update_license (line 89) | async def _update_license(self, license_id: str, license_dict: dict) -...
    method _check_license_exists (line 94) | async def _check_license_exists(self, license_id: str) -> bool:
    method _get_license_by_license_id (line 102) | async def _get_license_by_license_id(self, license_id: str) -> dict:
    method _get_license_by_client_name (line 113) | async def _get_license_by_client_name(self, client_name: str) -> list:
    method _get_licenses (line 122) | async def _get_licenses(self) -> list:
    method _get_license_without_deployment (line 131) | async def _get_license_without_deployment(self) -> list:
  class LicenseHandlerFirestore (line 135) | class LicenseHandlerFirestore(LicenseHandler):
    method __init__ (line 137) | def __init__(self, db: firestore.firestore.AsyncClient):
    method _add_license (line 141) | async def _add_license(self, license_dict: dict) -> str:
    method _delete_license (line 155) | async def _delete_license(self, license_id: str) -> None:
    method _update_license (line 163) | async def _update_license(self, license_id: str, license_dict: dict) -...
    method _check_license_exists (line 173) | async def _check_license_exists(self, license_id: str) -> bool:
    method _get_license_by_license_id (line 179) | async def _get_license_by_license_id(self, license_id: str) -> dict:
    method _get_license_by_client_name (line 185) | async def _get_license_by_client_name(self, client_name: str) -> list:
    method _get_licenses (line 193) | async def _get_licenses(self) -> list:
    method _get_license_without_deployment (line 197) | async def _get_license_without_deployment(self) -> list:
    method _mark_license_status (line 205) | async def _mark_license_status(self, license_id: str, status: str) -> ...

FILE: weather_dl_v2/fastapi-server/database/manifest_handler.py
  function get_manifest_handler (line 26) | def get_manifest_handler():
  function get_mock_manifest_handler (line 30) | def get_mock_manifest_handler():
  class ManifestHandler (line 34) | class ManifestHandler(abc.ABC):
    method _get_download_success_count (line 37) | async def _get_download_success_count(self, config_name: str) -> int:
    method _get_download_failure_count (line 41) | async def _get_download_failure_count(self, config_name: str) -> int:
    method _get_download_scheduled_count (line 45) | async def _get_download_scheduled_count(self, config_name: str) -> int:
    method _get_download_inprogress_count (line 49) | async def _get_download_inprogress_count(self, config_name: str) -> int:
    method _get_download_total_count (line 53) | async def _get_download_total_count(self, config_name: str) -> int:
    method _get_non_successfull_downloads (line 57) | async def _get_non_successfull_downloads(self, config_name: str) -> list:
    method _get_failed_downloads (line 61) | async def _get_failed_downloads(self, config_name: str) -> list:
  class ManifestHandlerMock (line 65) | class ManifestHandlerMock(ManifestHandler):
    method _get_download_failure_count (line 67) | async def _get_download_failure_count(self, config_name: str) -> int:
    method _get_download_inprogress_count (line 70) | async def _get_download_inprogress_count(self, config_name: str) -> int:
    method _get_download_scheduled_count (line 73) | async def _get_download_scheduled_count(self, config_name: str) -> int:
    method _get_download_success_count (line 76) | async def _get_download_success_count(self, config_name: str) -> int:
    method _get_download_total_count (line 79) | async def _get_download_total_count(self, config_name: str) -> int:
    method _get_non_successfull_downloads (line 82) | async def _get_non_successfull_downloads(self, config_name: str) -> list:
    method _get_failed_downloads (line 85) | async def _get_failed_downloads(self, config_name: str) -> list:
  class ManifestHandlerFirestore (line 89) | class ManifestHandlerFirestore(ManifestHandler):
    method __init__ (line 91) | def __init__(self, db: firestore.firestore.Client):
    method _get_download_success_count (line 95) | async def _get_download_success_count(self, config_name: str) -> int:
    method _get_download_failure_count (line 109) | async def _get_download_failure_count(self, config_name: str) -> int:
    method _get_download_scheduled_count (line 122) | async def _get_download_scheduled_count(self, config_name: str) -> int:
    method _get_download_inprogress_count (line 135) | async def _get_download_inprogress_count(self, config_name: str) -> int:
    method _get_download_total_count (line 160) | async def _get_download_total_count(self, config_name: str) -> int:
    method _get_non_successfull_downloads (line 172) | async def _get_non_successfull_downloads(self, config_name: str) -> list:
    method _get_failed_downloads (line 195) | async def _get_failed_downloads(self, config_name: str) -> list:

FILE: weather_dl_v2/fastapi-server/database/queue_handler.py
  function get_queue_handler (line 27) | def get_queue_handler():
  function get_mock_queue_handler (line 31) | def get_mock_queue_handler():
  class QueueHandler (line 35) | class QueueHandler(abc.ABC):
    method _create_license_queue (line 38) | async def _create_license_queue(self, license_id: str, client_name: st...
    method _remove_license_queue (line 42) | async def _remove_license_queue(self, license_id: str) -> None:
    method _get_queues (line 46) | async def _get_queues(self) -> list:
    method _get_queue_by_license_id (line 50) | async def _get_queue_by_license_id(self, license_id: str) -> dict:
    method _get_queue_by_client_name (line 54) | async def _get_queue_by_client_name(self, client_name: str) -> list:
    method _update_license_queue (line 58) | async def _update_license_queue(self, license_id: str, priority_list: ...
    method _update_queues_on_start_download (line 62) | async def _update_queues_on_start_download(
    method _update_queues_on_stop_download (line 68) | async def _update_queues_on_stop_download(self, config_name: str) -> N...
    method _update_config_priority_in_license (line 72) | async def _update_config_priority_in_license(
    method _update_client_name_in_license_queue (line 78) | async def _update_client_name_in_license_queue(
  class QueueHandlerMock (line 84) | class QueueHandlerMock(QueueHandler):
    method __init__ (line 86) | def __init__(self):
    method _create_license_queue (line 89) | async def _create_license_queue(self, license_id: str, client_name: st...
    method _remove_license_queue (line 94) | async def _remove_license_queue(self, license_id: str) -> None:
    method _get_queues (line 99) | async def _get_queues(self) -> list:
    method _get_queue_by_license_id (line 102) | async def _get_queue_by_license_id(self, license_id: str) -> dict:
    method _get_queue_by_client_name (line 107) | async def _get_queue_by_client_name(self, client_name: str) -> list:
    method _update_license_queue (line 110) | async def _update_license_queue(self, license_id: str, priority_list: ...
    method _update_queues_on_start_download (line 115) | async def _update_queues_on_start_download(
    method _update_queues_on_stop_download (line 122) | async def _update_queues_on_stop_download(self, config_name: str) -> N...
    method _update_config_priority_in_license (line 127) | async def _update_config_priority_in_license(
    method _update_client_name_in_license_queue (line 134) | async def _update_client_name_in_license_queue(
  class QueueHandlerFirestore (line 142) | class QueueHandlerFirestore(QueueHandler):
    method __init__ (line 144) | def __init__(self, db: firestore.firestore.Client):
    method _create_license_queue (line 148) | async def _create_license_queue(self, license_id: str, client_name: st...
    method _remove_license_queue (line 158) | async def _remove_license_queue(self, license_id: str) -> None:
    method _get_queues (line 166) | async def _get_queues(self) -> list:
    method _get_queue_by_license_id (line 170) | async def _get_queue_by_license_id(self, license_id: str) -> dict:
    method _get_queue_by_client_name (line 176) | async def _get_queue_by_client_name(self, client_name: str) -> list:
    method _update_license_queue (line 184) | async def _update_license_queue(self, license_id: str, priority_list: ...
    method _update_queues_on_start_download (line 194) | async def _update_queues_on_start_download(
    method _update_queues_on_stop_download (line 207) | async def _update_queues_on_stop_download(self, config_name: str) -> N...
    method _update_config_priority_in_license (line 219) | async def _update_config_priority_in_license(
    method _update_client_name_in_license_queue (line 240) | async def _update_client_name_in_license_queue(

FILE: weather_dl_v2/fastapi-server/database/session.py
  class Database (line 29) | class Database(abc.ABC):
    method _get_db (line 32) | def _get_db(self):
  function get_async_client (line 38) | def get_async_client() -> firestore.AsyncClient:
  function get_gcs_client (line 66) | def get_gcs_client() -> storage.Client:

FILE: weather_dl_v2/fastapi-server/database/storage_handler.py
  function get_storage_handler (line 30) | def get_storage_handler():
  class StorageHandler (line 34) | class StorageHandler(abc.ABC):
    method _upload_file (line 37) | def _upload_file(self, file_path) -> str:
    method _open_local (line 41) | def _open_local(self, file_name) -> t.Iterator[str]:
  class StorageHandlerMock (line 45) | class StorageHandlerMock(StorageHandler):
    method __init__ (line 47) | def __init__(self) -> None:
    method _upload_file (line 50) | def _upload_file(self, file_path) -> None:
    method _open_local (line 53) | def _open_local(self, file_name) -> t.Iterator[str]:
  class StorageHandlerGCS (line 57) | class StorageHandlerGCS(StorageHandler):
    method __init__ (line 59) | def __init__(self, client: storage.Client) -> None:
    method _upload_file (line 63) | def _upload_file(self, file_path) -> str:
    method _open_local (line 73) | def _open_local(self, file_name) -> t.Iterator[str]:
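
StorageHandlerGCS persists files (e.g. submitted configs) to Cloud Storage. A hedged sketch of the upload step using the standard google-cloud-storage client (bucket name and return value here are illustrative, not the server's configuration):

    import os
    from google.cloud import storage

    def upload_file_sketch(client: storage.Client, bucket_name: str, file_path: str) -> str:
        # Upload a local file and return its gs:// URI.
        bucket = client.bucket(bucket_name)
        blob = bucket.blob(os.path.basename(file_path))
        blob.upload_from_filename(file_path)
        return f"gs://{bucket_name}/{blob.name}"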

FILE: weather_dl_v2/fastapi-server/license_dep/deployment_creator.py
  function create_license_deployment (line 25) | def create_license_deployment(license_id: str) -> str:
  function terminate_license_deployment (line 54) | def terminate_license_deployment(license_id: str) -> None:

FILE: weather_dl_v2/fastapi-server/main.py
  function create_pending_license_deployments (line 33) | async def create_pending_license_deployments():
  function lifespan (line 49) | async def lifespan(app: FastAPI):
  function main (line 69) | async def main():
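
lifespan uses FastAPI's lifespan hook to run startup work (such as create_pending_license_deployments above) before the server accepts requests. A sketch of the standard pattern, with a stub standing in for the server's actual startup task:

    from contextlib import asynccontextmanager
    from fastapi import FastAPI

    async def create_pending_license_deployments() -> None:
        # Stub for the startup task of the same name listed above.
        print("redeploying licenses that lack a deployment...")

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        await create_pending_license_deployments()  # startup
        yield
        # shutdown cleanup would go here

    app = FastAPI(lifespan=lifespan)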

FILE: weather_dl_v2/fastapi-server/routers/download.py
  function fetch_config_stats (line 45) | async def fetch_config_stats(
  function get_fetch_config_stats (line 82) | def get_fetch_config_stats():
  function get_fetch_config_stats_mock (line 86) | def get_fetch_config_stats_mock():
  function get_upload (line 103) | def get_upload():
  function get_upload_mock (line 117) | def get_upload_mock():
  function get_reschedule_partitions (line 124) | def get_reschedule_partitions():
  function get_reschedule_partitions_mock (line 187) | def get_reschedule_partitions_mock():
  function submit_download (line 196) | async def submit_download(
  class DownloadStatus (line 244) | class DownloadStatus(str, Enum):
  function show_download_config (line 251) | async def show_download_config(
  function get_downloads (line 276) | async def get_downloads(
  function get_download_by_config_name (line 323) | async def get_download_by_config_name(
  function delete_download (line 348) | async def delete_download(
  function retry_config (line 369) | async def retry_config(
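
The getter pairs above (get_fetch_config_stats / get_fetch_config_stats_mock, get_upload / get_upload_mock, ...) exist so tests can swap real behavior for doubles through FastAPI's dependency_overrides. A self-contained sketch of that mechanism, with a hypothetical route and placeholder behavior:

    from fastapi import Depends, FastAPI
    from fastapi.testclient import TestClient

    app = FastAPI()

    def get_upload():
        def upload(path: str) -> str:
            return f"uploaded {path}"             # real behavior (placeholder)
        return upload

    def get_upload_mock():
        def upload(path: str) -> str:
            return f"pretended to upload {path}"  # test double
        return upload

    @app.post("/upload")
    def upload_route(path: str, upload=Depends(get_upload)):
        return {"result": upload(path)}

    app.dependency_overrides[get_upload] = get_upload_mock
    client = TestClient(app)
    assert client.post("/upload", params={"path": "a.cfg"}).json() == {
        "result": "pretended to upload a.cfg"
    }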

FILE: weather_dl_v2/fastapi-server/routers/license.py
  class License (line 27) | class License(BaseModel):
  class LicenseInternal (line 34) | class LicenseInternal(License):
  function mark_license_active (line 45) | async def mark_license_active(license_id: str, license_handler: LicenseH...
  function update_license_internal (line 50) | async def update_license_internal(
  function get_create_deployment (line 66) | def get_create_deployment():
  function get_create_deployment_mock (line 74) | def get_create_deployment_mock():
  function get_terminate_license_deployment (line 81) | def get_terminate_license_deployment():
  function get_terminate_license_deployment_mock (line 85) | def get_terminate_license_deployment_mock():
  function get_licenses (line 94) | async def get_licenses(
  function get_license_by_license_id (line 107) | async def get_license_by_license_id(
  function update_license (line 119) | async def update_license(
  function add_license (line 147) | async def add_license(
  function delete_license (line 189) | async def delete_license(
  function redeploy_licenses (line 208) | async def redeploy_licenses(

FILE: weather_dl_v2/fastapi-server/routers/queues.py
  function get_all_license_queue (line 35) | async def get_all_license_queue(
  function get_license_queue (line 48) | async def get_license_queue(
  function modify_license_queue (line 62) | async def modify_license_queue(
  function modify_config_priority_in_license (line 93) | async def modify_config_priority_in_license(

FILE: weather_dl_v2/fastapi-server/server_config.py
  class ServerConfig (line 28) | class ServerConfig:
    method from_dict (line 40) | def from_dict(cls, config: t.Dict):
  function get_config (line 55) | def get_config():
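
server_config.py follows a dataclass-plus-from_dict pattern with a get_config accessor. A minimal sketch under that assumption; the actual field names, caching behavior, and config path are not visible in the index, so the ones below are placeholders.

    import dataclasses
    import functools
    import json
    import typing as t

    @dataclasses.dataclass
    class ServerConfig:
        download_collection: str = "download"  # placeholder field
        queues_collection: str = "queues"      # placeholder field

        @classmethod
        def from_dict(cls, config: t.Dict) -> "ServerConfig":
            # Ignore unknown keys so extra config entries don't break parsing.
            known = {f.name for f in dataclasses.fields(cls)}
            return cls(**{k: v for k, v in config.items() if k in known})

    @functools.lru_cache
    def get_config() -> ServerConfig:
        with open("config.json") as f:  # path is an assumption
            return ServerConfig.from_dict(json.load(f))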

FILE: weather_dl_v2/fastapi-server/tests/integration/test_download.py
  function _get_download (line 36) | def _get_download(headers, query, code, expected):
  function test_get_downloads_basic (line 43) | def test_get_downloads_basic():
  function _submit_download (line 60) | def _submit_download(headers, file_path, licenses, code, expected):
  function test_submit_download_basic (line 77) | def test_submit_download_basic():
  function test_submit_download_file_not_uploaded (line 92) | def test_submit_download_file_not_uploaded():
  function test_submit_download_file_alreadys_exist (line 104) | def test_submit_download_file_alreadys_exist():
  function _get_download_by_config (line 118) | def _get_download_by_config(headers, config_name, code, expected):
  function test_get_download_by_config_basic (line 125) | def test_get_download_by_config_basic():
  function test_get_download_by_config_wrong_config (line 142) | def test_get_download_by_config_wrong_config():
  function _delete_download_by_config (line 151) | def _delete_download_by_config(headers, config_name, code, expected):
  function test_delete_download_by_config_basic (line 157) | def test_delete_download_by_config_basic():
  function test_delete_download_by_config_wrong_config (line 169) | def test_delete_download_by_config_wrong_config():

FILE: weather_dl_v2/fastapi-server/tests/integration/test_license.py
  function _get_license (line 43) | def _get_license(headers, query, code, expected):
  function test_get_license_basic (line 50) | def test_get_license_basic():
  function test_get_license_client_name (line 65) | def test_get_license_client_name():
  function _add_license (line 81) | def _add_license(headers, payload, code, expected):
  function test_add_license_basic (line 95) | def test_add_license_basic():
  function _get_license_by_license_id (line 110) | def _get_license_by_license_id(headers, license_id, code, expected):
  function test_get_license_by_license_id (line 118) | def test_get_license_by_license_id():
  function test_get_license_wrong_license (line 133) | def test_get_license_wrong_license():
  function _update_license (line 144) | def _update_license(headers, license_id, license, code, expected):
  function test_update_license_basic (line 155) | def test_update_license_basic():
  function test_update_license_wrong_license_id (line 170) | def test_update_license_wrong_license_id():
  function _delete_license (line 185) | def _delete_license(headers, license_id, code, expected):
  function test_delete_license_basic (line 192) | def test_delete_license_basic():
  function test_delete_license_wrong_license (line 201) | def test_delete_license_wrong_license():

FILE: weather_dl_v2/fastapi-server/tests/integration/test_queues.py
  function _get_all_queue (line 32) | def _get_all_queue(headers, query, code, expected):
  function test_get_all_queues (line 39) | def test_get_all_queues():
  function test_get_client_queues (line 48) | def test_get_client_queues():
  function _get_queue_by_license (line 58) | def _get_queue_by_license(headers, license_id, code, expected):
  function test_get_queue_by_license_basic (line 65) | def test_get_queue_by_license_basic():
  function test_get_queue_by_license_wrong_license (line 74) | def test_get_queue_by_license_wrong_license():
  function _modify_license_queue (line 83) | def _modify_license_queue(headers, license_id, priority_list, code, expe...
  function test_modify_license_queue_basic (line 90) | def test_modify_license_queue_basic():
  function test_modify_license_queue_wrong_license_id (line 100) | def test_modify_license_queue_wrong_license_id():
  function _modify_config_priority_in_license (line 110) | def _modify_config_priority_in_license(headers, license_id, query, code,...
  function test_modify_config_priority_in_license_basic (line 119) | def test_modify_config_priority_in_license_basic():
  function test_modify_config_priority_in_license_wrong_license (line 131) | def test_modify_config_priority_in_license_wrong_license():
  function test_modify_config_priority_in_license_wrong_config (line 141) | def test_modify_config_priority_in_license_wrong_config():
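
All three integration-test modules share one shape: a private helper takes (headers, ..., expected status code, expected body) and each test supplies a single scenario. A sketch of that shape using FastAPI's TestClient; the app import, route path, and response bodies are assumptions.

    from fastapi.testclient import TestClient

    from main import app  # assumption: the app object in fastapi-server/main.py

    client = TestClient(app)

    def _get_queue_by_license(headers, license_id, code, expected):
        response = client.get(f"/queues/{license_id}", headers=headers)
        assert response.status_code == code
        assert response.json() == expected

    def test_get_queue_by_license_wrong_license():
        _get_queue_by_license(
            headers={},
            license_id="no-such-license",
            code=404,
            expected={"detail": "License not found."},  # placeholder body
        )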

FILE: weather_dl_v2/license_deployment/clients.py
  class Client (line 41) | class Client(abc.ABC):
    method __init__ (line 52) | def __init__(self, dataset: str, level: int = logging.INFO) -> None:
    method retrieve (line 59) | def retrieve(
    method num_requests_per_key (line 67) | def num_requests_per_key(cls, dataset: str) -> int:
    method license_url (line 73) | def license_url(self):
  class SplitCDSRequest (line 78) | class SplitCDSRequest():
    method __init__ (line 80) | def __init__(self, *args, **kwargs):
    method _download (line 84) | def _download(self, url, path: str, size: int) -> None:
    method fetch (line 94) | def fetch(self, request: t.Dict, dataset: str) -> t.Dict:
    method download (line 98) | def download(self, result: cds_api.Result, target: t.Optional[str] = N...
  class CdsClient (line 109) | class CdsClient(Client):
    method retrieve (line 131) | def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest...
    method license_url (line 153) | def license_url(self):
    method num_requests_per_key (line 157) | def num_requests_per_key(cls, dataset: str) -> int:
  class StdoutLogger (line 178) | class StdoutLogger(io.StringIO):
    method __init__ (line 181) | def __init__(self, logger_: logging.Logger, level: int = logging.INFO):
    method log (line 187) | def log(self, msg) -> None:
    method write (line 190) | def write(self, msg):
    method __enter__ (line 194) | def __enter__(self):
    method __exit__ (line 198) | def __exit__(self, exc_type, exc_value, traceback):
  class SplitMARSRequest (line 203) | class SplitMARSRequest(api.APIRequest):
    method _download (line 207) | def _download(self, url, path: str, size: int) -> None:
    method fetch (line 213) | def fetch(self, request: t.Dict, dataset: str) -> t.Dict:
    method download (line 236) | def download(self, result: t.Dict, target: t.Optional[str] = None) -> ...
  class SplitRequestMixin (line 248) | class SplitRequestMixin:
    method fetch (line 251) | def fetch(self, req: t.Dict, dataset: t.Optional[str] = None) -> t.Dict:
    method download (line 254) | def download(self, res: t.Dict, target: str) -> None:
  class CDSClientExtended (line 258) | class CDSClientExtended(SplitRequestMixin):
    method __init__ (line 261) | def __init__(self, *args, **kwargs):
  class MARSECMWFServiceExtended (line 265) | class MARSECMWFServiceExtended(api.ECMWFService, SplitRequestMixin):
    method __init__ (line 268) | def __init__(self, *args, **kwargs):
  class PublicECMWFServerExtended (line 281) | class PublicECMWFServerExtended(api.ECMWFDataServer, SplitRequestMixin):
    method __init__ (line 283) | def __init__(self, *args, dataset="", **kwargs):
  class MarsClient (line 295) | class MarsClient(Client):
    method retrieve (line 315) | def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest...
    method license_url (line 337) | def license_url(self):
    method num_requests_per_key (line 341) | def num_requests_per_key(cls, dataset: str) -> int:
  class ECMWFPublicClient (line 356) | class ECMWFPublicClient(Client):
    method retrieve (line 359) | def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest...
    method num_requests_per_key (line 381) | def num_requests_per_key(cls, dataset: str) -> int:
    method license_url (line 386) | def license_url(self):
  class FakeClient (line 392) | class FakeClient(Client):
    method retrieve (line 395) | def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest...
    method license_url (line 406) | def license_url(self):
    method num_requests_per_key (line 410) | def num_requests_per_key(cls, dataset: str) -> int:

FILE: weather_dl_v2/license_deployment/config.py
  class Config (line 26) | class Config:
    method from_dict (line 68) | def from_dict(cls, config: t.Dict) -> "Config":
  function optimize_selection_partition (line 82) | def optimize_selection_partition(selection: t.Dict) -> t.Dict:

FILE: weather_dl_v2/license_deployment/database.py
  class Database (line 31) | class Database(abc.ABC):
    method _get_db (line 34) | def _get_db(self):
  class CRUDOperations (line 38) | class CRUDOperations(abc.ABC):
    method _initialize_license_deployment (line 41) | def _initialize_license_deployment(self, license_id: str) -> dict:
    method _get_config_from_queue_by_license_id (line 45) | def _get_config_from_queue_by_license_id(self, license_id: str) -> dict:
    method _remove_config_from_license_queue (line 49) | def _remove_config_from_license_queue(
    method _empty_license_queue (line 55) | def _empty_license_queue(self, license_id: str) -> None:
    method _get_partition_from_manifest (line 59) | def _get_partition_from_manifest(self, config_name: str) -> str:
    method _mark_license_status (line 63) | def _mark_license_status(self, license_id: str, status: str) -> None:
  class FirestoreClient (line 66) | class FirestoreClient(Database, CRUDOperations):
    method _get_db (line 68) | def _get_db(self) -> firestore.firestore.Client:
    method _initialize_license_deployment (line 99) | def _initialize_license_deployment(self, license_id: str) -> dict:
    method _get_config_from_queue_by_license_id (line 108) | def _get_config_from_queue_by_license_id(self, license_id: str) -> str...
    method _get_partition_from_manifest (line 121) | def _get_partition_from_manifest(self, config_name: str) -> str | None:
    method _remove_config_from_license_queue (line 125) | def _remove_config_from_license_queue(
    method _empty_license_queue (line 138) | def _empty_license_queue(self, license_id: str) -> None:
    method _mark_license_status (line 149) | def _mark_license_status(self, license_id: str, status: str) -> None:
  function get_partition_from_manifest (line 164) | def get_partition_from_manifest(transaction, config_name: str) -> str | ...

FILE: weather_dl_v2/license_deployment/deployment_config.py
  class DeploymentConfig (line 28) | class DeploymentConfig:
    method from_dict (line 37) | def from_dict(cls, config: t.Dict):
  function get_config (line 52) | def get_config():

FILE: weather_dl_v2/license_deployment/fetch.py
  function create_job (line 39) | def create_job(request, result):
  function make_fetch_request (line 56) | def make_fetch_request(request, error_map: ThreadSafeDict):
  function fetch_request_from_db (line 132) | def fetch_request_from_db():
  function main (line 148) | def main():
  function boot_up (line 198) | def boot_up(license: str) -> None:

FILE: weather_dl_v2/license_deployment/job_creator.py
  function create_download_job (line 24) | def create_download_job(message):

FILE: weather_dl_v2/license_deployment/manifest.py
  class ManifestException (line 52) | class ManifestException(Exception):
  class Stage (line 58) | class Stage(enum.Enum):
  class Status (line 77) | class Status(enum.Enum):
  class DownloadStatus (line 97) | class DownloadStatus:
    method from_dict (line 158) | def from_dict(cls, download_status: t.Dict) -> "DownloadStatus":
    method to_dict (line 171) | def to_dict(cls, instance) -> t.Dict:
  class Manifest (line 190) | class Manifest(abc.ABC):
    method __post_init__ (line 230) | def __post_init__(self):
    method schedule (line 234) | def schedule(
    method skip (line 274) | def skip(
    method _set_for_transaction (line 327) | def _set_for_transaction(
    method __enter__ (line 343) | def __enter__(self) -> None:
    method __exit__ (line 346) | def __exit__(self, exc_type, exc_inst, exc_tb) -> None:
    method transact (line 387) | def transact(
    method set_stage (line 399) | def set_stage(self, stage: Stage) -> None:
    method _read (line 432) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 436) | def _update(self, download_status: DownloadStatus) -> None:
  class FirestoreManifest (line 440) | class FirestoreManifest(Manifest, Database):
    method _get_db (line 450) | def _get_db(self) -> firestore.firestore.Client:
    method _read (line 481) | def _read(self, location: str) -> DownloadStatus:
    method _update (line 496) | def _update(self, download_status: DownloadStatus) -> None:
    method root_document_for_store (line 516) | def root_document_for_store(self, store_scheme: str) -> DocumentRefere...
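
Manifest above is used as a context manager (__enter__/__exit__) so each download stage records success or failure in one place. A stripped-down sketch of just that enter/exit bookkeeping; the real stages, transactions, and Firestore backend are omitted.

    import enum
    import traceback

    class Status(enum.Enum):
        IN_PROGRESS = "in-progress"
        SUCCESS = "success"
        FAILURE = "failure"

    class MiniManifest:
        """Not the real Manifest; only its context-manager shape."""

        def __init__(self, location: str):
            self.location = location
            self.status = None
            self.error = None

        def __enter__(self) -> None:
            self.status = Status.IN_PROGRESS

        def __exit__(self, exc_type, exc_inst, exc_tb) -> None:
            if exc_type is None:
                self.status = Status.SUCCESS
            else:
                self.status = Status.FAILURE
                self.error = "".join(
                    traceback.format_exception(exc_type, exc_inst, exc_tb))

    # Usage: any exception raised inside the block is captured as a failure.
    # with MiniManifest("gs://bucket/target.nc"):
    #     do_download()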

FILE: weather_dl_v2/license_deployment/util.py
  function exceptionit (line 45) | def exceptionit(func):
  function _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter (line 55) | def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(
  class GracefulKiller (line 67) | class GracefulKiller:
    method __init__ (line 73) | def __init__(self):
    method exit_gracefully (line 77) | def exit_gracefully(self, signum, frame):
  class _FakeClock (line 82) | class _FakeClock:
    method sleep (line 84) | def sleep(self, value):
  function retry_with_exponential_backoff (line 88) | def retry_with_exponential_backoff(fun):
  function ichunked (line 103) | def ichunked(iterable: t.Iterable, n: int) -> t.Iterator[t.Iterable]:
  function copy (line 117) | def copy(src: str, dst: str) -> None:
  function to_json_serializable_type (line 129) | def to_json_serializable_type(value: t.Any) -> t.Any:
  function fetch_geo_polygon (line 176) | def fetch_geo_polygon(area: t.Union[list, str]) -> str:
  function get_file_size (line 207) | def get_file_size(path: str) -> float:
  function get_wait_interval (line 217) | def get_wait_interval(num_retries: int = 0) -> float:
  function generate_md5_hash (line 224) | def generate_md5_hash(input: str) -> str:
  function download_with_aria2 (line 229) | def download_with_aria2(url: str, path: str) -> None:
  class ThreadSafeDict (line 257) | class ThreadSafeDict:
    method __init__ (line 261) | def __init__(self) -> None:
    method __getitem__ (line 268) | def __getitem__(self, key):
    method __setitem__ (line 275) | def __setitem__(self, key, value):
    method remove (line 280) | def remove(self, key):
    method has_key (line 285) | def has_key(self, key):
    method increment (line 292) | def increment(self, key, delta=1):
    method decrement (line 298) | def decrement(self, key, delta=1):
    method find_exponential_delay (line 304) | def find_exponential_delay(self, n: int) -> int:
    method exponential_time (line 311) | def exponential_time(self, key):
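
ThreadSafeDict above wraps a plain dict behind a lock; fetch.py passes one between workers as error_map (see make_fetch_request). A minimal lock-guarded sketch; the exponential-delay helpers are omitted and the increment/decrement semantics are inferred from the method names.

    import threading

    class ThreadSafeDict:
        """A dict whose accesses are serialized by a single lock."""

        def __init__(self) -> None:
            self._dict = {}
            self._lock = threading.Lock()

        def __getitem__(self, key):
            with self._lock:
                return self._dict[key]

        def __setitem__(self, key, value):
            with self._lock:
                self._dict[key] = value

        def has_key(self, key):
            with self._lock:
                return key in self._dict

        def increment(self, key, delta=1):
            with self._lock:
                self._dict[key] = self._dict.get(key, 0) + delta

        def decrement(self, key, delta=1):
            with self._lock:
                self._dict[key] = self._dict.get(key, 0) - delta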

FILE: weather_mv/loader_pipeline/__init__.py
  function cli (line 21) | def cli(extra=[]):

FILE: weather_mv/loader_pipeline/bq.py
  class ToBigQuery (line 60) | class ToBigQuery(ToDataSink):
    method add_parser_arguments (line 120) | def add_parser_arguments(cls, subparser: argparse.ArgumentParser):
    method validate_arguments (line 163) | def validate_arguments(cls, known_args: argparse.Namespace, pipeline_a...
    method generate_parquet (line 197) | def generate_parquet(
    method __post_init__ (line 240) | def __post_init__(self):
    method prepare_coordinates (line 311) | def prepare_coordinates(self, uri: str) -> t.Iterator[t.Tuple[str, t.D...
    method extract_rows (line 321) | def extract_rows(self, uri: str, coordinate: t.Dict) -> t.Iterator[t.D...
    method to_rows (line 338) | def to_rows(self, coordinate: t.Dict, ds: xr.Dataset, uri: str) -> t.I...
    method chunks_to_rows (line 388) | def chunks_to_rows(self, _, ds: xr.Dataset) -> t.Iterator[t.Dict]:
    method expand (line 397) | def expand(self, paths):
  function map_dtype_to_sql_type (line 438) | def map_dtype_to_sql_type(var_type: np.dtype) -> str:
  function dataset_to_table_schema (line 449) | def dataset_to_table_schema(ds: xr.Dataset) -> t.List[bigquery.SchemaFie...
  function to_table_schema (line 460) | def to_table_schema(columns: t.List[t.Tuple[str, str]]) -> t.List[bigque...
  function timestamp_row (line 477) | def timestamp_row(it: t.Dict) -> window.TimestampedValue:
  function fetch_geo_point (line 483) | def fetch_geo_point(lat: float, long: float) -> str:
  function fetch_geo_polygon (line 493) | def fetch_geo_polygon(latitude: float, longitude: float, lat_grid_resolu...
  function bound_point (line 517) | def bound_point(latitude: float, longitude: float, lat_grid_resolution: ...
  function get_lat_lon_range (line 542) | def get_lat_lon_range(value: float, lat_lon: str, is_point_out_of_bound:...
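
bq.py turns each grid point into a geography column via fetch_geo_point(lat, long). A hedged sketch that emits a GeoJSON Point string (BigQuery's GEOGRAPHY type accepts GeoJSON); the in-repo version may use the geojson package rather than plain json.

    import json

    def fetch_geo_point(lat: float, long: float) -> str:
        """Encode a (lat, long) pair as a GeoJSON Point string."""
        if lat > 90 or lat < -90:
            raise ValueError(f"Invalid latitude value '{lat}'")
        # GeoJSON orders coordinates as (longitude, latitude).
        return json.dumps({"type": "Point", "coordinates": [long, lat]})

    # e.g. fetch_geo_point(51.5, -0.1)
    #   -> '{"type": "Point", "coordinates": [-0.1, 51.5]}'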

FILE: weather_mv/loader_pipeline/bq_test.py
  class SchemaCreationTests (line 44) | class SchemaCreationTests(TestDataBase):
    method setUp (line 46) | def setUp(self) -> None:
    method test_schema_generation (line 69) | def test_schema_generation(self):
    method test_schema_generation__with_schema_normalization (line 85) | def test_schema_generation__with_schema_normalization(self):
    method test_schema_generation__with_target_columns (line 101) | def test_schema_generation__with_target_columns(self):
    method test_schema_generation__with_target_columns__with_schema_normalization (line 117) | def test_schema_generation__with_target_columns__with_schema_normaliza...
    method test_schema_generation__no_targets_specified (line 133) | def test_schema_generation__no_targets_specified(self):
    method test_schema_generation__no_targets_specified__with_schema_normalization (line 150) | def test_schema_generation__no_targets_specified__with_schema_normaliz...
    method test_schema_generation__missing_target (line 167) | def test_schema_generation__missing_target(self):
    method test_schema_generation__missing_target__with_schema_normalization (line 172) | def test_schema_generation__missing_target__with_schema_normalization(...
    method test_schema_generation__non_index_coords (line 178) | def test_schema_generation__non_index_coords(self):
  class ExtractRowsTestBase (line 203) | class ExtractRowsTestBase(TestDataBase):
    method extract (line 205) | def extract(self, data_path, *, variables=None, area=None, open_datase...
    method assertGeopointEqual (line 226) | def assertGeopointEqual(self, actual: str, expected: str) -> None:
    method assertRowsEqual (line 231) | def assertRowsEqual(self, actual: t.Dict, expected: t.Dict):
  class ExtractRowsTest (line 247) | class ExtractRowsTest(ExtractRowsTestBase):
    method setUp (line 249) | def setUp(self) -> None:
    method test_01_extract_rows (line 254) | def test_01_extract_rows(self):
    method test_02_extract_rows__with_subset_variables (line 281) | def test_02_extract_rows__with_subset_variables(self):
    method test_03_extract_rows__specific_area (line 304) | def test_03_extract_rows__specific_area(self):
    method test_04_extract_rows__specific_area_float_points (line 328) | def test_04_extract_rows__specific_area_float_points(self):
    method test_05_extract_rows_raises_error_when_geo_data_parquet_dimensions_mismatch (line 354) | def test_05_extract_rows_raises_error_when_geo_data_parquet_dimensions...
    method test_06_extract_rows__specify_import_time (line 366) | def test_06_extract_rows__specify_import_time(self):
    method test_07_extract_rows_single_point (line 394) | def test_07_extract_rows_single_point(self):
    method test_08_extract_rows_nan (line 419) | def test_08_extract_rows_nan(self):
    method test_09_extract_rows__with_valid_lat_long_with_point (line 447) | def test_09_extract_rows__with_valid_lat_long_with_point(self):
    method test_10_extract_rows__with_valid_lat_long_with_polygon (line 466) | def test_10_extract_rows__with_valid_lat_long_with_polygon(self):
    method test_11_extract_rows__with_invalid_lat_lon (line 490) | def test_11_extract_rows__with_invalid_lat_lon(self):
    method test_12_extract_rows_zarr (line 497) | def test_12_extract_rows_zarr(self):
    method test_13_droping_variable_while_opening_zarr (line 525) | def test_13_droping_variable_while_opening_zarr(self):
  class ExtractRowsTifSupportTest (line 554) | class ExtractRowsTifSupportTest(ExtractRowsTestBase):
    method setUp (line 556) | def setUp(self) -> None:
    method test_01_extract_rows_with_end_time (line 561) | def test_01_extract_rows_with_end_time(self):
    method test_02_extract_rows_without_end_time (line 589) | def test_02_extract_rows_without_end_time(self):
  class ExtractRowsGribSupportTest (line 616) | class ExtractRowsGribSupportTest(ExtractRowsTestBase):
    method setUp (line 618) | def setUp(self) -> None:
    method test_01_extract_rows (line 624) | def test_01_extract_rows(self):
    method test_02_extract_rows__with_vars__excludes_non_index_coords__without_schema_normalization (line 654) | def test_02_extract_rows__with_vars__excludes_non_index_coords__withou...
    method test_03_extract_rows__with_vars__includes_coordinates_in_vars__without_schema_normalization (line 677) | def test_03_extract_rows__with_vars__includes_coordinates_in_vars__wit...
    method test_04_extract_rows__with_vars__excludes_non_index_coords__with_schema_normalization (line 704) | def test_04_extract_rows__with_vars__excludes_non_index_coords__with_s...
    method test_05_extract_rows__with_vars__includes_coordinates_in_vars__with_schema_normalization (line 729) | def test_05_extract_rows__with_vars__includes_coordinates_in_vars__wit...
    method test_06_multiple_editions__without_schema_normalization (line 755) | def test_06_multiple_editions__without_schema_normalization(self):
    method test_07_multiple_editions__with_schema_normalization (line 822) | def test_07_multiple_editions__with_schema_normalization(self):
    method test_08_multiple_editions__with_vars__includes_coordinates_in_vars__with_schema_normalization (line 891) | def test_08_multiple_editions__with_vars__includes_coordinates_in_vars...
  class ExtractRowsFromZarrTest (line 925) | class ExtractRowsFromZarrTest(ExtractRowsTestBase):
    method setUp (line 927) | def setUp(self) -> None:
    method tearDown (line 931) | def tearDown(self) -> None:
    method test_01_extracts_rows (line 935) | def test_01_extracts_rows(self):

FILE: weather_mv/loader_pipeline/ee.py
  function is_compute_engine (line 61) | def is_compute_engine() -> bool:
  function get_creds (line 69) | def get_creds(use_personal_account: bool, service_account: str, private_...
  function ee_initialize (line 104) | def ee_initialize(use_personal_account: bool = False,
  class SetupEarthEngine (line 140) | class SetupEarthEngine(RateLimit):
    method __init__ (line 143) | def __init__(self,
    method setup (line 161) | def setup(self, project_id):
    method check_setup (line 169) | def check_setup(self, project_id: t.Optional[str] = None):
    method process (line 179) | def process(self, *args, **kwargs):
  function get_ee_safe_name (line 184) | def get_ee_safe_name(uri: str) -> str:
  class AssetData (line 196) | class AssetData:
  class ToEarthEngine (line 216) | class ToEarthEngine(ToDataSink):
    method add_parser_arguments (line 269) | def add_parser_arguments(cls, subparser: argparse.ArgumentParser):
    method validate_arguments (line 317) | def validate_arguments(cls, known_args: argparse.Namespace, pipeline_a...
    method expand (line 372) | def expand(self, paths):
  class FilterFilesTransform (line 402) | class FilterFilesTransform(SetupEarthEngine, KwargsFactoryMixin):
    method __init__ (line 416) | def __init__(self,
    method process (line 443) | def process(self, uri: str) -> t.Iterator[str]:
  class ConvertToAsset (line 465) | class ConvertToAsset(beam.DoFn, KwargsFactoryMixin):
    method add_to_queue (line 486) | def add_to_queue(self, queue: Queue, item: t.Any):
    method convert_to_asset (line 495) | def convert_to_asset(self, queue: Queue, uri: str):
    method process (line 627) | def process(self, uri: str) -> t.Iterator[AssetData]:
  class IngestIntoEETransform (line 659) | class IngestIntoEETransform(SetupEarthEngine, KwargsFactoryMixin):
    method __init__ (line 673) | def __init__(self,
    method get_project_id (line 697) | def get_project_id(self) -> str:
    method ee_tasks_remaining (line 700) | def ee_tasks_remaining(self) -> int:
    method wait_for_task_queue (line 705) | def wait_for_task_queue(self) -> None:
    method start_ingestion (line 720) | def start_ingestion(self, asset_data: AssetData) -> t.Optional[str]:
    method process (line 823) | def process(self, asset_data: AssetData) -> t.Iterator[t.Tuple[str, fl...

FILE: weather_mv/loader_pipeline/ee_test.py
  class AssetNameCreationTests (line 28) | class AssetNameCreationTests(unittest.TestCase):
    method test_asset_name_creation (line 30) | def test_asset_name_creation(self):
    method test_asset_name_creation__with_special_chars (line 38) | def test_asset_name_creation__with_special_chars(self):
    method test_asset_name_creation__with_missing_filename (line 46) | def test_asset_name_creation__with_missing_filename(self):
    method test_asset_name_creation__with_only_filename (line 54) | def test_asset_name_creation__with_only_filename(self):
  class ConvertToAssetTests (line 63) | class ConvertToAssetTests(TestDataBase):
    method setUp (line 65) | def setUp(self) -> None:
    method tearDown (line 71) | def tearDown(self):
    method test_convert_to_image_asset (line 74) | def test_convert_to_image_asset(self):
    method test_convert_to_image_asset__with_multiple_grib_edition (line 83) | def test_convert_to_image_asset__with_multiple_grib_edition(self):
    method test_convert_to_table_asset (line 92) | def test_convert_to_table_asset(self):
    method test_convert_to_table_asset__with_multiple_grib_edition (line 101) | def test_convert_to_table_asset__with_multiple_grib_edition(self):

FILE: weather_mv/loader_pipeline/execution_test.py
  class ExecutionTest (line 20) | class ExecutionTest(unittest.TestCase):
    method test_run (line 30) | def test_run(self):

FILE: weather_mv/loader_pipeline/metrics.py
  function timeit (line 44) | def timeit(func_name: str, keyed_fn: bool = False):
  class AddTimer (line 109) | class AddTimer(beam.DoFn, KwargsFactoryMixin):
    method process (line 116) | def process(self, element) -> t.Iterator[t.Any]:
  class AddBeamMetrics (line 127) | class AddBeamMetrics(beam.DoFn):
    method __init__ (line 131) | def __init__(self, asset_start_time_format: str):
    method process (line 141) | def process(self, element):
  class CreateTimeSeries (line 192) | class CreateTimeSeries(beam.DoFn):
    method create_time_series_object (line 199) | def create_time_series_object(self, metric_name: str, metric_value: fl...
    method process (line 222) | def process(self, element: t.Any):
  class AddMetrics (line 260) | class AddMetrics(beam.PTransform, KwargsFactoryMixin):
    method expand (line 269) | def expand(self, pcoll: beam.PCollection):

FILE: weather_mv/loader_pipeline/pipeline.py
  function configure_logger (line 35) | def configure_logger(verbosity: int) -> None:
  function pattern_to_uris (line 42) | def pattern_to_uris(match_pattern: str, is_zarr: bool = False) -> t.Iter...
  function pipeline (line 51) | def pipeline(known_args: argparse.Namespace, pipeline_args: t.List[str])...
  function run (line 92) | def run(argv: t.List[str]) -> t.Tuple[argparse.Namespace, t.List[str]]:
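
pipeline.py expands the input glob through Beam's filesystem layer. A sketch using the real apache_beam.io.filesystems API, which resolves both local paths and gs:// URIs; the is_zarr special case hinted at by the signature is left out.

    import typing as t

    from apache_beam.io.filesystems import FileSystems

    def pattern_to_uris(match_pattern: str) -> t.Iterator[str]:
        # FileSystems.match takes a list of glob patterns and returns one
        # MatchResult per pattern, each carrying file metadata.
        for match_result in FileSystems.match([match_pattern]):
            for metadata in match_result.metadata_list:
                yield metadata.path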

FILE: weather_mv/loader_pipeline/pipeline_test.py
  class CLITests (line 21) | class CLITests(unittest.TestCase):
    method setUp (line 22) | def setUp(self) -> None:
  class TestCLI (line 85) | class TestCLI(CLITests):
    method test_dry_runs_are_allowed (line 87) | def test_dry_runs_are_allowed(self):
    method test_log_level_arg (line 91) | def test_log_level_arg(self):
    method test_tif_metadata_for_datetime_raise_error_for_non_tif_file (line 95) | def test_tif_metadata_for_datetime_raise_error_for_non_tif_file(self):
    method test_tif_metadata_for_datetime_raise_error_if_flag_is_absent (line 100) | def test_tif_metadata_for_datetime_raise_error_if_flag_is_absent(self):
    method test_area_only_allows_four (line 104) | def test_area_only_allows_four(self):
    method test_topic_creates_a_streaming_pipeline (line 118) | def test_topic_creates_a_streaming_pipeline(self):
    method test_subscription_creates_a_streaming_pipeline (line 122) | def test_subscription_creates_a_streaming_pipeline(self):
    method test_accepts_json_string_for_xarray_open (line 126) | def test_accepts_json_string_for_xarray_open(self):
    method test_ee_does_not_yet_support_zarr (line 134) | def test_ee_does_not_yet_support_zarr(self):
    method test_rg_zarr_cant_output_netcdf (line 138) | def test_rg_zarr_cant_output_netcdf(self):
    method test_rg_happy_path (line 142) | def test_rg_happy_path(self):
    method test_zarr_kwargs_must_come_with_zarr (line 145) | def test_zarr_kwargs_must_come_with_zarr(self):
    method test_topic_and_subscription__mutually_exclusive (line 149) | def test_topic_and_subscription__mutually_exclusive(self):
    method test_geo_data_parquet_path_must_be_parquet (line 153) | def test_geo_data_parquet_path_must_be_parquet(self):
  class IntegrationTest (line 157) | class IntegrationTest(CLITests):
    method test_dry_runs_are_allowed (line 158) | def test_dry_runs_are_allowed(self):

FILE: weather_mv/loader_pipeline/regrid.py
  function _clear_metview (line 46) | def _clear_metview():
  function _metview_op (line 60) | def _metview_op() -> t.Iterator[None]:
  class MapChunkAsFieldset (line 71) | class MapChunkAsFieldset(beam.PTransform):
    method apply (line 81) | def apply(self, key: xbeam.Key, fs: Fieldset) -> t.Tuple[xbeam.Key, Fi...
    method _apply (line 84) | def _apply(self, key: xbeam.Key, ds: xr.Dataset) -> t.Tuple[xbeam.Key,...
    method expand (line 109) | def expand(self, pcoll):
  class RegridChunk (line 114) | class RegridChunk(MapChunkAsFieldset):
    method template (line 126) | def template(self, source_ds: xr.Dataset) -> xr.Dataset:
    method apply (line 162) | def apply(self, key: xbeam.Key, fs: Fieldset) -> t.Tuple[xbeam.Key, Fi...
  class Regrid (line 167) | class Regrid(ToDataSink):
    method add_parser_arguments (line 191) | def add_parser_arguments(cls, subparser: argparse.ArgumentParser) -> N...
    method validate_arguments (line 209) | def validate_arguments(cls, known_args: argparse.Namespace, pipeline_o...
    method target_from (line 222) | def target_from(self, uri: str) -> str:
    method is_grib_file_corrupt (line 241) | def is_grib_file_corrupt(self, local_grib: str) -> bool:
    method path_exists (line 250) | def path_exists(self, path: str, force_regrid: bool = False) -> bool:
    method apply (line 258) | def apply(self, uri: str) -> None:
    method expand (line 311) | def expand(self, paths):

FILE: weather_mv/loader_pipeline/regrid_test.py
  function make_skin_temperature_dataset (line 34) | def make_skin_temperature_dataset() -> xr.Dataset:
  function metview_cache_exists (line 49) | def metview_cache_exists() -> bool:
  class RegridTest (line 54) | class RegridTest(TestDataBase):
    method setUp (line 58) | def setUp(self) -> None:
    method tearDown (line 73) | def tearDown(self) -> None:
    method test_target_name (line 76) | def test_target_name(self):
    method test_target_name__to_netCDF__changes_ext (line 80) | def test_target_name__to_netCDF__changes_ext(self):
    method test_apply__creates_a_file (line 85) | def test_apply__creates_a_file(self):
    method test_apply__works_when_called_twice (line 95) | def test_apply__works_when_called_twice(self):
    method test_apply__to_netCDF__creates_a_netCDF_file (line 99) | def test_apply__to_netCDF__creates_a_netCDF_file(self):
    method test_zarr__coarsen (line 114) | def test_zarr__coarsen(self):
    method test_corrupt_grib_file (line 138) | def test_corrupt_grib_file(self):

FILE: weather_mv/loader_pipeline/sinks.py
  class KwargsFactoryMixin (line 54) | class KwargsFactoryMixin:
    method from_kwargs (line 57) | def from_kwargs(cls, **kwargs):
  class ToDataSink (line 67) | class ToDataSink(abc.ABC, beam.PTransform, KwargsFactoryMixin):
    method add_parser_arguments (line 75) | def add_parser_arguments(cls, subparser: argparse.ArgumentParser) -> N...
    method validate_arguments (line 80) | def validate_arguments(cls, known_args: argparse.Namespace, pipeline_o...
  function _make_grib_dataset_inmem (line 84) | def _make_grib_dataset_inmem(grib_ds: xr.Dataset) -> xr.Dataset:
  function match_datetime (line 96) | def match_datetime(file_name: str, regex_expression: str) -> datetime.da...
  function _preprocess_tif (line 149) | def _preprocess_tif(
  function _to_utc_timestring (line 230) | def _to_utc_timestring(np_time: np.datetime64) -> str:
  function _add_is_normalized_attr (line 236) | def _add_is_normalized_attr(ds: xr.Dataset, value: bool) -> xr.Dataset:
  function _is_3d_da (line 246) | def _is_3d_da(da):
  function __normalize_grib_dataset (line 251) | def __normalize_grib_dataset(filename: str,
  function __open_dataset_file (line 345) | def __open_dataset_file(filename: str,
  function copy (line 398) | def copy(src: str, dst: str) -> None:
  function open_local (line 416) | def open_local(uri: str) -> t.Iterator[str]:
  function open_dataset (line 438) | def open_dataset(uri: str,
  function get_file_time (line 522) | def get_file_time(element: t.Any) -> int:

FILE: weather_mv/loader_pipeline/sinks_test.py
  class TestDataBase (line 28) | class TestDataBase(unittest.TestCase):
    method setUp (line 29) | def setUp(self) -> None:
  function _handle_missing_grib_be (line 33) | def _handle_missing_grib_be(f):
  function limit_memory (line 48) | def limit_memory(max_memory=30):
  function write_netcdf (line 62) | def write_netcdf():
  class OpenDatasetTest (line 81) | class OpenDatasetTest(TestDataBase):
    method setUp (line 83) | def setUp(self) -> None:
    method test_opens_grib_files (line 90) | def test_opens_grib_files(self):
    method test_accepts_xarray_kwargs (line 98) | def test_accepts_xarray_kwargs(self):
    method test_opens_tif_files (line 106) | def test_opens_tif_files(self):
    method test_opens_zarr (line 112) | def test_opens_zarr(self):
    method test_open_dataset__fits_memory_bounds (line 117) | def test_open_dataset__fits_memory_bounds(self):
    method test_group_common_hypercubes (line 123) | def test_group_common_hypercubes(self):
  class DatetimeTest (line 129) | class DatetimeTest(unittest.TestCase):
    method test_datetime_regex_string (line 131) | def test_datetime_regex_string(self):
    method test_datetime_regex_string_with_missing_parameters (line 141) | def test_datetime_regex_string_with_missing_parameters(self):
    method test_datetime_regex_string_with_different_order (line 151) | def test_datetime_regex_string_with_different_order(self):

FILE: weather_mv/loader_pipeline/streaming.py
  class GroupMessagesByFixedWindows (line 34) | class GroupMessagesByFixedWindows(beam.PTransform):
    method __init__ (line 39) | def __init__(self, window_size: int, num_shards: int = 5):
    method expand (line 44) | def expand(self, pcoll):
  class AddTimestamp (line 58) | class AddTimestamp(beam.DoFn):
    method process (line 63) | def process(self, element, publish_time=beam.DoFn.TimestampParam) -> t...
  class ParsePaths (line 72) | class ParsePaths(beam.DoFn):
    method __init__ (line 75) | def __init__(self, uri_pattern: str):
    method try_parse_message (line 81) | def try_parse_message(cls, message_body: t.Union[str, t.Dict]) -> t.Dict:
    method to_object_path (line 91) | def to_object_path(self, payload: t.Dict) -> str:
    method should_skip (line 95) | def should_skip(self, message_body: t.Dict) -> bool:
    method process (line 102) | def process(self, key_value, window=beam.DoFn.WindowParam) -> t.Iterab...
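
streaming.py (listed identically under weather_sp/splitter_pipeline/streaming.py further below) groups Pub/Sub messages into fixed windows with random shard keys, the standard Beam streaming pattern. A hedged sketch of GroupMessagesByFixedWindows; whether window_size is given in minutes is an assumption.

    import random

    import apache_beam as beam
    from apache_beam.transforms import window

    class GroupMessagesByFixedWindows(beam.PTransform):
        """Window messages, then fan them out across num_shards keys."""

        def __init__(self, window_size: int, num_shards: int = 5):
            self.window_size = int(window_size * 60)  # assumption: minutes
            self.num_shards = num_shards

        def expand(self, pcoll):
            return (
                pcoll
                | "Window" >> beam.WindowInto(window.FixedWindows(self.window_size))
                | "Shard" >> beam.Map(
                    lambda msg: (random.randint(0, self.num_shards - 1), msg))
                | "Group" >> beam.GroupByKey()
            )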

FILE: weather_mv/loader_pipeline/streaming_test.py
  class ParsePathsTests (line 20) | class ParsePathsTests(unittest.TestCase):
    method setUp (line 21) | def setUp(self) -> None:
    method test_parse_message (line 48) | def test_parse_message(self):
    method test_parse_message__already_is_dict (line 52) | def test_parse_message__already_is_dict(self):
    method test_parse_message__bad_json (line 56) | def test_parse_message__bad_json(self):
    method test_parse_message__round_trip (line 60) | def test_parse_message__round_trip(self):
    method test_should_skip (line 67) | def test_should_skip(self):
    method test_should_skip__missing_values (line 71) | def test_should_skip__missing_values(self):
    method test_should_skip__mismatch_pattern (line 76) | def test_should_skip__mismatch_pattern(self):

FILE: weather_mv/loader_pipeline/util.py
  function make_attrs_ee_compatible (line 53) | def make_attrs_ee_compatible(attrs: t.Dict) -> t.Dict:
  function to_json_serializable_type (line 83) | def to_json_serializable_type(value: t.Any) -> t.Any:
  function _check_for_coords_vars (line 133) | def _check_for_coords_vars(ds_data_var: str, target_var: str) -> bool:
  function get_utc_timestamp (line 138) | def get_utc_timestamp() -> float:
  function _only_target_coordinate_vars (line 142) | def _only_target_coordinate_vars(ds: xr.Dataset, data_vars: t.List[str])...
  function _only_target_vars (line 159) | def _only_target_vars(ds: xr.Dataset, data_vars: t.Optional[t.List[str]]...
  function ichunked (line 194) | def ichunked(iterable: t.Iterable, n: int) -> t.Iterator[t.Iterable]:
  function get_coordinates (line 207) | def get_coordinates(ds: xr.Dataset, uri: str = '') -> t.Iterator[t.Dict]:
  function _cleanup_bigquery (line 240) | def _cleanup_bigquery(bigquery_client: bigquery.Client,
  function _cleanup_bucket (line 252) | def _cleanup_bucket(storage_client: storage.Client,
  function validate_region (line 266) | def validate_region(output_table: t.Optional[str] = None,
  function _shard (line 330) | def _shard(elem, num_shards: int):
  class Shard (line 334) | class Shard(beam.DoFn):
    method __init__ (line 336) | def __init__(self, num_shards: int, use_metrics: bool):
    method process (line 342) | def process(self, element, *args, **kwargs):
  class RateLimit (line 345) | class RateLimit(beam.PTransform, abc.ABC):
    method __init__ (line 352) | def __init__(self,
    method process (line 384) | def process(self, elem: t.Any):
    method expand (line 398) | def expand(self, pcol: beam.PCollection):
  class _RateLimitDoFn (line 406) | class _RateLimitDoFn(beam.DoFn):
    method __init__ (line 409) | def __init__(self, rate_limit_fn: t.Callable, wait_time: datetime.time...
    method process (line 414) | def process(self, keyed_elem: t.Tuple[t.Any, t.Iterable[t.Any]]):
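
util.py's ichunked (the same helper is indexed under weather_dl_v2/license_deployment/util.py above) lazily yields n-sized chunks of an arbitrary iterable. The textbook islice recipe matches the signature; treat this as illustrative rather than the exact in-repo body.

    import itertools
    import typing as t

    def ichunked(iterable: t.Iterable, n: int) -> t.Iterator[t.Iterable]:
        """Yield successive chunks of size n without materializing the input."""
        it = iter(iterable)
        while True:
            chunk = list(itertools.islice(it, n))
            if not chunk:
                return
            yield chunk

    # e.g. list(ichunked(range(7), 3)) -> [[0, 1, 2], [3, 4, 5], [6]]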

FILE: weather_mv/loader_pipeline/util_test.py
  class GetCoordinatesTest (line 32) | class GetCoordinatesTest(TestDataBase):
    method setUp (line 33) | def setUp(self) -> None:
    method test_gets_indexed_coordinates (line 37) | def test_gets_indexed_coordinates(self):
    method test_no_duplicate_coordinates (line 44) | def test_no_duplicate_coordinates(self):
  class IChunksTests (line 52) | class IChunksTests(TestDataBase):
    method setUp (line 53) | def setUp(self) -> None:
    method test_even_chunks (line 59) | def test_even_chunks(self):
    method test_odd_chunks (line 72) | def test_odd_chunks(self):
    method test_get_coordinates (line 83) | def test_get_coordinates(self):
  class MakeAttrsEeCompatibleTests (line 111) | class MakeAttrsEeCompatibleTests(TestDataBase):
    method test_make_attrs_ee_compatible_a1 (line 112) | def test_make_attrs_ee_compatible_a1(self):
    method test_make_attrs_ee_compatible_a2 (line 172) | def test_make_attrs_ee_compatible_a2(self):
  class ToJsonSerializableTypeTests (line 195) | class ToJsonSerializableTypeTests(unittest.TestCase):
    method _convert (line 197) | def _convert(self, value):
    method test_to_json_serializable_type_none (line 200) | def test_to_json_serializable_type_none(self):
    method test_to_json_serializable_type_float (line 207) | def test_to_json_serializable_type_float(self):
    method test_to_json_serializable_type_int (line 218) | def test_to_json_serializable_type_int(self):
    method test_to_json_serializable_type_set (line 226) | def test_to_json_serializable_type_set(self):
    method test_to_json_serializable_type_ndarray (line 233) | def test_to_json_serializable_type_ndarray(self):
    method test_to_json_serializable_type_datetime (line 240) | def test_to_json_serializable_type_datetime(self):

FILE: weather_sp/splitter_pipeline/__init__.py
  function cli (line 18) | def cli(extra=[]):

FILE: weather_sp/splitter_pipeline/file_name_utils.py
  class OutFileInfo (line 28) | class OutFileInfo:
    method __repr__ (line 45) | def __repr__(self):
    method unformatted_output_path (line 48) | def unformatted_output_path(self):
    method split_dims (line 52) | def split_dims(self) -> t.List[str]:
    method formatted_output_path (line 57) | def formatted_output_path(self, splits: t.Dict[str, str]) -> str:
  function get_output_file_info (line 62) | def get_output_file_info(filename: str,

FILE: weather_sp/splitter_pipeline/file_name_utils_test.py
  class FileNameUtilsTest (line 20) | class FileNameUtilsTest(unittest.TestCase):
    method test_get_output_file_info_pattern (line 22) | def test_get_output_file_info_pattern(self):
    method test_get_output_file_info_dir (line 35) | def test_get_output_file_info_dir(self):
    method test_get_output_file_info_no_fileending (line 48) | def test_get_output_file_info_no_fileending(self):
    method test_get_output_file_info_filecontainsdots (line 61) | def test_get_output_file_info_filecontainsdots(self):
    method test_get_output_file_info_dir_no_formatting (line 73) | def test_get_output_file_info_dir_no_formatting(self):
    method test_output_pattern_ignores_formatting (line 80) | def test_output_pattern_ignores_formatting(self):
    method test_split_dims (line 94) | def test_split_dims(self):

FILE: weather_sp/splitter_pipeline/file_splitters.py
  function copy (line 53) | def copy(src: str, dst: str) -> None:
  class FileSplitter (line 70) | class FileSplitter(abc.ABC):
    method __init__ (line 73) | def __init__(self, input_path: str, output_info: OutFileInfo,
    method split_data (line 86) | def split_data(self) -> None:
    method _copy_to_local_file (line 90) | def _copy_to_local_file(self) -> t.Iterator[t.IO]:
    method should_skip (line 109) | def should_skip(self):
    method should_skip_file (line 122) | def should_skip_file(self, output_path: str) -> bool:
  class GribSplitter (line 137) | class GribSplitter(FileSplitter):
    method split_data (line 139) | def split_data(self) -> None:
    method _open_grib_locally (line 187) | def _open_grib_locally(self) -> t.Iterator[t.Iterator[pygrib.gribmessa...
  class GribSplitterV2 (line 193) | class GribSplitterV2(GribSplitter):
    method replace_non_numeric_bracket (line 199) | def replace_non_numeric_bracket(self, match: re.Match) -> str:
    method split_data (line 203) | def split_data(self) -> None:
  class NetCdfSplitter (line 274) | class NetCdfSplitter(FileSplitter):
    method split_data (line 278) | def split_data(self) -> None:
    method _open_dataset_locally (line 314) | def _open_dataset_locally(self) -> t.Iterator[xr.Dataset]:
    method _write_dataset (line 320) | def _write_dataset(self, dataset: xr.Dataset, split_dims: t.List[str])...
    method _get_output_for_dataset (line 329) | def _get_output_for_dataset(self, dataset: xr.Dataset, split_dims: t.L...
  class DrySplitter (line 339) | class DrySplitter(FileSplitter):
    method split_data (line 341) | def split_data(self) -> None:
    method _get_keys (line 347) | def _get_keys(self) -> t.Dict[str, str]:
  function get_splitter (line 351) | def get_splitter(file_path: str,
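
get_splitter picks a FileSplitter subclass for the input file; judging by the tests below (test_get_splitter_grib/_nc/_undetermined_grib/_dryrun), the dispatch is by extension with grib as the fallback. A hedged sketch with stubbed splitter classes:

    class _Splitter:
        """Stand-in for the FileSplitter hierarchy above."""
        def __init__(self, input_path, output_info):
            self.input_path = input_path
            self.output_info = output_info

    class GribSplitter(_Splitter): ...
    class NetCdfSplitter(_Splitter): ...
    class DrySplitter(_Splitter): ...

    def get_splitter(file_path: str, output_info, dry_run: bool = False) -> _Splitter:
        if dry_run:
            return DrySplitter(file_path, output_info)
        if file_path.endswith((".nc", ".nc4")):
            return NetCdfSplitter(file_path, output_info)
        # Files without a recognized extension are treated as grib
        # (assumption, consistent with test_get_splitter_undetermined_grib).
        return GribSplitter(file_path, output_info)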

FILE: weather_sp/splitter_pipeline/file_splitters_test.py
  function data_dir (line 39) | def data_dir():
  function grib_splitter (line 48) | def grib_splitter(request):
  class TestGetSplitter (line 52) | class TestGetSplitter:
    method test_get_splitter_grib (line 54) | def test_get_splitter_grib(self, data_dir):
    method test_get_splitter_nc (line 64) | def test_get_splitter_nc(self, data_dir):
    method test_get_splitter_undetermined_grib (line 75) | def test_get_splitter_undetermined_grib(self, data_dir):
    method test_get_splitter_dryrun (line 85) | def test_get_splitter_dryrun(self):
  class TestGribSplitter (line 96) | class TestGribSplitter:
    method test_get_output_file_path (line 97) | def test_get_output_file_path(self, grib_splitter):
    method test_split_data_with_filter (line 110) | def test_split_data_with_filter(self, data_dir):
    method test_split_data (line 146) | def test_split_data(self, data_dir, grib_splitter):
    method test_skips_existing_split (line 180) | def test_skips_existing_split(self, data_dir, grib_splitter):
    method test_skips_existing_split_with_filter (line 195) | def test_skips_existing_split_with_filter(self, mock_should_skip_file,...
    method test_does_not_skip__if_forced (line 208) | def test_does_not_skip__if_forced(self, data_dir, grib_splitter):
    method test_split__fits_memory_bounds (line 226) | def test_split__fits_memory_bounds(self, data_dir, grib_splitter):
  class TestNetCdfSplitter (line 240) | class TestNetCdfSplitter:
    method test_get_output_file_path (line 242) | def test_get_output_file_path(self):
    method test_split_data (line 252) | def test_split_data(self, data_dir):
    method test_split_data__not_in_dims_raises (line 272) | def test_split_data__not_in_dims_raises(self, data_dir):
    method test_split_data__unsupported_dim_raises (line 283) | def test_split_data__unsupported_dim_raises(self, data_dir):
    method test_skips_existing_split (line 294) | def test_skips_existing_split(self, data_dir):
    method test_does_not_skip__if_forced (line 307) | def test_does_not_skip__if_forced(self, data_dir):
    method test_split_data__fits_memory_bounds (line 323) | def test_split_data__fits_memory_bounds(self, data_dir):
    method test_split_data__is_netcdf4 (line 335) | def test_split_data__is_netcdf4(self, data_dir):
  class TestDrySplitter (line 354) | class TestDrySplitter:
    method test_path_with_output_pattern (line 356) | def test_path_with_output_pattern(self):
    method test_path_with_output_pattern_no_formatting (line 367) | def test_path_with_output_pattern_no_formatting(self):

FILE: weather_sp/splitter_pipeline/pipeline.py
  function configure_logger (line 34) | def configure_logger(verbosity: int) -> None:
  function split_file (line 41) | def split_file(input_file: str,
  function _get_base_input_directory (line 68) | def _get_base_input_directory(input_pattern: str) -> str:
  function get_output_base_name (line 76) | def get_output_base_name(input_path: str,
  function run (line 88) | def run(argv: t.List[str], save_main_session: bool = True):

FILE: weather_sp/splitter_pipeline/pipeline_test.py
  class PipelineTest (line 24) | class PipelineTest(unittest.TestCase):
    method test_get_base_input_directory (line 26) | def test_get_base_input_directory(self):
    method test_get_output_base_name (line 40) | def test_get_output_base_name(self):
    method test_split_file (line 50) | def test_split_file(self, mock_get_splitter):
    method test_split_file_with_filter (line 65) | def test_split_file_with_filter(self, mock_get_splitter):

FILE: weather_sp/splitter_pipeline/streaming.py
  class GroupMessagesByFixedWindows (line 34) | class GroupMessagesByFixedWindows(beam.PTransform):
    method __init__ (line 39) | def __init__(self, window_size: int, num_shards: int = 5):
    method expand (line 44) | def expand(self, pcoll):
  class AddTimestamp (line 58) | class AddTimestamp(beam.DoFn):
    method process (line 63) | def process(self, element, publish_time=beam.DoFn.TimestampParam) -> t...
  class ParsePaths (line 72) | class ParsePaths(beam.DoFn):
    method __init__ (line 75) | def __init__(self, uri_pattern: str):
    method try_parse_message (line 81) | def try_parse_message(cls, message_body: t.Union[str, t.Dict]) -> t.Dict:
    method to_object_path (line 91) | def to_object_path(self, payload: t.Dict) -> str:
    method should_skip (line 95) | def should_skip(self, message_body: t.Dict) -> bool:
    method process (line 102) | def process(self, key_value, window=beam.DoFn.WindowParam) -> t.Iterab...

FILE: xql/main.py
  function parse_args (line 22) | def parse_args() -> Tuple[argparse.Namespace, List[str]]:

FILE: xql/src/weather_lm/gemini.py
  function nl_to_sql_query (line 25) | def nl_to_sql_query(input_statement: str) -> str:
  function nl_to_weather_data (line 83) | def nl_to_weather_data(input_statement: str):

FILE: xql/src/weather_lm/utils.py
  function get_table_map (line 26) | def get_table_map() -> t.Dict:
  function get_table_map_prompt (line 39) | def get_table_map_prompt() -> t.Tuple:
  function get_invocation_steps (line 56) | def get_invocation_steps(prompt: PromptTemplate, model: ChatGoogleGenera...

FILE: xql/src/xql/apply.py
  function parse (line 62) | def parse(a: t.Union[xr.DataArray, str], b: t.Union[xr.DataArray, str]) ...
  function apply_orderby (line 87) | def apply_orderby(e: exp.Order, df: pd.DataFrame) -> pd.DataFrame:
  function aggregate_variables (line 114) | def aggregate_variables(agg_funcs: t.List[t.Dict[str, str]],
  function apply_group_by (line 160) | def apply_group_by(time_fields: t.List[str], ds: xr.Dataset, agg_funcs: ...
  function apply_aggregation (line 188) | def apply_aggregation(groups: t.Union[xr.Dataset, DatasetGroupBy], fun: ...
  function get_coords_to_squeeze (line 205) | def get_coords_to_squeeze(fields: t.List[str], ds: xr.Dataset) -> t.List...
  function get_table (line 225) | def get_table(e: exp.Expression) -> str:
  function parse_query (line 245) | def parse_query(query: str) -> xr.Dataset:
  function convert_to_dataframe (line 304) | def convert_to_dataframe(ds: xr.Dataset) -> pd.DataFrame:
  function filter_records (line 326) | def filter_records(df: pd.DataFrame, query: str) -> pd.DataFrame:
  function set_dataset_table (line 367) | def set_dataset_table(cmd: str) -> None:
  function list_key_values (line 389) | def list_key_values(input: t.Dict[str, str]) -> None:
  function display_help (line 400) | def display_help(cmd: str) -> None:
  function display_table_dataset_map (line 420) | def display_table_dataset_map(cmd: str) -> None:
  function run_query (line 439) | def run_query(query: str) -> None:
  function main (line 454) | def main(query: str):

FILE: xql/src/xql/open.py
  function get_chunking (line 30) | def get_chunking(uri: str, variables: t.List[str]) -> t.Dict:
  function open_dataset (line 69) | def open_dataset(uri: str) -> t.Tuple[xr.Dataset, bool]:

FILE: xql/src/xql/utils.py
  function timing (line 23) | def timing(f):
  function connect_dask_cluster (line 35) | def connect_dask_cluster() -> None:
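
xql's timing is a one-argument decorator, i.e. an elapsed-time wrapper. A standard perf_counter sketch that fits the signature; whether the real one prints or logs is an assumption.

    import functools
    import time

    def timing(f):
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            start = time.perf_counter()
            try:
                return f(*args, **kwargs)
            finally:
                elapsed = time.perf_counter() - start
                print(f"{f.__name__} took {elapsed:.3f}s")
        return wrapper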

FILE: xql/src/xql/where.py
  function parse (line 24) | def parse(a: t.Union[xr.DataArray, str], b: t.Union[xr.DataArray, str]) ...
  function get_sop_terms (line 48) | def get_sop_terms(expression: exp.Expression):
  function check_conditional (line 79) | def check_conditional(expression: exp.Expression) -> bool:
  function parse_condition (line 85) | def parse_condition(expression: exp.Expression, condition_dict: dict) ->...
  function is_ascending_order (line 109) | def is_ascending_order(da: xr.DataArray) -> bool:
  function select_coordinate (line 116) | def select_coordinate(da: xr.DataArray, coordinate: str, operator: str, ...
  function get_coords_condition (line 140) | def get_coords_condition(coordinate: str, condition: t.Dict[str, str]):
  function filter_condition_dict (line 158) | def filter_condition_dict(condition_dict: dict, ds: xr.Dataset):
  function apply_select_condition (line 173) | def apply_select_condition(ds: xr.Dataset, condition_dict: dict) -> xr.D...
  function postorder (line 209) | def postorder(expression: exp.Expression, condition_dict: dict):
  function apply_where (line 229) | def apply_where(ds: xr.Dataset, expression: exp.Expression) -> xr.Dataset:

Condensed preview — 238 files, each showing path, character count, and a content snippet.
[
  {
    "path": ".github/workflows/ci.yml",
    "chars": 4425,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": ".github/workflows/publish.yml",
    "chars": 3057,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": ".gitignore",
    "chars": 2096,
    "preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
  },
  {
    "path": ".read-the-docs.yaml",
    "chars": 494,
    "preview": "# .readthedocs.yaml\n# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html f"
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 6662,
    "preview": "# How to Contribute\n\nWe'd love to accept your patches and contributions to this project. There are just a few small guid"
  },
  {
    "path": "Configuration.md",
    "chars": 14610,
    "preview": "# Configuration Files\n\nConfig files describe both _what_ to download and _how_ it should be downloaded. To this end, con"
  },
  {
    "path": "Dockerfile",
    "chars": 1720,
    "preview": "\n# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this "
  },
  {
    "path": "Efficient-Requests.md",
    "chars": 383,
    "preview": "# Writing Efficient Data Requests\n\nTODO([#26](https://github.com/googlestaging/weather-tools/issues/26)). In the mean-ti"
  },
  {
    "path": "LICENSE",
    "chars": 11358,
    "preview": "\n                                 Apache License\n                           Version 2.0, January 2004\n                  "
  },
  {
    "path": "MANIFEST.in",
    "chars": 198,
    "preview": "global-exclude *.nc\nglobal-exclude *.gb *.grib\nglobal-exclude *.bz2\nglobal-exclude *.yaml *.yml\nglobal-exclude *_test.py"
  },
  {
    "path": "README.md",
    "chars": 8433,
    "preview": "# weather-tools\n\nApache Beam pipelines to make weather data accessible and useful.\n\n[![CI](https://github.com/googlestag"
  },
  {
    "path": "Runners.md",
    "chars": 2761,
    "preview": "# Choosing a Beam Runner\n\nAll tools use Apache Beam pipelines. By default, pipelines run locally using the `DirectRunner"
  },
  {
    "path": "Runtime-Container.md",
    "chars": 1296,
    "preview": "# Runtime Containers\n\n_How to build & public custom Dataflow images in GCS_\n\n*Pre-requisites*: Install the gcloud CLI (["
  },
  {
    "path": "bin/install-branch",
    "chars": 775,
    "preview": "#!/usr/bin/env sh\n# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you "
  },
  {
    "path": "bin/post-push",
    "chars": 792,
    "preview": "#!/usr/bin/env sh\n# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you "
  },
  {
    "path": "ci3.8.yml",
    "chars": 846,
    "preview": "name: weather-tools\nchannels:\n  - conda-forge\n  - defaults\ndependencies:\n  - python=3.8.13\n  - apache-beam=2.40.0\n  - py"
  },
  {
    "path": "ci3.9.yml",
    "chars": 846,
    "preview": "name: weather-tools\nchannels:\n  - conda-forge\n  - defaults\ndependencies:\n  - python=3.9.13\n  - apache-beam=2.40.0\n  - py"
  },
  {
    "path": "configs/era5_example_config.cfg",
    "chars": 1177,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/era5_example_config_local_run.cfg",
    "chars": 1306,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/era5_example_config_local_run.json",
    "chars": 488,
    "preview": "{\n  \"parameters\": {\n    \"client\": \"cds\",\n    \"dataset\": \"reanalysis-era5-pressure-levels\",\n    \"target_path\": \"era5-{}{}"
  },
  {
    "path": "configs/era5_example_config_preproc.cfg",
    "chars": 1118,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/era5_example_config_using_date.cfg",
    "chars": 1344,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/era5_example_monthly_soil.cfg",
    "chars": 992,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/mars_example_config.cfg",
    "chars": 1275,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/mars_example_config.json",
    "chars": 751,
    "preview": "{\n  \"parameters\": {\n    \"client\": \"mars\",\n    \"dataset\": \"ecmwf-mars-output\",\n    \"target_path\": \"gs://ecmwf-downloads/h"
  },
  {
    "path": "configs/multiple_multiple_licenses/era5_pressure500.cfg",
    "chars": 1460,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/multiple_multiple_licenses/era5_pressure600.cfg",
    "chars": 1460,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/multiple_multiple_licenses/era5_pressure700.cfg",
    "chars": 1460,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/multiple_single_license/era5_pressure500.cfg",
    "chars": 1177,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/multiple_single_license/era5_pressure600.cfg",
    "chars": 1177,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/multiple_single_license/era5_pressure700.cfg",
    "chars": 1177,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/s2s_operational_forecast_example.cfg",
    "chars": 1474,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/seasonal_forecast_example_config.cfg",
    "chars": 988,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/tigge_example_config.cfg",
    "chars": 854,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "configs/yesterdays_surface_example.cfg",
    "chars": 759,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "docs/Makefile",
    "chars": 634,
    "preview": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the "
  },
  {
    "path": "docs/Private-IP-Configuration.md",
    "chars": 5910,
    "preview": "# Private IP Configuration \n\n_A Guide for Dataflow Pipeline Execution_\n\n## Goals\nIn this document, we’ll describe how to"
  },
  {
    "path": "docs/_static/custom.css",
    "chars": 98,
    "preview": "p {\n    text-align: justify;\n}\n\nbody {\n   min-width: 250px;\n}\n\ndiv.body {\n    min-width: 250px;\n}\n"
  },
  {
    "path": "docs/conf.py",
    "chars": 2784,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "docs/download_pipeline.md",
    "chars": 1016,
    "preview": "# download_pipeline package\n=========================\n\n## Submodules\n\n### download_pipeline.clients module\n\n```{eval-rst"
  },
  {
    "path": "docs/index.md",
    "chars": 418,
    "preview": "\n```{include} README.md\n```\n\n## Contents\n\n```{eval-rst}\n.. toctree::\n   :maxdepth: 2\n   :titlesonly:\n   :glob:\n   :inclu"
  },
  {
    "path": "docs/loader_pipeline.md",
    "chars": 323,
    "preview": "# loader_pipeline package\n\n## Submodules\n\n### loader_pipeline.netcdf_loader module\n\n```{eval-rst}\n.. automodule:: weathe"
  },
  {
    "path": "docs/make.bat",
    "chars": 1361,
    "preview": "REM Copyright 2021 Google LLC\nREM\nREM Licensed under the Apache License, Version 2.0 (the \"License\");\nREM you may not us"
  },
  {
    "path": "docs/modules.md",
    "chars": 94,
    "preview": "# Modules\n\n```{toctree}\n:maxdepth: 4\ndownload_pipeline\nloader_pipeline\nsplitter_pipeline\n```\n\n"
  },
  {
    "path": "docs/requirements.txt",
    "chars": 61,
    "preview": "# doc requirements\nmyst-parser==0.13.7\nsphinx>=2.1\nJinja2<3.1"
  },
  {
    "path": "docs/splitter_pipeline.md",
    "chars": 540,
    "preview": "# splitter_pipeline package\n=========================\n\n## Submodules\n\n### splitter_pipeline.file_splitters module\n\n\n```{"
  },
  {
    "path": "environment.yml",
    "chars": 866,
    "preview": "name: weather-tools\nchannels:\n  - conda-forge\ndependencies:\n  - python=3.8.13\n  - apache-beam=2.40.0\n  - xarray-beam=0.6"
  },
  {
    "path": "pyproject.toml",
    "chars": 885,
    "preview": "[tool.ruff]\n# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.\nselect = [\"E\", \"F\", \"W\"]\nignore = []\n\n# Allo"
  },
  {
    "path": "setup.cfg",
    "chars": 645,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "setup.py",
    "chars": 4111,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "tox.ini",
    "chars": 908,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/MANIFEST.in",
    "chars": 70,
    "preview": "global-exclude *_test.py\ninclude README.md\nexclude download_status*.py"
  },
  {
    "path": "weather_dl/README.md",
    "chars": 6528,
    "preview": "# ⛈ `weather-dl` – Weather Downloader\n\nWeather Downloader ingests weather data to cloud buckets, such\nas [Google Cloud S"
  },
  {
    "path": "weather_dl/__init__.py",
    "chars": 575,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/__init__.py",
    "chars": 684,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/clients.py",
    "chars": 16803,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/clients_test.py",
    "chars": 1249,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/config.py",
    "chars": 4779,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/config_test.py",
    "chars": 5846,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/fetcher.py",
    "chars": 4182,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/fetcher_test.py",
    "chars": 7865,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/manifest.py",
    "chars": 30773,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/manifest_test.py",
    "chars": 3928,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/parsers.py",
    "chars": 18907,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/parsers_test.py",
    "chars": 44117,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/partition.py",
    "chars": 10535,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/partition_test.py",
    "chars": 25507,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/pipeline.py",
    "chars": 11859,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/pipeline_test.py",
    "chars": 6670,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/stores.py",
    "chars": 4076,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/stores_test.py",
    "chars": 2182,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/util.py",
    "chars": 8489,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/download_pipeline/util_test.py",
    "chars": 3252,
    "preview": "import unittest\n\nfrom .util import fetch_geo_polygon, generate_hdate, ichunked\n\n\n# TODO(#245): Duplicate tests; remove.\n"
  },
  {
    "path": "weather_dl/setup.py",
    "chars": 1871,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl/weather-dl",
    "chars": 2838,
    "preview": "#!/usr/bin/env python3\n# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n#"
  },
  {
    "path": "weather_dl_v2/README.md",
    "chars": 1064,
    "preview": "## weather-dl-v2\n\n<!-- TODO: Create a setup script to ease deployment process. -->\n\n> **_NOTE:_**   weather-dl-v2 only s"
  },
  {
    "path": "weather_dl_v2/__init__.py",
    "chars": 575,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/CLI-Documentation.md",
    "chars": 6731,
    "preview": "# CLI Documentation\nThe following doc provides cli commands and their various arguments and options.\n\nBase Command:\n```\n"
  },
  {
    "path": "weather_dl_v2/cli/Dockerfile",
    "chars": 1664,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/README.md",
    "chars": 1951,
    "preview": "# weather-dl-cli\nThis is a command line interface for talking to the weather-dl-v2 FastAPI server.\n\n- Due to our org lev"
  },
  {
    "path": "weather_dl_v2/cli/VERSION.txt",
    "chars": 5,
    "preview": "1.0.4"
  },
  {
    "path": "weather_dl_v2/cli/app/__init__.py",
    "chars": 575,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/cli_config.py",
    "chars": 1860,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/data/cli_config.json",
    "chars": 47,
    "preview": "{\n    \"pod_ip\": \"<pod_ip>\",\n    \"port\": 8080\n}\n"
  },
  {
    "path": "weather_dl_v2/cli/app/main.py",
    "chars": 1504,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/services/__init__.py",
    "chars": 575,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/services/download_service.py",
    "chars": 3815,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/services/license_service.py",
    "chars": 3690,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/services/network_service.py",
    "chars": 3269,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/services/queue_service.py",
    "chars": 3673,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/subcommands/__init__.py",
    "chars": 575,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/subcommands/config.py",
    "chars": 2340,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/subcommands/download.py",
    "chars": 4560,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/subcommands/license.py",
    "chars": 5570,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/subcommands/queue.py",
    "chars": 5346,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/app/utils.py",
    "chars": 5155,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/environment.yml",
    "chars": 199,
    "preview": "name: weather-dl-v2-cli\nchannels:\n  - conda-forge\ndependencies:\n  - python=3.10\n  - pip=23.0.1\n  - typer=0.9.0\n  - tabul"
  },
  {
    "path": "weather_dl_v2/cli/setup.py",
    "chars": 1120,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/cli/vm-startup.sh",
    "chars": 139,
    "preview": "#! /bin/bash\n\ncommand=\"docker exec -it \\\\\\$(docker ps -qf name=weather-dl-v2-cli) /bin/bash\"\nsudo sh -c \"echo \\\"$command"
  },
  {
    "path": "weather_dl_v2/cloudbuild.yml",
    "chars": 515,
    "preview": "steps:\n- name: 'gcr.io/cloud-builders/docker'\n  args: ['build', '-t', 'gcr.io/$_PROJECT_ID/$_REPO:$_SERVICE', '$_FOLDER'"
  },
  {
    "path": "weather_dl_v2/config.json",
    "chars": 381,
    "preview": "{\n    \"download_collection\": \"download\",\n    \"queues_collection\": \"queues\",\n    \"license_collection\": \"license\",\n    \"ma"
  },
  {
    "path": "weather_dl_v2/downloader_kubernetes/Dockerfile",
    "chars": 1743,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/downloader_kubernetes/README.md",
    "chars": 594,
    "preview": "# Deployment / Usage Instruction \n\n### User authorization required to set up the environment:\n* roles/container.admin\n\n#"
  },
  {
    "path": "weather_dl_v2/downloader_kubernetes/VERSION.txt",
    "chars": 5,
    "preview": "1.0.2"
  },
  {
    "path": "weather_dl_v2/downloader_kubernetes/downloader.py",
    "chars": 3415,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/downloader_kubernetes/downloader_config.py",
    "chars": 1985,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/downloader_kubernetes/environment.yml",
    "chars": 327,
    "preview": "name: weather-dl-v2-downloader\nchannels:\n  - conda-forge\ndependencies:\n  - python=3.10\n  - google-cloud-sdk=410.0.0\n  - "
  },
  {
    "path": "weather_dl_v2/downloader_kubernetes/manifest.py",
    "chars": 17832,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/downloader_kubernetes/util.py",
    "chars": 7755,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/API-Interactions.md",
    "chars": 1703,
    "preview": "# API Interactions\n| Command | Type | Endpoint |\n|---|---|---|\n| `weather-dl-v2 ping` | `get` | `/`\n| Download  |   |   "
  },
  {
    "path": "weather_dl_v2/fastapi-server/Dockerfile",
    "chars": 1393,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/README.md",
    "chars": 3806,
    "preview": "# Deployment Instructions & General Notes\n\n### User authorization required to set up the environment:\n* roles/container."
  },
  {
    "path": "weather_dl_v2/fastapi-server/VERSION.txt",
    "chars": 6,
    "preview": "1.0.12"
  },
  {
    "path": "weather_dl_v2/fastapi-server/__init__.py",
    "chars": 575,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/config_processing/config.py",
    "chars": 4781,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/config_processing/manifest.py",
    "chars": 18452,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/config_processing/parsers.py",
    "chars": 17718,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/config_processing/partition.py",
    "chars": 4587,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/config_processing/pipeline.py",
    "chars": 2606,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/config_processing/stores.py",
    "chars": 3990,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/config_processing/util.py",
    "chars": 8448,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/database/__init__.py",
    "chars": 575,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/database/download_handler.py",
    "chars": 5274,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/database/license_handler.py",
    "chars": 6799,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/database/manifest_handler.py",
    "chars": 6146,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/database/queue_handler.py",
    "chars": 8750,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/database/session.py",
    "chars": 2013,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/database/storage_handler.py",
    "chars": 2106,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/environment.yml",
    "chars": 307,
    "preview": "name: weather-dl-v2-server\nchannels:\n  - conda-forge\ndependencies:\n  - python=3.10\n  - xarray\n  - geojson\n  - pip=22.3\n "
  },
  {
    "path": "weather_dl_v2/fastapi-server/example.cfg",
    "chars": 615,
    "preview": "[parameters]\nclient=mars\n\ntarget_path=gs://<bucket-path>/test-weather-dl-v2/{date}T00z.gb\npartition_keys=\n  date\n  # ste"
  },
  {
    "path": "weather_dl_v2/fastapi-server/license_dep/deployment_creator.py",
    "chars": 2538,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/license_dep/license_deployment.yaml",
    "chars": 895,
    "preview": "# weather-dl-v2-license-dep Deployment\n# Defines the deployment of the app running in a pod on any worker node\napiVersio"
  },
  {
    "path": "weather_dl_v2/fastapi-server/logging.conf",
    "chars": 771,
    "preview": "[loggers]\nkeys=root,server\n\n[handlers]\nkeys=consoleHandler,detailedConsoleHandler\n\n[formatters]\nkeys=normalFormatter,det"
  },
  {
    "path": "weather_dl_v2/fastapi-server/main.py",
    "chars": 2431,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/routers/download.py",
    "chars": 13948,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/routers/license.py",
    "chars": 9187,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/routers/queues.py",
    "chars": 4782,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/server.yaml",
    "chars": 2254,
    "preview": "# Due to our org level policy we can't expose external-ip. \n# In case your project don't have any such restriction a\n# t"
  },
  {
    "path": "weather_dl_v2/fastapi-server/server_config.py",
    "chars": 2164,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/tests/__init__.py",
    "chars": 575,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/tests/integration/__init__.py",
    "chars": 575,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/tests/integration/test_download.py",
    "chars": 5389,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/tests/integration/test_license.py",
    "chars": 5921,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/tests/integration/test_queues.py",
    "chars": 4670,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/fastapi-server/tests/test_data/example.cfg",
    "chars": 615,
    "preview": "[parameters]\nclient=mars\n\ntarget_path=gs://<bucket-path>/test-weather-dl-v2/{date}T00z.gb\npartition_keys=\n  date\n  # ste"
  },
  {
    "path": "weather_dl_v2/fastapi-server/tests/test_data/not_exist.cfg",
    "chars": 615,
    "preview": "[parameters]\nclient=mars\n\ntarget_path=gs://<bucket-path>/test-weather-dl-v2/{date}T00z.gb\npartition_keys=\n  date\n  # ste"
  },
  {
    "path": "weather_dl_v2/license_deployment/Dockerfile",
    "chars": 1129,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/license_deployment/README.md",
    "chars": 433,
    "preview": "# Deployment Instructions & General Notes\n\n### How to create environment\n```\nconda env create --name weather-dl-v2-licen"
  },
  {
    "path": "weather_dl_v2/license_deployment/VERSION.txt",
    "chars": 6,
    "preview": "1.0.9\n"
  },
  {
    "path": "weather_dl_v2/license_deployment/__init__.py",
    "chars": 575,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/license_deployment/clients.py",
    "chars": 15271,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/license_deployment/config.py",
    "chars": 4781,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/license_deployment/database.py",
    "chars": 6180,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/license_deployment/deployment_config.py",
    "chars": 2134,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/license_deployment/downloader.yaml",
    "chars": 859,
    "preview": "apiVersion: batch/v1\nkind: Job\nmetadata:\n  name: downloader-with-ttl\nspec:\n  ttlSecondsAfterFinished: 0\n  backoffLimit: "
  },
  {
    "path": "weather_dl_v2/license_deployment/environment.yml",
    "chars": 327,
    "preview": "name: weather-dl-v2-license-dep\nchannels:\n  - conda-forge\ndependencies:\n  - python=3.10\n  - geojson\n  - ecmwf-api-client"
  },
  {
    "path": "weather_dl_v2/license_deployment/fetch.py",
    "chars": 8545,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/license_deployment/job_creator.py",
    "chars": 1824,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/license_deployment/manifest.py",
    "chars": 18734,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_dl_v2/license_deployment/util.py",
    "chars": 9648,
    "preview": "# Copyright 2023 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/MANIFEST.in",
    "chars": 59,
    "preview": "global-exclude *_test.py\ninclude README.md\nprune test_data\n"
  },
  {
    "path": "weather_mv/README.md",
    "chars": 29380,
    "preview": "# ⛅️ `weather-mv` – Weather Mover\n\nWeather Mover loads weather data from cloud storage into analytics engines,\nlike [Goo"
  },
  {
    "path": "weather_mv/__init__.py",
    "chars": 575,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/__init__.py",
    "chars": 744,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/bq.py",
    "chars": 28679,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/bq_test.py",
    "chars": 44490,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/ee.py",
    "chars": 39639,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/ee_test.py",
    "chars": 4050,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/execution_test.py",
    "chars": 1362,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/metrics.py",
    "chars": 10695,
    "preview": "# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/pipeline.py",
    "chars": 9287,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/pipeline_test.py",
    "chars": 6905,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/regrid.py",
    "chars": 13951,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/regrid_test.py",
    "chars": 4904,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/sinks.py",
    "chars": 22403,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/sinks_test.py",
    "chars": 5947,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/streaming.py",
    "chars": 4516,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/streaming_test.py",
    "chars": 3372,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/util.py",
    "chars": 16897,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/loader_pipeline/util_test.py",
    "chars": 10896,
    "preview": "# Copyright 2022 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/setup.py",
    "chars": 2499,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/.zattrs",
    "chars": 365,
    "preview": "{\n    \"Conventions\": \"CF-1.6\",\n    \"history\": \"2022-12-07 15:15:12 GMT by grib_to_netcdf-2.25.1: /opt/ecmwf/mars-client/"
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/.zgroup",
    "chars": 24,
    "preview": "{\n    \"zarr_format\": 2\n}"
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/.zmetadata",
    "chars": 4574,
    "preview": "{\n    \"metadata\": {\n        \".zattrs\": {\n            \"Conventions\": \"CF-1.6\",\n            \"history\": \"2022-12-07 15:15:1"
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/cape/.zarray",
    "chars": 370,
    "preview": "{\n    \"chunks\": [\n        24,\n        721,\n        1440\n    ],\n    \"compressor\": {\n        \"blocksize\": 0,\n        \"clev"
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/cape/.zattrs",
    "chars": 282,
    "preview": "{\n    \"_ARRAY_DIMENSIONS\": [\n        \"time\",\n        \"latitude\",\n        \"longitude\"\n    ],\n    \"add_offset\": 3403.94805"
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/d2m/.zarray",
    "chars": 370,
    "preview": "{\n    \"chunks\": [\n        24,\n        721,\n        1440\n    ],\n    \"compressor\": {\n        \"blocksize\": 0,\n        \"clev"
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/d2m/.zattrs",
    "chars": 269,
    "preview": "{\n    \"_ARRAY_DIMENSIONS\": [\n        \"time\",\n        \"latitude\",\n        \"longitude\"\n    ],\n    \"add_offset\": 253.123353"
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/latitude/.zarray",
    "chars": 317,
    "preview": "{\n    \"chunks\": [\n        721\n    ],\n    \"compressor\": {\n        \"blocksize\": 0,\n        \"clevel\": 5,\n        \"cname\": \""
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/latitude/.zattrs",
    "chars": 114,
    "preview": "{\n    \"_ARRAY_DIMENSIONS\": [\n        \"latitude\"\n    ],\n    \"long_name\": \"latitude\",\n    \"units\": \"degrees_north\"\n}"
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/longitude/.zarray",
    "chars": 319,
    "preview": "{\n    \"chunks\": [\n        1440\n    ],\n    \"compressor\": {\n        \"blocksize\": 0,\n        \"clevel\": 5,\n        \"cname\": "
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/longitude/.zattrs",
    "chars": 115,
    "preview": "{\n    \"_ARRAY_DIMENSIONS\": [\n        \"longitude\"\n    ],\n    \"long_name\": \"longitude\",\n    \"units\": \"degrees_east\"\n}"
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/time/.zarray",
    "chars": 314,
    "preview": "{\n    \"chunks\": [\n        24\n    ],\n    \"compressor\": {\n        \"blocksize\": 0,\n        \"clevel\": 5,\n        \"cname\": \"l"
  },
  {
    "path": "weather_mv/test_data/test_data.zarr/time/.zattrs",
    "chars": 144,
    "preview": "{\n    \"_ARRAY_DIMENSIONS\": [\n        \"time\"\n    ],\n    \"calendar\": \"gregorian\",\n    \"long_name\": \"time\",\n    \"units\": \"h"
  },
  {
    "path": "weather_mv/weather-mv",
    "chars": 2824,
    "preview": "#!/usr/bin/env python3\n# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n#"
  },
  {
    "path": "weather_sp/MANIFEST.in",
    "chars": 70,
    "preview": "global-exclude *_test.py\ninclude README.md\nglobal-exclude test_data/*\n"
  },
  {
    "path": "weather_sp/README.md",
    "chars": 10087,
    "preview": "# 🌪 `weather-sp` – Weather Splitter\n\nSplits NetCDF and Grib files into several files by variable or other dimension. (_a"
  },
  {
    "path": "weather_sp/__init__.py",
    "chars": 575,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_sp/setup.py",
    "chars": 1674,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  },
  {
    "path": "weather_sp/splitter_pipeline/__init__.py",
    "chars": 664,
    "preview": "# Copyright 2021 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this f"
  }
]

// ... and 38 more files (download for full content)

About this extraction

This page contains the full source code of the google/weather-tools GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction covers 238 files (183.3 MB), approximately 247.3k tokens, and a symbol index of 1312 functions, classes, methods, constants, and types. Use it with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
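For reference, here is a minimal sketch of how the per-file index above could be consumed programmatically, assuming the JSON array is saved to a local file (the name manifest.json is hypothetical, not part of the extraction). Each entry carries a "path", a "chars" count, and a truncated "preview"; the previews are not full file contents.

import json
from pathlib import Path

# Load the manifest: a JSON array of {"path", "chars", "preview"} objects,
# saved locally as manifest.json (hypothetical file name).
entries = json.loads(Path("manifest.json").read_text())

# Sum the character counts to estimate the total extraction size.
total_chars = sum(e["chars"] for e in entries)
print(f"{len(entries)} files indexed, {total_chars:,} characters total")

# List the largest files first: useful when deciding which files to feed
# an LLM within a fixed token budget.
for e in sorted(entries, key=lambda e: e["chars"], reverse=True)[:10]:
    print(f'{e["chars"]:>8}  {e["path"]}')

On this repository, a listing like this would surface the heavyweight modules (for example weather_mv/loader_pipeline/bq_test.py at 44,490 chars and weather_dl/download_pipeline/parsers_test.py at 44,117 chars) ahead of the many short license-header stubs.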

Extracted by GitExtract, a free GitHub repo-to-text converter for AI, built by Nikandr Surkov.