Repository: jupyter/docker-stacks Branch: main Commit: 6da149350f01 Files: 273 Total size: 487.0 KB Directory structure: gitextract_w6y_9n58/ ├── .devcontainer/ │ ├── Dockerfile │ └── devcontainer.json ├── .flake8 ├── .gitattributes ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── blank.yml │ │ ├── bug_report.yml │ │ ├── config.yml │ │ └── feature_request.yml │ ├── actions/ │ │ ├── apply-single-tags/ │ │ │ └── action.yml │ │ ├── create-dev-env/ │ │ │ └── action.yml │ │ ├── free-disk-space/ │ │ │ └── action.yml │ │ └── load-image/ │ │ └── action.yml │ ├── dependabot.yml │ ├── pull_request_template.md │ └── workflows/ │ ├── contributed-recipes.yml │ ├── docker-build-test-upload.yml │ ├── docker-tag-merge.yml │ ├── docker-tag-push-merge.yml │ ├── docker-tag-push.yml │ ├── docker-wiki-update.yml │ ├── docker.yml │ ├── pre-commit.yml │ ├── registry-move.yml │ ├── registry-overviews.yml │ └── sphinx.yml ├── .gitignore ├── .hadolint.yaml ├── .markdownlint.yaml ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.md ├── Makefile ├── README.md ├── SECURITY.md ├── binder/ │ ├── Dockerfile │ └── README.ipynb ├── docs/ │ ├── conf.py │ ├── contributing/ │ │ ├── features.md │ │ ├── issues.md │ │ ├── lint.md │ │ ├── packages.md │ │ ├── recipes.md │ │ ├── stacks.md │ │ └── tests.md │ ├── index.rst │ ├── maintaining/ │ │ ├── new-images-and-packages-policy.md │ │ ├── tagging.md │ │ ├── tagging_examples/ │ │ │ ├── docker_runner.py │ │ │ └── git_helper.py │ │ └── tasks.md │ ├── requirements.txt │ └── using/ │ ├── changelog.md │ ├── common.md │ ├── custom-images.md │ ├── faq.md │ ├── recipe_code/ │ │ ├── custom_environment.dockerfile │ │ ├── dask_jupyterlab.dockerfile │ │ ├── docker-bake.custom-python.hcl │ │ ├── generate_matrix.py │ │ ├── ijavascript.dockerfile │ │ ├── jupyterhub_version.dockerfile │ │ ├── mamba_install.dockerfile │ │ ├── manpage_install.dockerfile │ │ ├── microsoft_odbc.dockerfile │ │ ├── 
oracledb.dockerfile │ │ ├── pip_install.dockerfile │ │ ├── requirements.txt │ │ ├── rise_jupyterlab.dockerfile │ │ ├── spellcheck_notebook_v6.dockerfile │ │ └── xgboost.dockerfile │ ├── recipes.md │ ├── running.md │ ├── selecting.md │ ├── specifics.md │ └── troubleshooting.md ├── examples/ │ ├── README.md │ ├── docker-compose/ │ │ ├── README.md │ │ ├── bin/ │ │ │ ├── letsencrypt.sh │ │ │ ├── sl-dns.sh │ │ │ ├── softlayer.sh │ │ │ └── vbox.sh │ │ └── notebook/ │ │ ├── Dockerfile │ │ ├── build.sh │ │ ├── down.sh │ │ ├── env.sh │ │ ├── letsencrypt-notebook.yml │ │ ├── notebook.yml │ │ ├── secure-notebook.yml │ │ └── up.sh │ ├── make-deploy/ │ │ ├── Dockerfile │ │ ├── Makefile │ │ ├── README.md │ │ ├── letsencrypt.makefile │ │ ├── self-signed.makefile │ │ ├── softlayer.makefile │ │ └── virtualbox.makefile │ ├── openshift/ │ │ ├── README.md │ │ └── templates.json │ └── source-to-image/ │ ├── README.md │ ├── assemble │ ├── run │ ├── save-artifacts │ └── templates.json ├── images/ │ ├── all-spark-notebook/ │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ └── README.md │ ├── base-notebook/ │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── docker_healthcheck.py │ │ ├── jupyter_server_config.py │ │ ├── start-notebook.py │ │ ├── start-notebook.sh │ │ ├── start-singleuser.py │ │ └── start-singleuser.sh │ ├── datascience-notebook/ │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ └── README.md │ ├── docker-stacks-foundation/ │ │ ├── .dockerignore │ │ ├── 10activate-conda-env.sh │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── fix-permissions │ │ ├── initial-condarc │ │ ├── run-hooks.sh │ │ └── start.sh │ ├── julia-notebook/ │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ └── README.md │ ├── minimal-notebook/ │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── Rprofile.site │ │ └── setup-scripts/ │ │ ├── activate_notebook_custom_env.py │ │ ├── setup-julia-packages.bash │ │ └── setup_julia.py │ ├── pyspark-notebook/ │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── 
README.md │ │ ├── ipython_kernel_config.py │ │ └── setup_spark.py │ ├── pytorch-notebook/ │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── cuda12/ │ │ │ └── Dockerfile │ │ └── cuda13/ │ │ └── Dockerfile │ ├── r-notebook/ │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ └── README.md │ ├── scipy-notebook/ │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ └── README.md │ └── tensorflow-notebook/ │ ├── .dockerignore │ ├── Dockerfile │ ├── README.md │ └── cuda/ │ ├── 20tensorboard-proxy-env.sh │ ├── Dockerfile │ └── nvidia-lib-dirs.sh ├── mypy.ini ├── requirements-dev.txt ├── tagging/ │ ├── README.md │ ├── __init__.py │ ├── apps/ │ │ ├── __init__.py │ │ ├── apply_tags.py │ │ ├── common_cli_arguments.py │ │ ├── config.py │ │ ├── merge_tags.py │ │ ├── write_manifest.py │ │ └── write_tags_file.py │ ├── hierarchy/ │ │ ├── __init__.py │ │ ├── get_manifests.py │ │ ├── get_taggers.py │ │ └── images_hierarchy.py │ ├── manifests/ │ │ ├── __init__.py │ │ ├── apt_packages.py │ │ ├── build_info.py │ │ ├── conda_environment.py │ │ ├── julia_packages.py │ │ ├── manifest_interface.py │ │ ├── r_packages.py │ │ └── spark_info.py │ ├── taggers/ │ │ ├── __init__.py │ │ ├── date.py │ │ ├── sha.py │ │ ├── tagger_interface.py │ │ ├── ubuntu_version.py │ │ └── versions.py │ └── utils/ │ ├── __init__.py │ ├── docker_runner.py │ ├── get_platform.py │ ├── get_prefix.py │ ├── git_helper.py │ └── quoted_output.py ├── tests/ │ ├── README.md │ ├── __init__.py │ ├── by_image/ │ │ ├── all-spark-notebook/ │ │ │ ├── data/ │ │ │ │ ├── local_sparkR.ipynb │ │ │ │ └── local_sparklyr.ipynb │ │ │ └── test_spark_r_nbconvert.py │ │ ├── base-notebook/ │ │ │ ├── data/ │ │ │ │ └── check_listening.py │ │ │ ├── test_container_options.py │ │ │ ├── test_healthcheck.py │ │ │ ├── test_ips.py │ │ │ ├── test_notebook.py │ │ │ ├── test_pandoc.py │ │ │ └── test_start_container.py │ │ ├── datascience-notebook/ │ │ │ ├── test_julia_datascience.py │ │ │ ├── test_mimetypes.py │ │ │ └── test_pluto_datascience.py │ │ 
├── docker-stacks-foundation/ │ │ │ ├── data/ │ │ │ │ └── run-hooks/ │ │ │ │ ├── change/ │ │ │ │ │ ├── a.sh │ │ │ │ │ ├── b.sh │ │ │ │ │ └── c.sh │ │ │ │ ├── executables/ │ │ │ │ │ ├── executable.py │ │ │ │ │ ├── non_executable.py │ │ │ │ │ └── run-me.sh │ │ │ │ ├── failures/ │ │ │ │ │ ├── a.sh │ │ │ │ │ ├── b.py │ │ │ │ │ ├── c.sh │ │ │ │ │ └── d.sh │ │ │ │ ├── sh-files/ │ │ │ │ │ ├── executable.sh │ │ │ │ │ └── non-executable.sh │ │ │ │ └── unset/ │ │ │ │ ├── a.sh │ │ │ │ ├── b.sh │ │ │ │ └── c.sh │ │ │ ├── test_outdated.py │ │ │ ├── test_package_managers.py │ │ │ ├── test_packages.py │ │ │ ├── test_python_version.py │ │ │ ├── test_run_hooks.py │ │ │ ├── test_units.py │ │ │ └── test_user_options.py │ │ ├── julia-notebook/ │ │ │ ├── test_julia.py │ │ │ └── test_pluto.py │ │ ├── minimal-notebook/ │ │ │ ├── data/ │ │ │ │ ├── notebook_math.ipynb │ │ │ │ └── notebook_svg.ipynb │ │ │ └── test_nbconvert.py │ │ ├── pyspark-notebook/ │ │ │ ├── data/ │ │ │ │ ├── issue_1168.ipynb │ │ │ │ └── local_pyspark.ipynb │ │ │ ├── test_spark.py │ │ │ ├── test_spark_nbconvert.py │ │ │ └── units/ │ │ │ ├── unit_pandas_version.py │ │ │ └── unit_spark.py │ │ ├── pytorch-notebook/ │ │ │ └── units/ │ │ │ └── unit_pytorch.py │ │ ├── r-notebook/ │ │ │ └── test_R_mimetypes.py │ │ ├── scipy-notebook/ │ │ │ ├── data/ │ │ │ │ ├── cython/ │ │ │ │ │ ├── helloworld.pyx │ │ │ │ │ └── setup.py │ │ │ │ └── matplotlib/ │ │ │ │ ├── matplotlib_1.py │ │ │ │ └── matplotlib_fonts_1.py │ │ │ ├── test_cython.py │ │ │ ├── test_extensions.py │ │ │ ├── test_matplotlib.py │ │ │ └── units/ │ │ │ └── unit_pandas.py │ │ └── tensorflow-notebook/ │ │ └── units/ │ │ └── unit_tensorflow.py │ ├── conftest.py │ ├── hierarchy/ │ │ ├── __init__.py │ │ ├── get_test_dirs.py │ │ └── images_hierarchy.py │ ├── pytest.ini │ ├── run_tests.py │ ├── shared_checks/ │ │ ├── R_mimetype_check.py │ │ ├── __init__.py │ │ ├── nbconvert_check.py │ │ └── pluto_check.py │ └── utils/ │ ├── __init__.py │ ├── conda_package_helper.py │ └── 
tracked_container.py └── wiki/ ├── Home.md ├── __init__.py ├── config.py ├── manifest_time.py └── update_wiki.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .devcontainer/Dockerfile ================================================ FROM mcr.microsoft.com/devcontainers/python:3.13 COPY requirements-dev.txt /tmp/requirements-dev.txt COPY docs/requirements.txt /tmp/requirements-docs.txt RUN pip install --no-cache-dir -r /tmp/requirements-dev.txt && \ pip install --no-cache-dir -r /tmp/requirements-docs.txt ================================================ FILE: .devcontainer/devcontainer.json ================================================ { "name": "Jupyter Docker Stacks", "build": { "context": "..", "dockerfile": "Dockerfile" }, "features": { "ghcr.io/devcontainers/features/docker-in-docker:2": { "moby": false } }, "postCreateCommand": "pre-commit install --install-hooks", "customizations": { "vscode": { "extensions": [ "github.copilot-chat", "github.copilot", "github.vscode-github-actions", "github.vscode-pull-request-github", "ms-azuretools.vscode-containers", "ms-azuretools.vscode-docker", "ms-python.autopep8", "ms-vscode.makefile-tools" ] } } } ================================================ FILE: .flake8 ================================================ [flake8] max-line-length = 88 select = C, E, F, W, B, B950 extend-ignore = E203, E501, E704, W503 ================================================ FILE: .gitattributes ================================================ * text=auto eol=lf ================================================ FILE: .github/ISSUE_TEMPLATE/blank.yml ================================================ name: "(maintainers only) Blank issue" description: For maintainers only labels: [] body: - type: markdown attributes: value: | This is a blank issue template for maintainers to use as needed. 
- type: checkboxes attributes: label: Are you a maintainer? description: Please confirm you are a maintainer before proceeding. options: - label: Yes, I am a maintainer required: true ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: Bug report description: Create a report to help us improve labels: ["type:Bug"] body: - type: markdown attributes: value: | Hi! Thanks for using the Jupyter Docker Stacks and taking some time to contribute to this project. We'd appreciate it if you could check out the [Troubleshooting common problems](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/troubleshooting.html) section in the documentation, as well as [existing issues](https://github.com/jupyter/docker-stacks/issues?q=is%3Aissue) prior to submitting an issue to avoid duplication. Please answer the following sections to help us troubleshoot the problem. - type: dropdown attributes: label: What docker image(s) are you using? description: Select as many images as applicable multiple: true options: - all-spark-notebook - base-notebook - datascience-notebook - docker-stacks-foundation - julia-notebook - minimal-notebook - pyspark-notebook - pytorch-notebook - r-notebook - scipy-notebook - tensorflow-notebook validations: required: true - type: input attributes: label: Host OS placeholder: | Example: Ubuntu 24.04 validations: required: true - type: dropdown attributes: label: Host architecture options: - x86_64 - aarch64 validations: required: true - type: textarea attributes: label: What Docker command are you running? description: | What complete docker command do you run to launch the container (omitting sensitive values)? placeholder: | Example: `docker run -it --rm -p 8888:8888 quay.io/jupyter/base-notebook` validations: required: true - type: textarea attributes: label: How to Reproduce the problem? 
description: Please provide steps to reproduce this bug (once the container is running). placeholder: | Example: 1. Visit http://localhost:8888 2. Start an R notebook 3. ... validations: required: true - type: textarea attributes: label: Command output render: bash session description: | Provide the output of the steps above, including the commands themselves and Docker's output/traceback etc. If you're familiar with Markdown, this block will have triple backticks added automatically around it -- you don't have to add them. If you want to present output from multiple commands, please present that as a shell session (commands you run get prefixed with `$ `). Please also ensure that the "How to reproduce" section contains matching instructions for reproducing this. - type: textarea attributes: label: Expected behavior description: | A clear and concise description of what you expected to happen. placeholder: | Example: `ggplot` output appears in my notebook. - type: textarea attributes: label: Actual behavior description: | A clear and concise description of what the bug is. placeholder: | Example: No output is visible in the notebook and the Server log contains messages about ... validations: required: true - type: textarea attributes: label: Anything else? description: | Links? References? Anything that will give us more context about the issue you are encountering! Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in. 
validations: required: false - type: checkboxes attributes: label: Latest Docker version description: You should try to use the latest Docker version options: - label: I've updated my Docker version to the latest available, and the issue persists required: true ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false contact_links: - name: 📖 - Jupyter Docker Stacks documentation url: https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html about: Go to the project's documentation - name: 🔍 - Troubleshooting common problems url: https://jupyter-docker-stacks.readthedocs.io/en/latest/using/troubleshooting.html about: Documentation section on troubleshooting commonly encountered errors - name: 💬 - Jupyter community Discourse url: https://discourse.jupyter.org/ about: Interact with the rest of the Jupyter community ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.yml ================================================ name: Feature request description: Suggest a new feature for this project labels: ["type:Enhancement"] body: - type: markdown attributes: value: | Hi! Thanks for using the Jupyter Docker Stacks and taking some time to contribute to this project. We'd appreciate it if you could check out the [Suggesting a new feature](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/features.html#suggesting-a-new-feature) section in the documentation for our preferred processes before submitting a feature request. - type: dropdown attributes: label: What docker image(s) is this feature applicable to? 
description: Select as many images as applicable multiple: true options: - all-spark-notebook - base-notebook - datascience-notebook - docker-stacks-foundation - julia-notebook - minimal-notebook - pyspark-notebook - pytorch-notebook - r-notebook - scipy-notebook - tensorflow-notebook - new community stack validations: required: true - type: textarea attributes: label: What change(s) are you proposing? description: | Be concise and feel free to add supporting links or references. placeholder: | Example: - Add the [altair](https://altair-viz.github.io) package to the image. validations: required: true - type: textarea attributes: label: How does this affect the user? description: | How will the proposed feature affect the user's workflow? How will this feature make the image more robust, secure, etc.? placeholder: | Example: - Altair is a declarative statistical visualization library for Python, based on Vega and Vega-Lite, and the source is available on GitHub. - With Altair, you can spend more time understanding your data and its meaning. - Altair's API is simple, friendly, and consistent and built on top of the powerful Vega-Lite visualization grammar. - This elegant simplicity produces beautiful and effective visualizations with a minimal amount of code. validations: required: true - type: textarea attributes: label: Anything else? description: | Links? References? Anything that will give us more context about the feature you are proposing. 
validations: required: false ================================================ FILE: .github/actions/apply-single-tags/action.yml ================================================ name: Apply single platform tags description: Download the image tar, load it to Docker and apply tags to it inputs: image: description: Image name required: true platform: description: Image platform required: true variant: description: Variant tag prefix required: true runs: using: composite steps: - name: Load image to Docker 📥 uses: ./.github/actions/load-image with: image: ${{ inputs.image }} platform: ${{ inputs.platform }} variant: ${{ inputs.variant }} - name: Download tags file 📥 uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ inputs.platform }}-${{ inputs.variant }}-${{ inputs.image }}.txt path: /tmp/jupyter/tags/ - name: Apply tags to the loaded image 🏷 run: | python3 -m tagging.apps.apply_tags \ --registry ${{ env.REGISTRY }} \ --owner ${{ env.OWNER }} \ --image ${{ inputs.image }} \ --variant ${{ inputs.variant }} \ --platform ${{ inputs.platform }} \ --tags-dir /tmp/jupyter/tags/ shell: bash - name: Upload SBOM for the image 🧾 uses: anchore/sbom-action@57aae528053a48a3f6235f2d9461b05fbcb7366d # v0.23.1 with: image: ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ inputs.image }} artifact-name: ${{ inputs.image }}-${{ inputs.platform }}-${{ inputs.variant }}-sbom.spdx.json upload-artifact-retention: 40 # This step is needed to prevent pushing non-multi-arch "latest" tag - name: Remove the "latest" tag from the image 🗑️ run: docker image rmi ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ inputs.image }}:latest shell: bash - name: Show Docker images 📦 run: docker image ls --all shell: bash ================================================ FILE: .github/actions/create-dev-env/action.yml ================================================ name: Build environment description: Create a build environment runs: using: composite steps: - name: Set Up 
Python 🐍 uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: 3.12 - name: Install Dev Dependencies 📦 run: | pip install --upgrade pip pip install --upgrade -r requirements-dev.txt shell: bash # We need to have a recent docker version # More info: https://github.com/jupyter/docker-stacks/pull/2255 # Can be removed after Docker Engine is updated # https://github.com/actions/runner-images/issues/11766 - name: Set Up Docker 🐳 uses: docker/setup-docker-action@1a6edb0ba9ac496f6850236981f15d8f9a82254d # v5.0.0 with: set-host: true ================================================ FILE: .github/actions/free-disk-space/action.yml ================================================ name: "Free Disk Space (Ubuntu)" description: "A GitHub Action to free up disk space on an Ubuntu GitHub Actions runner." runs: using: "composite" steps: - name: Installing rmz shell: bash run: | curl -fsSL --tlsv1.2 --proto '=https' https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash > /dev/null 2>&1 cargo binstall -qy rmz ln -s ~/.cargo/bin/rmz /usr/local/bin/rmz - name: Freeing up disk space shell: bash run: | sudo rmz -f /usr/local/lib/android || true sudo rmz -f /usr/share/dotnet || true sudo rmz -f /opt/ghc /usr/local/.ghcup || true sudo rmz -f /usr/share/swift || true sudo rmz -f /usr/share/miniconda || true sudo rmz -f "${AGENT_TOOLSDIRECTORY}" || true ================================================ FILE: .github/actions/load-image/action.yml ================================================ name: Load Docker image description: Download the image tar and load it to Docker inputs: image: description: Image name required: true platform: description: Image platform required: true variant: description: Variant tag prefix required: true runs: using: composite steps: - name: Download built image 📥 uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ 
inputs.image }}-${{ inputs.platform }}-${{ inputs.variant }}.tar.zst path: /tmp/jupyter/images/ - name: Load downloaded image to docker 📥 run: | zstd \ --uncompress \ --stdout \ --rm \ /tmp/jupyter/images/${{ inputs.image }}-${{ inputs.platform }}-${{ inputs.variant }}.tar.zst \ | docker load shell: bash - name: Show Docker images 📦 run: docker image ls --all shell: bash ================================================ FILE: .github/dependabot.yml ================================================ # To get started with Dependabot version updates, you'll need to specify which # package ecosystems to update and where the package manifests are located. # Please see the documentation for all configuration options: # https://docs.github.com/en/code-security/dependabot/working-with-dependabot/dependabot-options-reference # We're adding `[FAST_BUILD]` prefix to commit messages (this adds it to PR title) # This triggers a faster build, see more info in `.github/workflows/docker.yml` version: 2 updates: - package-ecosystem: github-actions directory: / schedule: interval: weekly commit-message: prefix: "[FAST_BUILD] " - package-ecosystem: github-actions directory: .github/actions/apply-single-tags/ schedule: interval: weekly commit-message: prefix: "[FAST_BUILD] " - package-ecosystem: github-actions directory: .github/actions/create-dev-env/ schedule: interval: weekly commit-message: prefix: "[FAST_BUILD] " # This action is only used for some images, so full build is required - package-ecosystem: github-actions directory: .github/actions/free-disk-space/ schedule: interval: weekly - package-ecosystem: github-actions directory: .github/actions/load-image/ schedule: interval: weekly commit-message: prefix: "[FAST_BUILD] " ================================================ FILE: .github/pull_request_template.md ================================================ ## Describe your changes ## Issue ticket if applicable ## Checklist (especially for first-time contributors) - [ ] I have 
performed a self-review of my code - [ ] If it is a core feature, I have added thorough tests - [ ] I will try not to use force-push to make the review process easier for reviewers - [ ] I have updated the documentation for significant changes ================================================ FILE: .github/workflows/contributed-recipes.yml ================================================ name: Test the contributed recipes env: REGISTRY: quay.io OWNER: ${{ github.repository_owner }} on: schedule: # Images are rebuilt at 03:00 on Monday UTC # So we're testing recipes one hour in advance # They will also be tested after building images - cron: "0 2 * * 1" pull_request: paths: - ".github/workflows/contributed-recipes.yml" - "docs/using/recipe_code/**" push: branches: - main paths: - ".github/workflows/contributed-recipes.yml" - "docs/using/recipe_code/**" workflow_dispatch: workflow_call: inputs: # There is no good way to detect if the workflow was called using workflow_call # https://github.com/actions/runner/discussions/1884 called-using-workflow-call: description: "Was the workflow called using workflow_call" required: true type: boolean permissions: contents: read jobs: generate-matrix: runs-on: ubuntu-24.04 timeout-minutes: 1 outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: - name: Checkout Repo ⚡️ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Calculate recipes matrix 🛠 id: set-matrix run: docs/using/recipe_code/generate_matrix.py >> "${GITHUB_OUTPUT}" env: REPOSITORY_OWNER: ${{ github.repository_owner }} build: runs-on: ${{ matrix.runs-on }} timeout-minutes: 10 needs: generate-matrix if: github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru' steps: - name: Checkout Repo ⚡️ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Load image to Docker 📥 if: ${{ inputs.called-using-workflow-call && matrix.parent-image != '' }} uses: ./.github/actions/load-image with: 
image: ${{ matrix.parent-image }} platform: ${{ matrix.platform }} variant: default # Not pulling the image, because it might be loaded from previous step or will be downloaded automatically - name: Build recipe with parent image 🛠 if: ${{ matrix.parent-image != '' }} run: | docker build \ --rm --force-rm \ --tag my-custom-image \ -f ./${{ matrix.dockerfile }} \ --build-arg BASE_IMAGE=${{ env.REGISTRY }}/${{ env.OWNER }}/${{ matrix.parent-image }} \ ./ env: DOCKER_BUILDKIT: 1 # Full logs for CI build BUILDKIT_PROGRESS: plain working-directory: docs/using/recipe_code shell: bash # Not pulling the image, because it might be loaded from previous step or will be downloaded automatically - name: Build recipe without parent image 🛠 if: ${{ matrix.parent-image == '' }} run: | docker build \ --rm --force-rm \ --tag my-custom-image \ -f ./${{ matrix.dockerfile }} \ ./ env: DOCKER_BUILDKIT: 1 # Full logs for CI build BUILDKIT_PROGRESS: plain working-directory: docs/using/recipe_code shell: bash strategy: matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }} ================================================ FILE: .github/workflows/docker-build-test-upload.yml ================================================ name: Download a parent image, build a new one, and test it; upload the image, tags, build history line and manifest to GitHub artifacts env: REGISTRY: quay.io OWNER: ${{ github.repository_owner }} on: workflow_call: inputs: parent-image: description: Parent image name required: true type: string parent-variant: description: Parent variant tag prefix required: false type: string default: default image: description: Image name required: true type: string variant: description: Variant tag prefix required: false type: string default: default platform: description: Image platform required: true type: string runs-on: description: GitHub Actions Runner image required: true type: string timeout-minutes: description: Timeout in minutes required: true type: number 
permissions: contents: read jobs: build-test-upload: runs-on: ${{ inputs.runs-on }} timeout-minutes: ${{ inputs.timeout-minutes }} steps: - name: Checkout Repo ⚡️ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Compute commit hash tag 🏷 id: hash run: echo "tag=${GITHUB_SHA::12}" >> "$GITHUB_OUTPUT" shell: bash - name: Free disk space 🧹 uses: ./.github/actions/free-disk-space if: contains(inputs.variant, 'cuda') || inputs.image == 'datascience-notebook' || inputs.image == 'all-spark-notebook' - name: Create dev environment 📦 uses: ./.github/actions/create-dev-env - name: Load parent built image to Docker 📥 if: inputs.parent-image != '' uses: ./.github/actions/load-image with: image: ${{ inputs.parent-image }} platform: ${{ inputs.platform }} variant: ${{ inputs.parent-variant }} - name: Pull base ubuntu image 📥 if: inputs.parent-image == '' run: docker pull ubuntu:24.04 shell: bash - name: Build image 🛠 run: | docker build \ --rm --force-rm \ --tag ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ inputs.image }} \ images/${{ inputs.image }}/${{ inputs.variant != 'default' && inputs.variant || '.' 
}}/ \ --build-arg REGISTRY=${{ env.REGISTRY }} \ --build-arg OWNER=${{ env.OWNER }} env: DOCKER_BUILDKIT: 1 # Full logs for CI build BUILDKIT_PROGRESS: plain shell: bash - name: Write tags file 🏷 run: | python3 -m tagging.apps.write_tags_file \ --registry ${{ env.REGISTRY }} \ --owner ${{ env.OWNER }} \ --image ${{ inputs.image }} \ --variant ${{ inputs.variant }} \ --tags-dir /tmp/jupyter/tags/ shell: bash - name: Upload tags file 💾 uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: path: /tmp/jupyter/tags/${{ inputs.platform }}-${{ inputs.variant }}-${{ inputs.image }}.txt retention-days: 3 archive: false - name: Write manifest and build history file 🏷 run: | python3 -m tagging.apps.write_manifest \ --registry ${{ env.REGISTRY }} \ --owner ${{ env.OWNER }} \ --image ${{ inputs.image }} \ --variant ${{ inputs.variant }} \ --hist-lines-dir /tmp/jupyter/hist_lines/ \ --manifests-dir /tmp/jupyter/manifests/ \ --repository ${{ github.repository }} shell: bash - name: Upload manifest file 💾 uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: path: /tmp/jupyter/manifests/${{ inputs.platform }}-${{ inputs.variant }}-${{ inputs.image }}-${{ steps.hash.outputs.tag }}.md retention-days: 3 archive: false - name: Upload build history line 💾 uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: path: /tmp/jupyter/hist_lines/${{ inputs.platform }}-${{ inputs.variant }}-${{ inputs.image }}-${{ steps.hash.outputs.tag }}.txt retention-days: 3 archive: false - name: Save image as a tar for later use 💾 run: | mkdir -p /tmp/jupyter/images/ docker save \ ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ inputs.image }} \ | zstd > /tmp/jupyter/images/${{ inputs.image }}-${{ inputs.platform }}-${{ inputs.variant }}.tar.zst shell: bash - name: Upload image as artifact 💾 uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: path: /tmp/jupyter/images/${{ inputs.image 
}}-${{ inputs.platform }}-${{ inputs.variant }}.tar.zst retention-days: 3 archive: false - name: Run tests ✅ run: | python3 -m tests.run_tests \ --registry ${{ env.REGISTRY }} \ --owner ${{ env.OWNER }} \ --image ${{ inputs.image }} shell: bash ================================================ FILE: .github/workflows/docker-tag-merge.yml ================================================ name: Merge single platform tags env: REGISTRY: quay.io PUSH_TO_REGISTRY: ${{ (github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru') && (github.ref == 'refs/heads/main' || github.event_name == 'schedule') }} on: workflow_call: inputs: image: description: Image name required: true type: string variant: description: Variant tag prefix required: true type: string timeout-minutes: description: Timeout in minutes default: 5 type: number secrets: REGISTRY_USERNAME: required: true REGISTRY_TOKEN: required: true permissions: contents: read jobs: tag-merge: runs-on: ubuntu-24.04 timeout-minutes: ${{ inputs.timeout-minutes }} steps: - name: Checkout Repo ⚡️ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Create dev environment 📦 uses: ./.github/actions/create-dev-env - name: Download aarch64 tags file 🏷 uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: aarch64-${{ inputs.variant }}-${{ inputs.image }}.txt path: /tmp/jupyter/tags/ - name: Download x86_64 tags file 🏷 uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: x86_64-${{ inputs.variant }}-${{ inputs.image }}.txt path: /tmp/jupyter/tags/ - name: Login to Registry 🔐 if: env.PUSH_TO_REGISTRY == 'true' run: | docker login ${{ env.REGISTRY }} \ --username ${{ secrets.REGISTRY_USERNAME }} \ --password ${{ secrets.REGISTRY_TOKEN }} || \ docker login ${{ env.REGISTRY }} \ --username ${{ secrets.REGISTRY_USERNAME }} \ --password ${{ secrets.REGISTRY_TOKEN }} shell: bash id: login - name: Merge tags for 
the images 🔀 run: | python3 -m tagging.apps.merge_tags \ --image ${{ inputs.image }} \ --variant ${{ inputs.variant }} \ --tags-dir /tmp/jupyter/tags/ shell: bash - name: Logout from Registry 🔐 if: always() && env.PUSH_TO_REGISTRY == 'true' && steps.login.outcome == 'success' run: | docker logout ${{ env.REGISTRY }} shell: bash ================================================ FILE: .github/workflows/docker-tag-push-merge.yml ================================================ name: Download a Docker image and its tags from GitHub artifacts, apply them, and push the image to the Registry; then merge them on: workflow_call: inputs: image: description: Image name required: true type: string variant: description: Variant tag prefix required: true type: string secrets: REGISTRY_USERNAME: required: true REGISTRY_TOKEN: required: true permissions: contents: read jobs: tag-push: uses: ./.github/workflows/docker-tag-push.yml with: image: ${{ inputs.image }} variant: ${{ inputs.variant }} secrets: REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }} REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }} tag-merge: uses: ./.github/workflows/docker-tag-merge.yml needs: tag-push with: image: ${{ inputs.image }} variant: ${{ inputs.variant }} secrets: REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }} REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }} ================================================ FILE: .github/workflows/docker-tag-push.yml ================================================ name: Download a Docker image and its tags from GitHub artifacts, apply them, and push the image to the Registry env: REGISTRY: quay.io OWNER: ${{ github.repository_owner }} PUSH_TO_REGISTRY: ${{ (github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru') && (github.ref == 'refs/heads/main' || github.event_name == 'schedule') }} on: workflow_call: inputs: image: description: Image name required: true type: string variant: description: Variant tag prefix required: true type: string 
timeout-minutes: description: Timeout in minutes default: 25 type: number secrets: REGISTRY_USERNAME: required: true REGISTRY_TOKEN: required: true permissions: contents: read jobs: tag-push: runs-on: ubuntu-24.04 timeout-minutes: ${{ inputs.timeout-minutes }} strategy: matrix: platform: [aarch64, x86_64] steps: - name: Checkout Repo ⚡️ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Free disk space 🧹 uses: ./.github/actions/free-disk-space if: contains(inputs.variant, 'cuda') || inputs.image == 'datascience-notebook' || inputs.image == 'all-spark-notebook' - name: Create dev environment 📦 uses: ./.github/actions/create-dev-env - name: Download image tar and apply tags 🏷 uses: ./.github/actions/apply-single-tags with: image: ${{ inputs.image }} variant: ${{ inputs.variant }} platform: ${{ matrix.platform }} - name: Login to Registry 🔐 if: env.PUSH_TO_REGISTRY == 'true' run: | docker login ${{ env.REGISTRY }} \ --username ${{ secrets.REGISTRY_USERNAME }} \ --password ${{ secrets.REGISTRY_TOKEN }} || \ docker login ${{ env.REGISTRY }} \ --username ${{ secrets.REGISTRY_USERNAME }} \ --password ${{ secrets.REGISTRY_TOKEN }} shell: bash id: login - name: Push single platform images to Registry 📤 if: env.PUSH_TO_REGISTRY == 'true' run: | docker push --all-tags ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ inputs.image }} || \ docker push --all-tags ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ inputs.image }} shell: bash - name: Logout from Registry 🔐 if: always() && env.PUSH_TO_REGISTRY == 'true' && steps.login.outcome == 'success' run: | docker logout ${{ env.REGISTRY }} shell: bash ================================================ FILE: .github/workflows/docker-wiki-update.yml ================================================ name: Download build history lines and manifests from GitHub artifacts and push them to the GitHub wiki # We're doing everything in one workflow on purpose # This way we make sure we don't access wiki pages from several jobs 
simultaneously env: PUSH_TO_REGISTRY: ${{ github.ref == 'refs/heads/main' || github.event_name == 'schedule' }} on: workflow_call: permissions: contents: write jobs: wiki-update: runs-on: ubuntu-24.04 timeout-minutes: 1 steps: - name: Checkout Repo ⚡️ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 - name: Create dev environment 📦 uses: ./.github/actions/create-dev-env - name: Compute commit hash tag 🏷 id: hash run: echo "tag=${GITHUB_SHA::12}" >> "$GITHUB_OUTPUT" shell: bash - name: Download all history lines 📥 uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: pattern: "*-${{ steps.hash.outputs.tag }}.txt" path: /tmp/jupyter/hist_lines/ merge-multiple: true - name: Download all manifests 📥 uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: pattern: "*-${{ steps.hash.outputs.tag }}.md" path: /tmp/jupyter/manifests/ merge-multiple: true - name: Checkout Wiki Repo 📃 uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: ${{ github.repository }}.wiki path: wiki_src/ - name: Update wiki 🏷 run: | python3 -m wiki.update_wiki \ --wiki-dir wiki_src/ \ --hist-lines-dir /tmp/jupyter/hist_lines/ \ --manifests-dir /tmp/jupyter/manifests/ \ --repository ${{ github.repository }} shell: bash - name: Push Wiki to GitHub 📤 if: env.PUSH_TO_REGISTRY == 'true' uses: stefanzweifel/git-auto-commit-action@04702edda442b2e678b25b537cec683a1493fcb9 # v7.1.0 with: commit_message: "Automated wiki publish for ${{ github.sha }}" repository: wiki_src/ ================================================ FILE: .github/workflows/docker.yml ================================================ name: Docker Stacks # [FAST_BUILD] in the PR title makes this workflow only build # the `jupyter/docker-stacks-foundation` and `jupyter/base-notebook` images # This allows CI to run faster if a full build is not required # This only works for a `pull_request`
event and does not affect `push` to the `main` branch on: schedule: # Weekly, at 03:00 on Monday UTC - cron: "0 3 * * 1" pull_request: paths: - ".github/workflows/docker.yml" # We use local reusable workflows to make architecture clean and simple # https://docs.github.com/en/actions/sharing-automations/reusing-workflows - ".github/workflows/docker-build-test-upload.yml" - ".github/workflows/docker-tag-merge.yml" - ".github/workflows/docker-tag-push-merge.yml" - ".github/workflows/docker-tag-push.yml" - ".github/workflows/docker-wiki-update.yml" # We use local composite actions to combine multiple workflow steps within one action # https://docs.github.com/en/actions/sharing-automations/creating-actions/about-custom-actions#composite-actions - ".github/actions/apply-single-tags/action.yml" - ".github/actions/create-dev-env/action.yml" - ".github/actions/free-disk-space/action.yml" - ".github/actions/load-image/action.yml" - "images/**" - "!images/*/README.md" - "tagging/**" - "!tagging/README.md" - "tests/**" - "!tests/README.md" - "wiki/**" - "requirements-dev.txt" push: branches: - main paths: - ".github/workflows/docker.yml" - ".github/workflows/docker-build-test-upload.yml" - ".github/workflows/docker-tag-merge.yml" - ".github/workflows/docker-tag-push-merge.yml" - ".github/workflows/docker-tag-push.yml" - ".github/workflows/docker-wiki-update.yml" - ".github/actions/apply-single-tags/action.yml" - ".github/actions/create-dev-env/action.yml" - ".github/actions/free-disk-space/action.yml" - ".github/actions/load-image/action.yml" - "images/**" - "!images/*/README.md" - "tagging/**" - "!tagging/README.md" - "tests/**" - "!tests/README.md" - "wiki/**" - "requirements-dev.txt" workflow_dispatch: # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/control-the-concurrency-of-workflows-and-jobs concurrency: # Only cancel in-progress jobs or runs for the current workflow - matches against branch & tags group: ${{ github.workflow }}-${{ 
github.ref }} cancel-in-progress: true permissions: contents: read jobs: aarch64-foundation: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: "" image: docker-stacks-foundation platform: aarch64 runs-on: ubuntu-24.04-arm timeout-minutes: 10 x86_64-foundation: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: "" image: docker-stacks-foundation platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 10 aarch64-base: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: docker-stacks-foundation image: base-notebook platform: aarch64 runs-on: ubuntu-24.04-arm timeout-minutes: 15 needs: aarch64-foundation x86_64-base: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: docker-stacks-foundation image: base-notebook platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 15 needs: x86_64-foundation aarch64-minimal: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: base-notebook image: minimal-notebook platform: aarch64 runs-on: ubuntu-24.04-arm timeout-minutes: 15 needs: aarch64-base if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} x86_64-minimal: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: base-notebook image: minimal-notebook platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 15 needs: x86_64-base if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} aarch64-scipy: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: minimal-notebook image: scipy-notebook platform: aarch64 runs-on: ubuntu-24.04-arm timeout-minutes: 15 needs: aarch64-minimal if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} x86_64-scipy: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: minimal-notebook image: scipy-notebook platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 15 needs: x86_64-minimal if: ${{ 
!contains(github.event.pull_request.title, '[FAST_BUILD]') }} aarch64-r: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: minimal-notebook image: r-notebook platform: aarch64 runs-on: ubuntu-24.04-arm timeout-minutes: 15 needs: aarch64-minimal if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} x86_64-r: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: minimal-notebook image: r-notebook platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 15 needs: x86_64-minimal if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} aarch64-julia: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: minimal-notebook image: julia-notebook platform: aarch64 runs-on: ubuntu-24.04-arm # This workflow sometimes takes quite long to build timeout-minutes: 30 needs: aarch64-minimal if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} x86_64-julia: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: minimal-notebook image: julia-notebook platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 20 needs: x86_64-minimal if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} aarch64-tensorflow: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: tensorflow-notebook platform: aarch64 runs-on: ubuntu-24.04-arm timeout-minutes: 15 needs: aarch64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} x86_64-tensorflow: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: tensorflow-notebook platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 15 needs: x86_64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} aarch64-tensorflow-cuda: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: tensorflow-notebook variant: cuda platform: aarch64 runs-on: 
ubuntu-24.04-arm timeout-minutes: 25 needs: aarch64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} x86_64-tensorflow-cuda: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: tensorflow-notebook variant: cuda platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 25 needs: x86_64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} aarch64-pytorch: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: pytorch-notebook platform: aarch64 runs-on: ubuntu-24.04-arm timeout-minutes: 20 needs: aarch64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} x86_64-pytorch: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: pytorch-notebook platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 20 needs: x86_64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} aarch64-pytorch-cuda12: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: pytorch-notebook variant: cuda12 platform: aarch64 runs-on: ubuntu-24.04-arm timeout-minutes: 25 needs: aarch64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} x86_64-pytorch-cuda12: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: pytorch-notebook variant: cuda12 platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 25 needs: x86_64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} aarch64-pytorch-cuda13: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: pytorch-notebook variant: cuda13 platform: aarch64 runs-on: ubuntu-24.04-arm timeout-minutes: 25 needs: aarch64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} x86_64-pytorch-cuda13: uses: ./.github/workflows/docker-build-test-upload.yml 
with: parent-image: scipy-notebook image: pytorch-notebook variant: cuda13 platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 25 needs: x86_64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} aarch64-datascience: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: datascience-notebook platform: aarch64 runs-on: ubuntu-24.04-arm # This workflow sometimes takes quite long to build timeout-minutes: 30 needs: aarch64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} x86_64-datascience: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: datascience-notebook platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 25 needs: x86_64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} aarch64-pyspark: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: pyspark-notebook platform: aarch64 runs-on: ubuntu-24.04-arm timeout-minutes: 20 needs: aarch64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} x86_64-pyspark: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: scipy-notebook image: pyspark-notebook platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 15 needs: x86_64-scipy if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} aarch64-all-spark: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: pyspark-notebook image: all-spark-notebook platform: aarch64 runs-on: ubuntu-24.04-arm timeout-minutes: 20 needs: aarch64-pyspark if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} x86_64-all-spark: uses: ./.github/workflows/docker-build-test-upload.yml with: parent-image: pyspark-notebook image: all-spark-notebook platform: x86_64 runs-on: ubuntu-24.04 timeout-minutes: 15 needs: x86_64-pyspark if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') 
}} contributed-recipes: uses: ./.github/workflows/contributed-recipes.yml with: called-using-workflow-call: true # Contributed recipes only use these images # If recipes using other images will be added, they should be added here as well # # contributed-recipes will give an error if the image is not yet built and uploaded needs: [aarch64-base, x86_64-base, aarch64-minimal, x86_64-minimal] tag-push-merge: uses: ./.github/workflows/docker-tag-push-merge.yml with: image: ${{ matrix.image }} variant: ${{ matrix.variant }} secrets: REGISTRY_USERNAME: ${{ secrets.QUAY_USERNAME }} REGISTRY_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} strategy: matrix: image: [ docker-stacks-foundation, base-notebook, minimal-notebook, scipy-notebook, r-notebook, julia-notebook, tensorflow-notebook, pytorch-notebook, datascience-notebook, pyspark-notebook, all-spark-notebook, ] variant: [default] include: - image: tensorflow-notebook variant: cuda - image: pytorch-notebook variant: cuda12 - image: pytorch-notebook variant: cuda13 needs: [ contributed-recipes, aarch64-foundation, aarch64-base, aarch64-minimal, aarch64-scipy, aarch64-r, aarch64-julia, aarch64-tensorflow, aarch64-tensorflow-cuda, aarch64-pytorch, aarch64-pytorch-cuda12, aarch64-pytorch-cuda13, aarch64-datascience, aarch64-pyspark, aarch64-all-spark, x86_64-foundation, x86_64-base, x86_64-minimal, x86_64-scipy, x86_64-r, x86_64-julia, x86_64-tensorflow, x86_64-tensorflow-cuda, x86_64-pytorch, x86_64-pytorch-cuda12, x86_64-pytorch-cuda13, x86_64-datascience, x86_64-pyspark, x86_64-all-spark, ] if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} tag-push-merge-fast: uses: ./.github/workflows/docker-tag-push-merge.yml with: image: ${{ matrix.image }} variant: ${{ matrix.variant }} secrets: REGISTRY_USERNAME: ${{ secrets.QUAY_USERNAME }} REGISTRY_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} strategy: matrix: image: [docker-stacks-foundation, base-notebook] variant: [default] needs: [aarch64-foundation, aarch64-base, 
x86_64-foundation, x86_64-base] if: contains(github.event.pull_request.title, '[FAST_BUILD]') wiki-update: uses: ./.github/workflows/docker-wiki-update.yml needs: tag-push-merge if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} permissions: contents: write wiki-update-fast: uses: ./.github/workflows/docker-wiki-update.yml needs: tag-push-merge-fast if: contains(github.event.pull_request.title, '[FAST_BUILD]') permissions: contents: write ================================================ FILE: .github/workflows/pre-commit.yml ================================================ name: Run pre-commit hooks on: pull_request: push: branches: - main workflow_dispatch: permissions: contents: read jobs: run-hooks: runs-on: ubuntu-24.04 timeout-minutes: 5 steps: - name: Checkout Repo ⚡️ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set Up Python 🐍 uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: 3.12 - name: Install pre-commit 📦 run: | pip install --upgrade pip pip install --upgrade pre-commit - name: Run pre-commit hooks ✅ run: pre-commit run --all-files --hook-stage manual ================================================ FILE: .github/workflows/registry-move.yml ================================================ name: Move some images from Docker Hub to Quay.io env: OWNER: ${{ github.repository_owner }} PUSH_TO_REGISTRY: ${{ (github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru') && (github.ref == 'refs/heads/main') }} on: pull_request: paths: - ".github/workflows/registry-move.yml" push: branches: - main paths: - ".github/workflows/registry-move.yml" workflow_dispatch: permissions: contents: read jobs: registry-move: # To be able to use the latest skopeo runs-on: macos-latest timeout-minutes: 5 if: github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru' steps: - name: Checkout Repo ⚡️ uses: 
actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install skopeo and Docker 📦 run: | brew install skopeo brew install --cask docker - name: Login to Quay.io 🔐 if: env.PUSH_TO_REGISTRY == 'true' run: | skopeo login quay.io \ --username ${{ secrets.QUAY_USERNAME }} \ --password ${{ secrets.QUAY_ROBOT_TOKEN }} - name: Move image from Docker Hub to Quay.io 🐳 if: env.PUSH_TO_REGISTRY == 'true' && matrix.tag != 'tag-for-testing' run: | skopeo copy \ --multi-arch all \ docker://${{ env.OWNER }}/${{ matrix.image }}:${{ matrix.tag }} \ docker://quay.io/${{ env.OWNER }}/${{ matrix.image }}:${{ matrix.tag }} strategy: fail-fast: false matrix: image: [ docker-stacks-foundation, base-notebook, minimal-notebook, scipy-notebook, r-notebook, julia-notebook, tensorflow-notebook, pytorch-notebook, datascience-notebook, pyspark-notebook, all-spark-notebook, ] tag: [tag-for-testing] ================================================ FILE: .github/workflows/registry-overviews.yml ================================================ name: Update Registry overviews env: OWNER: ${{ github.repository_owner }} on: push: branches: - main paths: - ".github/workflows/registry-overviews.yml" - "images/*/README.md" workflow_dispatch: permissions: contents: read jobs: update-overview: runs-on: ubuntu-24.04 timeout-minutes: 1 if: github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru' steps: - name: Checkout Repo ⚡️ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Push README to Registry 🐳 uses: christian-korneck/update-container-description-action@d36005551adeaba9698d8d67a296bd16fa91f8e8 # v1 env: DOCKER_APIKEY: ${{ secrets.APIKEY__QUAY_IO }} with: destination_container_repo: quay.io/${{ env.OWNER }}/${{ matrix.image }} provider: quay readme_file: images/${{ matrix.image }}/README.md strategy: matrix: image: [ docker-stacks-foundation, base-notebook, minimal-notebook, scipy-notebook, r-notebook, julia-notebook, 
tensorflow-notebook, pytorch-notebook, datascience-notebook, pyspark-notebook, all-spark-notebook, ] ================================================ FILE: .github/workflows/sphinx.yml ================================================ name: Build Sphinx Documentation and check links on: schedule: # Weekly, at 03:00 on Monday UTC - cron: "0 3 * * 1" pull_request: paths: - ".github/workflows/sphinx.yml" - "Makefile" - "docs/**" # These files are also rendered as docs pages - "README.md" - "CHANGELOG.md" # These files are used to generate some code snippets in the docs - "tagging/manifests/apt_packages.py" - "tagging/manifests/manifest_interface.py" - "tagging/taggers/sha.py" - "tagging/taggers/tagger_interface.py" push: branches: - main paths: - ".github/workflows/sphinx.yml" - "Makefile" - "docs/**" - "README.md" - "CHANGELOG.md" - "tagging/manifests/apt_packages.py" - "tagging/manifests/manifest_interface.py" - "tagging/taggers/sha.py" - "tagging/taggers/tagger_interface.py" workflow_dispatch: permissions: contents: read jobs: build-docs: runs-on: ubuntu-24.04 timeout-minutes: 10 if: github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru' || github.event_name != 'schedule' steps: - name: Checkout Repo ⚡️ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 - name: Set Up Python 🐍 uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: 3.12 - name: Install Doc Dependencies 📦 run: | pip install --upgrade pip pip install --upgrade -r docs/requirements.txt - name: Build Documentation 📖 run: make docs - name: Check Documentation URLs 🔗 run: make linkcheck-docs || make linkcheck-docs ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ 
eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # UV # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. #uv.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. 
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/latest/usage/project/#working-with-version-control .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/ # Ruff stuff: .ruff_cache/ # PyPI configuration file .pypirc ################################################################## # The content above is copied from # # https://github.com/github/gitignore/blob/main/Python.gitignore # # Please, add the content only below these lines # ################################################################## # Mac OS X .DS_Store # VS Code project configuration .vscode/ # PyCharm project configuration .idea/ ================================================ FILE: .hadolint.yaml ================================================ --- ignored: - DL3006 - DL3008 - DL3013 ================================================ FILE: .markdownlint.yaml ================================================ # Default state for all rules default: true # MD013/line-length - Line length MD013: # Number of characters line_length: 200 tables: false ================================================ FILE: .pre-commit-config.yaml ================================================ --- # pre-commit is a tool to perform a predefined set of tasks manually and/or # automatically before git commits are made. 
# # Config reference: https://pre-commit.com/#pre-commit-configyaml---top-level # # Common tasks # # - Run on all files: pre-commit run --all-files # - Register git hooks: pre-commit install --install-hooks # # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks exclude: ^LICENSE.md$ repos: # Autoupdate: Python code - repo: https://github.com/asottile/pyupgrade rev: 75992aaa40730136014f34227e0135f63fc951b4 # frozen: v3.21.2 hooks: - id: pyupgrade args: [--py312-plus] # Automatically sort python imports - repo: https://github.com/PyCQA/isort rev: a333737ed43df02b18e6c95477ea1b285b3de15a # frozen: 8.0.1 hooks: - id: isort args: [--profile, black] # Autoformat: Python code - repo: https://github.com/psf/black-pre-commit-mirror rev: ea488cebbfd88a5f50b8bd95d5c829d0bb76feb8 # frozen: 26.1.0 hooks: - id: black args: [--target-version=py312] # Check python code static typing - repo: https://github.com/pre-commit/mirrors-mypy rev: a66e98df7b4aeeb3724184b332785976d062b92e # frozen: v1.19.1 hooks: - id: mypy args: [--config, ./mypy.ini] additional_dependencies: [ "beautifulsoup4", "numpy", "pytest", "requests", "tenacity", "urllib3", "types-beautifulsoup4", "types-python-dateutil", "types-requests", "types-tabulate", "types-urllib3", ] # Unfortunately, `pre-commit` only runs on modified files # This doesn't work well with `mypy --follow-imports error` # See: https://github.com/pre-commit/mirrors-mypy/issues/34#issuecomment-1062160321 # # To work around this we run `mypy` only in manual mode # So it won't run as part of `git commit` command, # but it will still be run as part of `pre-commit` workflow and give expected results stages: [manual] # Autoformat: YAML, JSON, Markdown, etc. 
- repo: https://github.com/rbubley/mirrors-prettier rev: c2bc67fe8f8f549cc489e00ba8b45aa18ee713b1 # frozen: v3.8.1 hooks: - id: prettier # `pre-commit sample-config` default hooks - repo: https://github.com/pre-commit/pre-commit-hooks rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0 hooks: - id: check-added-large-files - id: check-executables-have-shebangs - id: check-shebang-scripts-are-executable - id: end-of-file-fixer - id: requirements-txt-fixer - id: trailing-whitespace # Lint: Dockerfile - repo: https://github.com/hadolint/hadolint rev: 4e697ba704fd23b2409b947a319c19c3ee54d24f # frozen: v2.14.0 hooks: - id: hadolint-docker entry: hadolint/hadolint:v2.14.0 hadolint # Lint: Dockerfile # We're linting .dockerfile files as well - repo: https://github.com/hadolint/hadolint rev: 4e697ba704fd23b2409b947a319c19c3ee54d24f # frozen: v2.14.0 hooks: - id: hadolint-docker name: Lint *.dockerfile Dockerfiles entry: hadolint/hadolint:v2.12.1-beta hadolint types: [file] files: \.dockerfile$ # Lint: YAML - repo: https://github.com/adrienverge/yamllint rev: cba56bcde1fdd01c1deb3f945e69764c291a6530 # frozen: v1.38.0 hooks: - id: yamllint args: ["-d {extends: relaxed, rules: {line-length: disable}}", "-s"] # Lint: Bash scripts - repo: https://github.com/openstack/bashate rev: 5798d24d571676fc407e81df574c1ef57b520f23 # frozen: 2.1.1 hooks: - id: bashate args: ["--ignore=E006"] # Lint: Shell scripts - repo: https://github.com/shellcheck-py/shellcheck-py rev: 745eface02aef23e168a8afb6b5737818efbea95 # frozen: v0.11.0.1 hooks: - id: shellcheck args: ["-x"] # Lint: Python - repo: https://github.com/PyCQA/flake8 rev: d93590f5be797aabb60e3b09f2f52dddb02f349f # frozen: 7.3.0 hooks: - id: flake8 # Lint: Markdown - repo: https://github.com/DavidAnson/markdownlint-cli2 rev: 5387279b3b4c24822c0f86d4df4f28b37e3e8992 # frozen: v0.21.0 hooks: - id: markdownlint-cli2 args: [--fix] # Strip output from Jupyter notebooks - repo: https://github.com/kynan/nbstripout rev: 
f5da19ce3b7b40e97c12ee9cd8ce97f48f97ddf7 # frozen: 0.9.1 hooks: - id: nbstripout # nbQA provides tools from the Python ecosystem like # pyupgrade, isort, black, and flake8, adjusted for notebooks. - repo: https://github.com/nbQA-dev/nbQA rev: f96ec7f3b26a32619435686eb5813235f7e3327e # frozen: 1.9.1 hooks: - id: nbqa-pyupgrade args: [--py312-plus] - id: nbqa-isort - id: nbqa-black args: [--target-version=py312] - id: nbqa-flake8 # Run black on python code blocks in documentation files. - repo: https://github.com/adamchainz/blacken-docs rev: dda8db18cfc68df532abf33b185ecd12d5b7b326 # frozen: 1.20.0 hooks: - id: blacken-docs # --skip-errors is added to allow us to have python syntax highlighting even if # the python code blocks include jupyter-specific additions such as % or ! # See https://github.com/adamchainz/blacken-docs/issues/127 for an upstream # feature request about this. args: [--target-version=py312, --skip-errors] # pre-commit.ci config reference: https://pre-commit.ci/#configuration ci: autoupdate_schedule: monthly # Docker hooks do not work in pre-commit.ci # See: https://github.com/pre-commit-ci/issues/issues/11 skip: [hadolint-docker] ================================================ FILE: .readthedocs.yaml ================================================ # Read the Docs configuration file for Sphinx projects # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Set the OS, Python version and other tools you might need build: os: ubuntu-22.04 tools: python: "3.12" # You can also specify other tool versions: # nodejs: "20" # rust: "1.70" # golang: "1.20" jobs: post_checkout: - git fetch --unshallow || true # Build documentation in the "docs/" directory with Sphinx sphinx: configuration: docs/conf.py # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs # builder: "dirhtml" # Fail on all warnings to avoid broken references # fail_on_warning: true # Optionally build your docs in additional formats such as
PDF and ePub # formats: # - pdf # - epub # Optional but recommended, declare the Python requirements required # to build your documentation # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html python: install: - requirements: docs/requirements.txt ================================================ FILE: CHANGELOG.md ================================================ # Changelog This changelog only contains breaking and/or significant changes manually introduced to this repository (using Pull Requests). All image manifests can be found in [the wiki](https://github.com/jupyter/docker-stacks/wiki). ## 2025-12-31 Affected: `pytorch-notebook`. - **Non-breaking:** `pytorch-notebook`: Build pytorch cuda13 image instead of cuda11 ([#2391](https://github.com/jupyter/docker-stacks/pull/2391)). ## 2025-12-02 Affected: `tensorflow-notebook`, `pytorch-notebook`. - **Non-breaking:** Enable CUDA build for ARM64 ([#2352](https://github.com/jupyter/docker-stacks/pull/2352)). ## 2025-11-29 Affected: all images. - **Breaking:** Use Docker v29 and `docker buildx imagetools create` ([#2368](https://github.com/jupyter/docker-stacks/pull/2368)). ## 2025-11-24 Affected: all images. - **Non-breaking:** Add Dev Container support ([#2358](https://github.com/jupyter/docker-stacks/pull/2358)). - **Non-breaking:** Add recipe on running Jupyter Docker Stacks with Singularity ([#2357](https://github.com/jupyter/docker-stacks/pull/2357)). ## 2025-11-06 Affected: `scipy-notebook`. - **Breaking:** `scipy-notebook`: Remove facets package installation ([#2347](https://github.com/jupyter/docker-stacks/pull/2347)). ## 2025-09-16 Affected: all images. - **Non-breaking:** Publish SBOM using anchore/sbom-action ([#2317](https://github.com/jupyter/docker-stacks/pull/2317)). ## 2025-08-15 Affected: all images. - **Breaking:** `docker-stacks-foundation`: switch to Python 3.13 ([#2163](https://github.com/jupyter/docker-stacks/pull/2163)). ## 2025-04-13 Affected: `tensorflow-notebook`.
- **Non-breaking:** `tensorflow-notebook`: Install latest tensorflow ([#2263](https://github.com/jupyter/docker-stacks/pull/2263)). ## 2025-04-12 Affected: all images. - **Non-breaking:** `docker-stacks-foundation`: Pin libxml2 to avoid ABI breakage ([#2283](https://github.com/jupyter/docker-stacks/pull/2283)). ## 2025-04-11 Affected: all images. - **Non-breaking:** Make docker tag-push depend on contributed recipes in CI ([#2282](https://github.com/jupyter/docker-stacks/pull/2282)). ## 2025-04-01 Affected: all images. - **Non-breaking:** Apply and merge tags in the same place ([#2274](https://github.com/jupyter/docker-stacks/pull/2274)). ## 2025-03-23 Affected: `tensorflow-notebook`. - **Non-breaking:** `tensorflow-notebook`: Use mamba to install jupyter-server-proxy ([#2262](https://github.com/jupyter/docker-stacks/pull/2262)). ## 2025-03-22 Affected: all images. - **Non-breaking:** Use tty for running docker commands by default ([#2260](https://github.com/jupyter/docker-stacks/pull/2260)). - **Non-breaking:** Improve logs around running docker ([#2261](https://github.com/jupyter/docker-stacks/pull/2261)). ## 2025-03-21 Affected: all images. - **Non-breaking:** Refactor TrackedContainer run_detached/exec_cmd functions ([#2256](https://github.com/jupyter/docker-stacks/pull/2256)). - **Non-breaking:** Do not allocate TTY in tests if not needed ([#2257](https://github.com/jupyter/docker-stacks/pull/2257)). - **Non-breaking:** `base-notebook`: Flush output in Python before running execvp ([#2258](https://github.com/jupyter/docker-stacks/pull/2258)). ## 2025-03-20 Affected: all images except `docker-stacks-foundation`. - **Non-breaking:** `base-notebook`: Refactor healthcheck tests to use one function ([#2254](https://github.com/jupyter/docker-stacks/pull/2254)). - **Non-breaking:** `base-notebook`: Test server listening on IPv4/IPv6 ([#2255](https://github.com/jupyter/docker-stacks/pull/2255)). ## 2025-03-12 Affected: all images.
- **Non-breaking:** Add `conda` and `mamba` version taggers ([#2251](https://github.com/jupyter/docker-stacks/pull/2251)). - **Non-breaking:** Make taggers and manifests functions ([#2252](https://github.com/jupyter/docker-stacks/pull/2252)). ## 2025-02-21 Affected: all images. - **Non-breaking:** Better tagging directory structure ([#2228](https://github.com/jupyter/docker-stacks/pull/2228)). - **Non-breaking:** Better testing directory structure ([#2231](https://github.com/jupyter/docker-stacks/pull/2231)). ## 2025-02-18 Affected: all images. - **Non-breaking:** switch from `ubuntu-22.04-arm` to `ubuntu-24.04-arm` runners ([#2209](https://github.com/jupyter/docker-stacks/pull/2209)). - **Non-breaking:** don't create extra free space in runners for cuda images ([#2218](https://github.com/jupyter/docker-stacks/pull/2218)). - **Non-breaking:** revert "Pin some packages to fix `r-notebook` and `datascience-notebook` under aarch64" ([#2220](https://github.com/jupyter/docker-stacks/pull/2220)). - **Non-breaking:** Simplify and improve `test_packages.py` ([#2219](https://github.com/jupyter/docker-stacks/pull/2219)). - **Non-breaking:** Use Python 3.12 for internal code ([#2222](https://github.com/jupyter/docker-stacks/pull/2222)). ## 2025-02-17 Affected: all images. - **Non-breaking:** build contributed recipes in PRs ([#2212](https://github.com/jupyter/docker-stacks/pull/2212), [#2213](https://github.com/jupyter/docker-stacks/pull/2213)). - **Non-breaking:** remove information about Docker Hub images from Quay.io READMEs ([#2211](https://github.com/jupyter/docker-stacks/pull/2211)). - **Non-breaking:** first upload artifacts and then run tests to make sure we can easily debug broken images ([#2214](https://github.com/jupyter/docker-stacks/pull/2214)). - **Non-breaking:** aarch64 `r-notebook`, `datascience-notebook`: pin some packages to fix `r-notebook` and `datascience-notebook` under aarch64 ([#2215](https://github.com/jupyter/docker-stacks/pull/2215)).
- **Non-breaking:** don't use matrix.image-variant, use 2 separate variables ([#2217](https://github.com/jupyter/docker-stacks/pull/2217)). ## 2025-02-11 Affected: all images. - **Non-breaking:** start using `ubuntu-22.04-arm` GitHub-hosted `aarch64` runners ([#2202](https://github.com/jupyter/docker-stacks/pull/2202)). ## 2024-12-03 Affected: all images. - **Breaking:** `docker-stacks-foundation`: switch to `mamba` v2 ([#2147](https://github.com/jupyter/docker-stacks/pull/2147)). More information about changes made: <https://mamba.readthedocs.io/en/latest/developer_zone/changes-2.0.html>. ## 2024-11-08 Affected: all images except `docker-stacks-foundation`. - **Breaking:** `base-notebook`: stop installing `nodejs` from `conda-forge` ([#2172](https://github.com/jupyter/docker-stacks/pull/2172)). Reason: It isn't a direct dependency on anything in the images anymore, and increased the image size by ~150MB. ## 2024-11-06 Affected: all images except `docker-stacks-foundation`. - **Non-breaking:** `base-notebook`: install `jupyterhub-base` and `nodejs` packages instead of `jupyterhub` package ([#2171](https://github.com/jupyter/docker-stacks/pull/2171)). ## 2024-10-23 Affected: all images. - **Breaking:** `docker-stacks-foundation`: switch to Python 3.12 ([#2072](https://github.com/jupyter/docker-stacks/pull/2072)). ## 2024-10-22 Affected: `pyspark-notebook` and `all-spark-notebook` images. - **Breaking:** `pyspark-notebook`: start using Spark 4.0.0 preview versions ([#2159](https://github.com/jupyter/docker-stacks/pull/2159)). `sparklyr` doesn't seem to support Spark v4 yet when using Spark locally. Reason: Spark v3 is not compatible with Python 3.12, and [the voting group has decided](https://github.com/jupyter/docker-stacks/pull/2072#issuecomment-2414123851) to switch to Spark v4 preview version. ## 2024-10-09 Affected: users building a custom set of images.
- **Breaking:** rename: `ROOT_CONTAINER`->`ROOT_IMAGE`, `BASE_CONTAINER`->`BASE_IMAGE` ([#2154](https://github.com/jupyter/docker-stacks/issues/2154), [#2155](https://github.com/jupyter/docker-stacks/pull/2155)). ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Project `jupyter/docker-stacks` Code of Conduct Please see the [Project Jupyter Code of Conduct](https://github.com/jupyter/governance/blob/HEAD/conduct/code_of_conduct.md). ================================================ FILE: CONTRIBUTING.md ================================================ Thanks for contributing! Please see the **Contributor Guide** section in [the documentation](https://jupyter-docker-stacks.readthedocs.io/en/latest/) for information about how to contribute [issues](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/issues.html), [features](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/features.html), [recipes](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/recipes.html), [tests](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/tests.html), and [community-maintained stacks](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/stacks.html). ## Our Copyright Policy Jupyter uses a shared copyright model. Each contributor maintains copyright over their contributions to Jupyter. But, it is important to note that these contributions are typically only changes to the repositories. Thus, the Jupyter source code, in its entirety is not the copyright of any single person or institution. Instead, it is the collective copyright of the entire Jupyter Development Team. If individual contributors want to maintain a record of what changes/contributions they have specific copyright on, they should indicate their copyright in the commit message of the change, when they commit the change to one of the Jupyter repositories. 
With this in mind, the following banner should be used in any source code file to indicate the copyright and license terms: ```text # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. ``` ================================================ FILE: LICENSE.md ================================================ BSD 3-Clause License Copyright (c) 2001-2015, IPython Development Team Copyright (c) 2015-, Jupyter Development Team Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
================================================ FILE: Makefile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. .PHONY: docs help test SHELL:=bash REGISTRY?=quay.io OWNER?=jupyter # Enable BuildKit for Docker build export DOCKER_BUILDKIT:=1 # All the images listed in the build dependency order ALL_IMAGES:= \ docker-stacks-foundation \ base-notebook \ minimal-notebook \ scipy-notebook \ r-notebook \ julia-notebook \ tensorflow-notebook \ pytorch-notebook \ datascience-notebook \ pyspark-notebook \ all-spark-notebook # https://marmelab.com/blog/2016/02/29/auto-documented-makefile.html help: @echo "jupyter/docker-stacks" @echo "=====================" @echo "Replace % with a stack directory name (e.g., make build/minimal-notebook)" @echo @grep -E '^[a-zA-Z0-9_%/-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' # Note that `ROOT_IMAGE` and `PYTHON_VERSION` arguments are only applicable to the `docker-stacks-foundation` image build/%: DOCKER_BUILD_ARGS?= build/%: ROOT_IMAGE?=ubuntu:24.04 build/%: PYTHON_VERSION?=3.13 build/%: ## build the latest image for a stack using the system's architecture docker build $(DOCKER_BUILD_ARGS) --rm --force-rm \ --tag "$(REGISTRY)/$(OWNER)/$(notdir $@)" \ "./images/$(notdir $@)" \ --build-arg REGISTRY="$(REGISTRY)" \ --build-arg OWNER="$(OWNER)" \ --build-arg ROOT_IMAGE="$(ROOT_IMAGE)" \ --build-arg PYTHON_VERSION="$(PYTHON_VERSION)" @echo -n "Built image size: " @docker images "$(REGISTRY)/$(OWNER)/$(notdir $@):latest" --format "{{.Size}}" build-all: $(foreach I, $(ALL_IMAGES), build/$(I)) ## build all stacks check-outdated/%: ## check the outdated mamba/conda packages in a stack and produce a report pytest tests/by_image/docker-stacks-foundation/test_outdated.py \ --registry "$(REGISTRY)" \ --owner "$(OWNER)" \ --image "$(notdir $@)" check-outdated-all: $(foreach I, 
$(ALL_IMAGES), check-outdated/$(I)) ## check all the stacks for outdated packages cont-stop-all: ## stop all containers @echo "Stopping all containers ..." -docker stop --time 0 $(shell docker ps --all --quiet) 2> /dev/null cont-rm-all: ## remove all containers @echo "Removing all containers ..." -docker rm --force $(shell docker ps --all --quiet) 2> /dev/null cont-clean-all: cont-stop-all cont-rm-all ## clean all containers (stop + rm) docs: ## build HTML documentation sphinx-build -W --keep-going --color docs/ docs/_build/ linkcheck-docs: ## check for broken links sphinx-build -W --keep-going --color -b linkcheck docs/ docs/_build/ hook/%: VARIANT?=default hook/%: REPOSITORY?=$(OWNER)/docker-stacks hook/%: ## run post-build hooks for an image python3 -m tagging.apps.write_tags_file \ --registry "$(REGISTRY)" \ --owner "$(OWNER)" \ --image "$(notdir $@)" \ --variant "$(VARIANT)" \ --tags-dir /tmp/jupyter/tags/ python3 -m tagging.apps.write_manifest \ --registry "$(REGISTRY)" \ --owner "$(OWNER)" \ --image "$(notdir $@)" \ --variant "$(VARIANT)" \ --hist-lines-dir /tmp/jupyter/hist_lines/ \ --manifests-dir /tmp/jupyter/manifests/ \ --repository "$(REPOSITORY)" python3 -m tagging.apps.apply_tags \ --registry "$(REGISTRY)" \ --owner "$(OWNER)" \ --image "$(notdir $@)" \ --variant "$(VARIANT)" \ --platform "$(shell uname -m)" \ --tags-dir /tmp/jupyter/tags/ hook-all: $(foreach I, $(ALL_IMAGES), hook/$(I)) ## run post-build hooks for all images img-list: ## list jupyter images @echo "Listing $(OWNER) images ..." docker images "$(OWNER)/*" docker images "*/$(OWNER)/*" img-rm-dang: ## remove dangling images (tagged None) @echo "Removing dangling images ..." -docker rmi --force $(shell docker images -f "dangling=true" --quiet) 2> /dev/null img-rm-jupyter: ## remove jupyter images @echo "Removing $(OWNER) images ..." 
-docker rmi --force $(shell docker images --quiet "$(OWNER)/*") 2> /dev/null -docker rmi --force $(shell docker images --quiet "*/$(OWNER)/*") 2> /dev/null img-rm: img-rm-dang img-rm-jupyter ## remove dangling and jupyter images pull/%: ## pull a jupyter image docker pull "$(REGISTRY)/$(OWNER)/$(notdir $@)" pull-all: $(foreach I, $(ALL_IMAGES), pull/$(I)) ## pull all images push/%: ## push all tags for a jupyter image docker push --all-tags "$(REGISTRY)/$(OWNER)/$(notdir $@)" push-all: $(foreach I, $(ALL_IMAGES), push/$(I)) ## push all tagged images run-shell/%: ## run a bash in interactive mode in a stack docker run -it --rm "$(REGISTRY)/$(OWNER)/$(notdir $@)" $(SHELL) run-sudo-shell/%: ## run bash in interactive mode as root in a stack docker run -it --rm --user root "$(REGISTRY)/$(OWNER)/$(notdir $@)" $(SHELL) test/%: ## run tests against a stack python3 -m tests.run_tests \ --registry "$(REGISTRY)" \ --owner "$(OWNER)" \ --image "$(notdir $@)" test-all: $(foreach I, $(ALL_IMAGES), test/$(I)) ## test all stacks ================================================ FILE: README.md ================================================ # Jupyter Docker Stacks [![GitHub Actions badge](https://github.com/jupyter/docker-stacks/actions/workflows/docker.yml/badge.svg)](https://github.com/jupyter/docker-stacks/actions/workflows/docker.yml?query=branch%3Amain "Docker images build status") [![Read the Docs badge](https://img.shields.io/readthedocs/jupyter-docker-stacks.svg)](https://jupyter-docker-stacks.readthedocs.io/en/latest/ "Documentation build status") [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/jupyter/docker-stacks/main.svg)](https://results.pre-commit.ci/latest/github/jupyter/docker-stacks/main "pre-commit.ci build status") [![Discourse badge](https://img.shields.io/discourse/users.svg?color=%23f37626&server=https%3A%2F%2Fdiscourse.jupyter.org)](https://discourse.jupyter.org/ "Jupyter Discourse Forum") [![Binder 
badge](https://static.mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/jupyter/docker-stacks/main?urlpath=lab/tree/README.ipynb "Launch a quay.io/jupyter/base-notebook container on mybinder.org") Jupyter Docker Stacks are a set of ready-to-run [Docker images](https://quay.io/organization/jupyter) containing Jupyter applications and interactive computing tools. You can use a stack image to do any of the following (and more): - Start a personal Jupyter Server with the JupyterLab frontend (default) - Run JupyterLab for a team using JupyterHub - Start a personal Jupyter Server with the Jupyter Notebook frontend in a local Docker container - Write your own project Dockerfile ## Quick Start You can [try the quay.io/jupyter/base-notebook image](https://mybinder.org/v2/gh/jupyter/docker-stacks/main?urlpath=lab/tree/README.ipynb) on <https://mybinder.org>. Otherwise, the examples below may help you get started if you [have Docker installed](https://docs.docker.com/get-started/get-docker/), know [which Docker image](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html) you want to use, and want to launch a single Jupyter Application in a container. The [User Guide on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/) describes additional uses and features in detail. ```{note} Since `2023-10-20` our images are only pushed to the `Quay.io` registry. Older images are available on Docker Hub, but they will no longer be updated. ``` ### Example 1 This command pulls the `jupyter/scipy-notebook` image tagged `2025-12-31` from Quay.io if it is not already present on the local host.
It then starts a container running a Jupyter Server with the JupyterLab frontend and exposes the container's internal port `8888` to port `10000` of the host machine: ```bash docker run -p 10000:8888 quay.io/jupyter/scipy-notebook:2025-12-31 ``` You can modify the port on which the container's port is exposed by [changing the value of the `-p` option](https://docs.docker.com/engine/containers/run/#exposed-ports) to `-p 8888:8888`. Visiting `http://<hostname>:10000/?token=<token>` in a browser loads JupyterLab, where: - The `hostname` is the name of the computer running Docker - The `token` is the secret token printed in the console. The container remains intact for restart after the Server exits. ### Example 2 This command pulls the `jupyter/datascience-notebook` image tagged `2025-12-31` from Quay.io if it is not already present on the local host. It then starts an _ephemeral_ container running a Jupyter Server with the JupyterLab frontend and exposes the server on host port 10000. ```bash docker run -it --rm -p 10000:8888 -v "${PWD}":/home/jovyan/work quay.io/jupyter/datascience-notebook:2025-12-31 ``` The use of the `-v` flag in the command mounts the current working directory on the host (`${PWD}` in the example command) as `/home/jovyan/work` in the container. The server logs appear in the terminal. Visiting `http://<hostname>:10000/?token=<token>` in a browser loads JupyterLab. Due to the usage of [the `--rm` flag](https://docs.docker.com/reference/cli/docker/container/run/#rm) Docker automatically cleans up the container and removes the file system when the container exits, but any changes made to the `~/work` directory and its files in the container will remain intact on the host. [The `-i` flag](https://docs.docker.com/reference/cli/docker/container/run/#interactive) keeps the container's `STDIN` open, and lets you send input to the container through standard input. [The `-t` flag](https://docs.docker.com/reference/cli/docker/container/run/#tty) attaches a pseudo-TTY to the container.
```{note} By default, [jupyter's root_dir](https://jupyter-server.readthedocs.io/en/latest/other/full-config.html) is `/home/jovyan`. So, new notebooks will be saved there, unless you change the directory in the file browser. To change the default directory, you must specify `ServerApp.root_dir` by adding this line to the previous command: `start-notebook.py --ServerApp.root_dir=/home/jovyan/work`. ``` ## Choosing Jupyter frontend JupyterLab is the default for all the Jupyter Docker Stacks images. It is still possible to switch back to Jupyter Notebook (or to launch a different startup command). You can achieve this by passing the environment variable `DOCKER_STACKS_JUPYTER_CMD=notebook` (or any other valid `jupyter` subcommand) at container startup; more information is available in the [documentation](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/common.html#alternative-commands). ## Resources - [Documentation on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/) - [Issue Tracker on GitHub](https://github.com/jupyter/docker-stacks/issues) - [Jupyter Discourse Forum](https://discourse.jupyter.org/) - [Jupyter Website](https://jupyter.org) - [Images on Quay.io](https://quay.io/organization/jupyter) ## Acknowledgments - Starting from `2022-07-05`, `aarch64` self-hosted runners were sponsored by [`@mathbunnyru`](https://github.com/mathbunnyru/). 
Please, consider [sponsoring his work](https://github.com/sponsors/mathbunnyru) on GitHub - Starting from `2023-10-31`, `aarch64` self-hosted runners are sponsored by an amazing [`2i2c non-profit organization`](https://2i2c.org) - Starting from `2025-02-11`, we use GitHub-hosted `aarch64` runners ## CPU Architectures - We publish containers for both `x86_64` and `aarch64` platforms - Single-platform images have either `aarch64-` or `x86_64-` tag prefixes, for example, `quay.io/jupyter/base-notebook:aarch64-python-3.11.6` - Starting from `2022-09-21`, we create multi-platform images (except `tensorflow-notebook`) - Starting from `2023-06-01`, we create a multi-platform `tensorflow-notebook` image as well - Starting from `2024-02-24`, we create CUDA enabled variants of `pytorch-notebook` image for `x86_64` platform - Starting from `2024-03-26`, we create CUDA enabled variant of `tensorflow-notebook` image for `x86_64` platform ## Using old images [![Python versions badge](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)](https://www.python.org/downloads/ "Python versions supported") This project only builds one set of images at a time. 
If you want to use an older `Ubuntu` and/or `Python` version, you can use the following images: | Build Date | Ubuntu | Python | Tag | | ------------ | ------ | ------ | -------------- | | 2022-10-09 | 20.04 | 3.7 | `1aac87eb7fa5` | | 2022-10-09 | 20.04 | 3.8 | `a374cab4fcb6` | | 2022-10-09 | 20.04 | 3.9 | `5ae537728c69` | | 2022-10-09 | 20.04 | 3.10 | `f3079808ca8c` | | 2022-10-09 | 22.04 | 3.7 | `b86753318aa1` | | 2022-10-09 | 22.04 | 3.8 | `7285848c0a11` | | 2022-10-09 | 22.04 | 3.9 | `ed2908bbb62e` | | 2023-05-30 | 22.04 | 3.10 | `4d70cf8da953` | | 2024-08-26 | 22.04 | 3.11 | `00987883e58d` | | 2024-10-22 | 24.04 | 3.11 | `b74418220768` | | 2025-08-11 | 24.04 | 3.12 | `82d322f00937` | | weekly build | 24.04 | 3.13 | `latest` | ## Contributing Please see [the documentation](https://jupyter-docker-stacks.readthedocs.io/en/latest/) for information about how to contribute [issues](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/issues.html), [features](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/features.html), [recipes](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/recipes.html), [tests](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/tests.html), and [community-maintained stacks](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/stacks.html). ## LICENSE This project is licensed under the terms of the Modified BSD License (also known as New or Revised or 3-Clause BSD). ## About the Jupyter Development Team The Jupyter Development Team is the set of all contributors to the Jupyter project. This includes all of the Jupyter subprojects. The core team that coordinates development on GitHub can be found here: <https://github.com/orgs/jupyter/teams/docker-image-maintainers/members>. ## Our Copyright Policy Jupyter uses a shared copyright model. Each contributor maintains copyright over their contributions to Jupyter. But, it is important to note that these contributions are typically only changes to the repositories.
Thus, the Jupyter source code, in its entirety is not the copyright of any single person or institution. Instead, it is the collective copyright of the entire Jupyter Development Team. If individual contributors want to maintain a record of what changes/contributions they have specific copyright on, they should indicate their copyright in the commit message of the change, when they commit the change to one of the Jupyter repositories. With this in mind, the following banner should be used in any source code file to indicate the copyright and license terms: ```text # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. ``` ## Alternatives - [b-data](https://github.com/b-data)'s JupyterLab docker stacks - For [R](https://github.com/b-data/jupyterlab-r-docker-stack), [Python](https://github.com/b-data/jupyterlab-python-docker-stack), [MAX/Mojo](https://github.com/b-data/jupyterlab-mojo-docker-stack) and [Julia](https://github.com/b-data/jupyterlab-julia-docker-stack). With [code-server](https://github.com/coder/code-server) next to JupyterLab. Just Python – no [Conda](https://github.com/conda/conda) / [Mamba](https://github.com/mamba-org/mamba). 
- [rocker/binder](https://rocker-project.org/images/versioned/binder.html) - From the R focused [rocker-project](https://rocker-project.org), lets you run both RStudio and Jupyter either standalone or in a JupyterHub - [jupyter/repo2docker](https://github.com/jupyterhub/repo2docker) - Turn git repositories into Jupyter-enabled Docker Images - [openshift/source-to-image](https://github.com/openshift/source-to-image) - A tool for building artifacts from source code and injecting them into docker images - [jupyter-on-openshift/jupyter-notebooks](https://github.com/jupyter-on-openshift/jupyter-notebooks) - OpenShift compatible S2I builder for basic notebook images ================================================ FILE: SECURITY.md ================================================ # Security Policy ## Supported Versions Jupyter Docker Stacks only provides security updates for the latest version of each image. ## Reporting a Vulnerability The Jupyter Vulnerability Handling Process is described in detail in the [security documentation](https://github.com/jupyter/security/blob/main/docs/vulnerability-handling.md). ================================================ FILE: binder/Dockerfile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
# https://quay.io/repository/jupyter/base-notebook?tab=tags ARG REGISTRY=quay.io ARG OWNER=jupyter ARG BASE_IMAGE=$REGISTRY/$OWNER/base-notebook:2025-12-31 FROM $BASE_IMAGE LABEL maintainer="Jupyter Project " # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] ENV TAG="2025-12-31" COPY --chown=${NB_UID}:${NB_GID} binder/README.ipynb "${HOME}"/README.ipynb RUN jupyter labextension disable "@jupyterlab/apputils-extension:announcements" ================================================ FILE: binder/README.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# jupyter/base-notebook on Binder\n", "\n", "Run the cells below to inspect what's in the [jupyter/base-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-base-notebook) image from the Jupyter Docker Stacks project.\n", "\n", "You can launch the classic notebook interface in Binder by replacing `lab/tree/*` with `tree/` in the JupyterLab URL." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "print(\n", " f\"This container is using tag {os.environ['TAG']} of the jupyter/base-notebook image\"\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The Server is running as the following user." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!id" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here's the contents of that user's home directory, the default notebook directory for the server." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!ls -al" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`mamba` is available in the user's path." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!which mamba" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The user has read/write access to the root mamba environment." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!ls -l /opt/conda" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The following packages are mamba-installed in the base image to support [Jupyter Notebook](https://github.com/jupyter/notebook), [JupyterLab](https://github.com/jupyterlab/jupyterlab), and their use in [JupyterHub](https://github.com/jupyterhub/jupyterhub) environments (e.g., [MyBinder](https://mybinder.org/))." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!mamba list" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Other images in the [jupyter/docker-stacks project](https://github.com/jupyter/docker-stacks) include additional libraries. See the [Jupyter Docker Stacks documentation](https://jupyter-docker-stacks.readthedocs.io/en/latest/) for full details." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: docs/conf.py ================================================ # Configuration file for the Sphinx documentation builder. 
# # For the full list of built-in configuration values, see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = "docker-stacks" copyright = "2025, Project Jupyter" author = "Project Jupyter" version = "latest" release = "latest" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [] templates_path = ["_templates"] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] language = "en" # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = "alabaster" html_static_path = ["_static"] # The file above was generated using sphinx 8.1.3 with this command: # sphinx-quickstart --project "docker-stacks" --author "Project Jupyter" -v "latest" -r "latest" -l en --no-sep --no-makefile --no-batchfile # These are custom options for this project html_theme = "sphinx_book_theme" html_title = "Docker Stacks documentation" html_logo = "_static/jupyter-logo.svg" html_theme_options = { "logo": { "text": html_title, }, "navigation_with_keys": False, "path_to_docs": "docs", "repository_branch": "main", "repository_url": "https://github.com/jupyter/docker-stacks", "use_download_button": True, "use_edit_page_button": True, "use_issues_button": True, "use_repository_button": True, } html_last_updated_fmt = "%Y-%m-%d" extensions = ["myst_parser", "sphinx_copybutton", "sphinx_last_updated_by_git"] source_suffix = { ".rst": "restructuredtext", ".md": "markdown", } pygments_style = "sphinx" # MyST configuration reference: https://myst-parser.readthedocs.io/en/latest/configuration.html myst_heading_anchors = 3 linkcheck_ignore = [ 
r".*github\.com.*#", # javascript based anchors r"http://127\.0\.0\.1:.*", # various examples r"https://mybinder\.org/v2/gh/.*", # lots of 500 errors r"https://packages\.ubuntu\.com/search\?keywords=openjdk", # frequent read timeouts r"https://anaconda\.org\/conda-forge", # frequent read timeouts ] linkcheck_allowed_redirects = { r"https://results\.pre-commit\.ci/latest/github/jupyter/docker-stacks/main": r"https://results\.pre-commit\.ci/run/github/.*", # Latest main CI build r"https://github\.com/jupyter/docker-stacks/issues/new.*": r"https://github\.com/login.*", # GitHub wants user to be logon to use this features r"https://github\.com/orgs/jupyter/teams/docker-image-maintainers/members": r"https://github\.com/login.*", } ================================================ FILE: docs/contributing/features.md ================================================ # New Features Thank you for contributing to the Jupyter Docker Stacks! We review pull requests for new features (e.g., new packages, new scripts, new flags) to balance the value of the images to the Jupyter community with the cost of maintaining the images over time. ## Suggesting a New Feature Please follow the process below to suggest a new feature for inclusion in one of the core stacks: 1. Open a [GitHub feature request issue](https://github.com/jupyter/docker-stacks/issues/new?template=feature_request.yml) describing the feature you'd like to contribute. 2. Discuss with the maintainers whether the addition makes sense in [one of the core stacks](../using/selecting.md#core-stacks), as a [way to build a custom set of images](../using/custom-images.md), as a [recipe in the documentation](recipes.md), as a [community stack](stacks.md), or as something else entirely. ## Selection Criteria Roughly speaking, we evaluate new features based on the following criteria: - **Usefulness to Jupyter users**: Is the feature generally applicable across domains? 
Does it work with JupyterLab, Jupyter Notebook, JupyterHub, etc.? - **Fit with the image purpose**: Does the feature match the theme of the stack to which it will be added? Would it fit better in a new community stack? - **The complexity of build/runtime configuration**: How many lines of code does the feature require in one of the Dockerfiles or startup scripts? Does it require new scripts entirely? Do users need to adjust how they use the images? - **Impact on image metrics**: How many bytes do the feature and its dependencies add to the image(s)? How many minutes do they add to the build time? - **Ability to support the addition**: Can existing maintainers answer user questions and address future build issues? Are the contributors interested in helping with long-term maintenance? Can we write tests to ensure the feature continues to work over the years? ## Submitting a Pull Request If there's agreement that the feature belongs in one or more of the core stacks: 1. Implement the feature in a local clone of the `jupyter/docker-stacks` project. 2. Please build the image locally before submitting a pull request. It shortens the debugging cycle by taking some load off GitHub Actions, which graciously provides free build services for open-source projects like this one. If you use `make`, call: ```bash make build/<somestack> ``` 3. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes. 4. Watch for GitHub to report a build success or failure for your PR. 5. Discuss changes with the maintainers and address any build issues. ================================================ FILE: docs/contributing/issues.md ================================================ # Project Issues We appreciate you taking the time to report an issue you encountered while using the Jupyter Docker Stacks. Please review the following guidelines when reporting your problem.
- To report a security vulnerability, please use GitHub's "Report a Vulnerability" button under Security > Advisories on the appropriate repo, e.g. [report here for Jupyter Docker Stacks](https://github.com/jupyter/docker-stacks/security/advisories). You may also send an email to <security@ipython.org>, but the GitHub reporting system is preferred. - If you think your problem is unique to the Jupyter Docker Stacks images, please search the [jupyter/docker-stacks issue tracker](https://github.com/jupyter/docker-stacks/issues?q=is%3Aissue%20) to see if someone else has already reported the same problem. If not, please open a [GitHub bug report issue](https://github.com/jupyter/docker-stacks/issues/new?template=bug_report.yml) and provide all the information requested in the issue template. Additionally, check the [Troubleshooting Common Problems](../using/troubleshooting.md) page in the documentation before submitting an issue. - If the issue you're seeing is with one of the open-source libraries included in the Docker images and is reproducible outside the images, please file a bug with the appropriate open-source project. - If you have a general question about how to use the Jupyter Docker Stacks in your environment, in conjunction with other tools, customizations, and so on, please post your question on the [Jupyter Discourse site](https://discourse.jupyter.org). ================================================ FILE: docs/contributing/lint.md ================================================ # Lint To enforce some rules, **linters** are used in this project. Linters can be run either during the **development phase** (by the developer) or the **integration phase** (by GitHub Actions). To integrate and enforce this process in the project lifecycle, we are using **git hooks** through [pre-commit](https://pre-commit.com/). ## Using pre-commit hooks ### Pre-commit hook installation _pre-commit_ is a Python package that needs to be installed. To achieve this, use the generic task to install all Python development dependencies.
```sh # Install all development dependencies for the project pip install --upgrade -r requirements-dev.txt # It can also be installed directly pip install pre-commit ``` Then the git hooks scripts configured for the project in `.pre-commit-config.yaml` need to be installed in the local git repository. ```sh pre-commit install ``` ### Run Now, _pre-commit_ (and so configured hooks) will run automatically on `git commit` on each changed file. However, you can also run it against all files manually. ```{note} Hadolint pre-commit uses Docker to run, so `docker` should be running while executing this command. ``` ```sh pre-commit run --all-files --hook-stage manual ``` ```{note} We're running `pre-commit` with `--hook-stage manual`, because `pre-commit` is run on modified files only, which doesn't work well with `mypy --follow-imports error`. More information can be found in [`.pre-commit-config.yaml` file](https://github.com/jupyter/docker-stacks/blob/main/.pre-commit-config.yaml) ``` ## Image Lint To comply with [Docker best practices](https://docs.docker.com/build/building/best-practices/), we are using the [Hadolint](https://github.com/hadolint/hadolint) tool to analyze each `Dockerfile`. ### Ignoring Rules Sometimes it is necessary to ignore [some rules](https://github.com/hadolint/hadolint#rules). The following rules are ignored by default for all images in the `.hadolint.yaml` file. - [`DL3006`][dl3006]: We use a specific policy to manage image tags. - The `docker-stacks-foundation` `FROM` clause is fixed but based on an argument (`ARG`). - Building downstream images from (`FROM`) the latest is done on purpose. - [`DL3008`][dl3008]: System packages are always updated (`apt-get`) to the latest version. - [`DL3013`][dl3013]: We always install the latest packages using `pip` The preferred way to ignore other rules is to flag them in the `Dockerfile`. You can use a special comment directly above the Dockerfile instruction you want to make an exception for. 
Ignore rule comments look like `# hadolint ignore=DL3001,SC1081`. For example: ```dockerfile FROM ubuntu # hadolint ignore=DL3003,SC1035 RUN cd /tmp && echo "hello!" ``` [dl3006]: https://github.com/hadolint/hadolint/wiki/DL3006 [dl3008]: https://github.com/hadolint/hadolint/wiki/DL3008 [dl3013]: https://github.com/hadolint/hadolint/wiki/DL3013 ================================================ FILE: docs/contributing/packages.md ================================================ # Package Updates Generally, we do not pin package versions in our `Dockerfile`s. Dependency resolution is a difficult thing to do. All this means that packages might have old versions. Images are rebuilt weekly, so usually, packages receive updates quite frequently. ```{note} We pin major.minor version of Python, so this will stay the same even after invoking the `mamba update` command. ``` ## Outdated packages To help identify packages that can be updated, you can use the following helper tool. It will list all the outdated packages installed in the `Dockerfile`. Dependencies are filtered to display only the requested packages. ```bash make check-outdated/base-notebook # INFO test_outdated:test_outdated.py:80 3/8 (38%) packages could be updated # INFO test_outdated:test_outdated.py:82 # Package Current Newest # ---------- --------- -------- # conda 4.7.12 4.8.2 # jupyterlab 1.2.5 2.0.0 # python 3.7.4 3.8.2 ``` ================================================ FILE: docs/contributing/recipes.md ================================================ # New Recipes We welcome contributions of [recipes](../using/recipes.md), which are short examples of using, configuring, or extending the Docker Stacks for inclusion in the documentation site. Follow the process below to add a new recipe: 1. Open the `docs/using/recipes.md` source file. 2. Add a second-level Markdown heading naming your recipe at the bottom of the file (e.g., `## Slideshows with JupyterLab and RISE`) 3. 
Write the body of your recipe under the heading, including whatever command line, links, etc. you need. 4. If you have a Dockerfile, please put it in a `recipe_code` subdirectory. This file will be built automatically by [contributed-recipes workflow](https://github.com/jupyter/docker-stacks/blob/main/.github/workflows/contributed-recipes.yml). 5. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes. Maintainers will respond and work with you to address any formatting or content issues. ================================================ FILE: docs/contributing/stacks.md ================================================ # Community Stacks We love to see the community create and share new Jupyter Docker images. We've put together a [cookiecutter project](https://github.com/jupyter/cookiecutter-docker-stacks) and the documentation below to help you get started defining, building, and sharing your Jupyter environments in Docker. Following these steps will: 1. Set up a project on GitHub containing a Dockerfile based on any image we provide. 2. Configure GitHub Actions to build and test your image when users submit pull requests to your repository. 3. Configure Quay.io to host your images for others to use. 4. Update the [list of community stacks](../using/selecting.md#community-stacks) in this documentation to include your image. This approach mirrors how we build and share the core stack images. Feel free to follow it or pave your path using alternative services and build tools. ## Creating a Project First, install [cookiecutter](https://github.com/cookiecutter/cookiecutter) using _pip_ or _mamba_: ```bash pip install cookiecutter # or mamba install cookiecutter ``` Run the cookiecutter command pointing to the [jupyter/cookiecutter-docker-stacks](https://github.com/jupyter/cookiecutter-docker-stacks) project on GitHub. 
```bash cookiecutter https://github.com/jupyter/cookiecutter-docker-stacks.git ``` Enter a name for your new stack image. This will serve as both the git repository name and the part of the Docker image name after the slash. ```text stack_name [my-jupyter-stack]: ``` Enter the user or organization name under which this stack will reside on Docker Hub. You must have access to manage this Docker Hub organization to push images here. ```text stack_org [my-project]: ``` Select an image from the `jupyter/docker-stacks` project that will serve as the base for your new image. ```text stack_base_image [quay.io/jupyter/base-notebook]: ``` Enter a longer description of the stack for your README. ```text stack_description [my-jupyter-stack is a community-maintained Jupyter Docker Stack image]: ``` Create a GitHub repository to store your project. Initialize your project as a Git repository and push it to GitHub. ```bash cd <stack_name> git init git add . git commit -m 'Seed repo' git remote add origin <url from github> git push -u origin main ``` ## Exploring GitHub Actions 1. By default, the newly created `.github/workflows/docker.yaml` workflow will trigger the CI pipeline whenever you push to your `main` branch and when any Pull Requests are made to your repository. For more details on this configuration, visit the [GitHub Actions documentation on triggers](https://docs.github.com/en/actions/reference/workflows-and-actions/events-that-trigger-workflows). 2. Go to your repository and click on the **Actions** tab. From there, you can click on the workflows on the left-hand side of the screen. ![GitHub page for jupyter/docker-stacks with the Actions tab active and a rectangle around the "Build Docker Images" workflow in the UI](../_static/contributing/stacks/github-actions-tab.png) ```{note} The first run is expected to fail because we haven't yet added Docker credentials to push the image. ``` 3. On the next screen, you will see information about the workflow run and duration.
If you click the button with the workflow name again, you will see the logs for the workflow steps. ![GitHub Actions page showing the "Build Docker Images" workflow](../_static/contributing/stacks/github-actions-workflow.png) ## Configuring Docker Hub ```{note} Jupyter Docker Stacks are hosted on Quay.io, but in this example, we show you how to host your image on Docker Hub. ``` Now, configure Docker Hub to build your stack image and push it to the Docker Hub repository whenever you merge a GitHub pull request to the main branch of your project. 1. Visit [https://hub.docker.com/](https://hub.docker.com/) and log in. 2. Create a new repository - make sure to use the correct namespace (account or organization). Enter the name of the image matching the one you entered when prompted with `stack_name` by the cookiecutter. ![Docker Hub - 'Create repository' page with the name field set to "My specialized jupyter stack"](../_static/contributing/stacks/docker-repo-name.png) 3. Enter a description for your image. 4. Click on your avatar in the top-right corner and select Account Settings. ![The Docker Hub page zoomed into the user's settings and accounts menu](../_static/contributing/stacks/docker-user-dropdown.png) 5. Click on **Security** and then click on the **New Access Token** button. ![Docker Hub - Account page with the "Security" tab active and a rectangle highlighting the "New Access Token" button in the UI](../_static/contributing/stacks/docker-org-security.png) 6. Enter a meaningful name for your token and click on **Generate** ![Docker Hub - New Access Token page with the name field set to "test-stack token"](../_static/contributing/stacks/docker-org-create-token.png) 7. Copy the personal access token displayed on the next screen. ```{note} **You will not be able to see it again after you close the pop-up window**. ``` 8. Head back to your GitHub repository and click on the **Settings tab**. 9. 
Click on the **Secrets and variables->Actions** section and then on the **New repository secret** button in the top right corner. ![GitHub page with the "Setting" tab active and a rectangle highlighting the "New repository secret" button in the UI](../_static/contributing/stacks/github-create-secrets.png) 10. Create a **DOCKERHUB_TOKEN** secret and paste the Personal Access Token from Docker Hub in the **value** field. ![GitHub - Actions/New secret page with the Name field set to "DOCKERHUB_TOKEN"](../_static/contributing/stacks/github-secret-token.png) 11. Now you're ready to go and you can restart a failed workflow. ## Defining Your Image Make edits to the Dockerfile in your project to add third-party libraries and configure Jupyter applications. Refer to the Dockerfiles for the core stacks (e.g., [jupyter/datascience-notebook](https://github.com/jupyter/docker-stacks/blob/main/images/datascience-notebook/Dockerfile)) to get a feel for what's possible and the best practices. [Submit pull requests](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) to your project repository on GitHub. Ensure your image builds correctly on GitHub Actions before merging to the main branch. After merging to the main branch, your image will be built and pushed to the Docker Hub automatically. ## Sharing Your Image Finally, if you'd like to add a link to your project to this documentation site, please do the following: 1. Fork the [jupyter/docker-stacks](https://github.com/jupyter/docker-stacks) GitHub repository. 2. Open the `docs/using/selecting.md` source file and locate the **Community Stacks** section in your fork. 3. Add a table entry with a link to your project, a binder link, and a short description of what your Docker image contains. 4. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes. 
Maintainers will respond and work with you to address any formatting or content issues. ================================================ FILE: docs/contributing/tests.md ================================================ # Image Tests We greatly appreciate Pull Requests that extend the automated tests that vet the basic functionality of the Docker images. ## How the Tests Work A [GitHub Actions workflow](https://github.com/jupyter/docker-stacks/blob/main/.github/workflows/docker-build-test-upload.yml) runs tests against pull requests submitted to the `jupyter/docker-stacks` repository. We use the `pytest` module to run tests on the image. `conftest.py` and `pytest.ini` in the `tests` folder define the environment in which tests are run. [Read the `pytest` documentation](https://docs.pytest.org/en/latest/contents.html). The actual image-specific test files are located in folders like `tests/by_image/<somestack>/` (e.g., `tests/by_image/docker-stacks-foundation/`, etc.). ```{note} If your test is located in `tests/by_image/<somestack>/`, it will be run against the `jupyter/<somestack>` image and against all the [images inherited from this image](../using/selecting.md#image-relationships). ``` Many tests make use of global [pytest fixtures](https://docs.pytest.org/en/latest/reference/fixtures.html) defined in the [conftest.py](https://github.com/jupyter/docker-stacks/blob/main/tests/conftest.py) file. ## Unit tests You can add a unit test if you want to run a Python script in one of our images. You should create a `tests/by_image/<somestack>/units/` directory, if it doesn't already exist, and put your file there. Files in this folder will be executed in the container when tests are run. You can see a [TensorFlow package example here](https://github.com/jupyter/docker-stacks/blob/HEAD/tests/by_image/tensorflow-notebook/units/unit_tensorflow.py). ## Contributing New Tests Please follow the process below to add new tests: 1.
Add your test code to one of the modules in the `tests/by_image/<somestack>/` directory or create a new module. 2. Build one or more images you intend to test and run the tests locally. If you use `make`, call: ```bash make build/<somestack> make test/<somestack> ``` 3. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes. 4. Watch for GitHub to report a build success or failure for your PR. 5. Discuss changes with the maintainers and address any issues running the tests on GitHub. ================================================ FILE: docs/index.rst ================================================ .. include:: ../README.md :parser: myst_parser.sphinx_ Table of Contents ----------------- .. toctree:: :maxdepth: 2 :caption: User Guide using/selecting using/running using/common using/specifics using/recipes using/custom-images using/troubleshooting using/faq using/changelog .. toctree:: :maxdepth: 2 :caption: Contributor Guide contributing/issues contributing/features contributing/tests contributing/lint contributing/recipes contributing/stacks contributing/packages .. toctree:: :maxdepth: 2 :caption: Maintainer Guide maintaining/new-images-and-packages-policy maintaining/tagging maintaining/tasks .. toctree:: :maxdepth: 2 :caption: Getting Help Issue Tracker on GitHub <https://github.com/jupyter/docker-stacks/issues> Jupyter Discourse Forum <https://discourse.jupyter.org> Jupyter Website <https://jupyter.org> ================================================ FILE: docs/maintaining/new-images-and-packages-policy.md ================================================ # New images / packages policy There are many things we consider while adding new images and packages. Here is a non-exhaustive list of things we do care about: 1.
**Software health**, details, and maintenance status - reasonable versioning is adopted, and the version is considered to be stable - has been around for several years - the package maintains documentation - a changelog is actively maintained - a release procedure with helpful automation is established - multiple people are involved in the maintenance of the project - provides a `conda-forge` package besides a `pypi` package, where both are kept up to date - supports both `x86_64` and `aarch64` architectures 2. **Installation consequences** - GitHub Actions build time - Image sizes - All requirements should be installed as well 3. Jupyter Docker Stacks _**image fit**_ - new package or stack changes (or inherits from) the most suitable stack 4. **Software impact** for users of docker-stacks images - How this image can help existing users, or maybe reduce the need to build new images 5. Why it shouldn't just be a documented **recipe** 6. Impact on **security** - Does the package open additional ports or add new web endpoints that could be exploited? With all this in mind, we have a voting group that consists of [@mathbunnyru](https://github.com/mathbunnyru), [@consideRatio](https://github.com/consideRatio), [@yuvipanda](https://github.com/yuvipanda), and [@manics](https://github.com/manics). This voting group is responsible for accepting or declining new packages and stacks. A change is accepted if there are **at least 2 positive votes**. ================================================ FILE: docs/maintaining/tagging.md ================================================ # Tags and manifests The main purpose of the source code in [the `tagging` folder](https://github.com/jupyter/docker-stacks/tree/main/tagging) is to properly write the tags file, build history line, and manifest for a single-platform image, apply these tags, and then merge single-platform images into one multi-arch image.
## What is a tag and a manifest A tag is a label attached to a Docker image identifying specific attributes or versions. For example, an image `jupyter/base-notebook` with Python 3.10.5 will have a full image name `quay.io/jupyter/base-notebook:python-3.10.5`. These tags are pushed to our [Quay.io registry](https://quay.io/organization/jupyter). A manifest is a description of important image attributes written in Markdown format. For example, we dump all `conda` packages with their versions into the manifest. ## Main principles - All images are organized in a hierarchical tree. More info on [image relationships](../using/selecting.md#image-relationships). - `TaggerInterface` and `ManifestInterface` are interfaces for functions to generate tags and manifest pieces by running commands in Docker containers. - Tags and manifests are reevaluated for each image in the hierarchy since values may change between parent and child images. - To tag an image and create its manifest and build history line, run `make hook/<somestack>` (e.g., `make hook/base-notebook`). ## Utils ### DockerRunner `DockerRunner` is a helper class to easily run a docker container and execute commands inside this container: ```{literalinclude} tagging_examples/docker_runner.py :language: py :lines: 3- ``` ### GitHelper `GitHelper` methods are run in the current `git` repo and give information about the last commit hash and commit message: ```{literalinclude} tagging_examples/git_helper.py :language: py :lines: 3- ``` The prefix of the commit hash (namely, its first 12 characters) is used as an image tag to make it easy to inherit from a fixed version of a docker image. ## Taggers and Manifests ### Tagger `Tagger` is a function that runs commands inside a docker container to calculate a tag for an image.
All the taggers follow `TaggerInterface`: ```{literalinclude} ../../tagging/taggers/tagger_interface.py :language: py :start-at: TaggerInterface ``` So, the `tagger(container)` gets a docker container as an input and returns a tag. For example: ```{literalinclude} ../../tagging/taggers/sha.py :language: py :start-at: def ``` - `taggers/` subdirectory contains all taggers. - `apps/write_tags_file.py`, `apps/apply_tags.py`, and `apps/merge_tags.py` are Python executables used to write tags for an image, apply tags from a file, and create multi-arch images. ### Manifest All manifest functions except `build_info_manifest` follow `ManifestInterface` and `manifest(container)` method returns a piece of the manifest. ```{literalinclude} ../../tagging/manifests/manifest_interface.py :language: py :start-at: ManifestInterface ``` For example: ```{literalinclude} ../../tagging/manifests/apt_packages.py :language: py :start-at: def ``` where: - `quoted_output(container, cmd)` simply runs the command inside a container using `DockerRunner.exec_cmd` and wraps it to triple quotes to create a valid markdown piece. It also adds the command which was run to the markdown piece. - `manifests/` subdirectory contains all the manifests. - `apps/write_manifest.py` is a Python executable to create the build manifest and history line for an image. ## Images Hierarchy All images' dependencies on each other and what taggers and manifests are applicable to them are defined in `hierarchy/images_hierarchy.py`. `hierarchy/get_taggers.py` and `hierarchy/get_manifests.py` define functions to get the taggers and manifests for a specific image. ================================================ FILE: docs/maintaining/tagging_examples/docker_runner.py ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
from tagging.utils.docker_runner import DockerRunner with DockerRunner("ubuntu") as container: DockerRunner.exec_cmd(container, cmd="env") ================================================ FILE: docs/maintaining/tagging_examples/git_helper.py ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. from tagging.utils.git_helper import GitHelper print("Git hash:", GitHelper.commit_hash()) print("Git message:", GitHelper.commit_message()) ================================================ FILE: docs/maintaining/tasks.md ================================================ # Maintainer Playbook ## Merging Pull Requests To build new images and publish them to the Registry, do the following: 1. Make sure GitHub Actions status checks pass for the PR. 2. Merge the PR. 3. Monitor the merge commit GitHub Actions status. ```{note} GitHub Actions are pretty reliable, so please investigate if some error occurs. Building Docker images in PRs is the same as building them in the default branch. The only difference is that single-platform images are pushed to Registry and then tags are merged for `x86_64` and `aarch64`. ``` 4. Avoid merging another PR to the main branch until all pending builds in the main branch are complete. This way, you will know which commit might have broken the build and also have the correct tags for moving tags (like the `Python` version). ## Updating Python version When a new `Python` version is released, we wait for: - all the dependencies to be available (as wheels or in `conda-forge`). - the first `Python` patch release for this version. This allows us to avoid many bugs, which can happen in a major release. ## Updating the Ubuntu Base Image `jupyter/docker-stacks-foundation` is based on the LTS Ubuntu docker image. We wait for the first point release of the new LTS Ubuntu before updating the version. 
Other images are directly or indirectly inherited from `jupyter/docker-stacks-foundation`. We rebuild our images automatically each week, which means they frequently receive updates. When there's a security fix in the Ubuntu base image, it's a good idea to manually trigger the rebuild of images [from the GitHub Actions workflow UI](https://github.com/jupyter/docker-stacks/actions/workflows/docker.yml). Clicking the `Run Workflow` button will trigger this process. ## Adding a New Core Image to the Registry ```{note} In general, we do not add new core images and ask contributors to either create a [recipe](../using/recipes.md) or [community stack](../contributing/stacks.md). We have a [policy](./new-images-and-packages-policy.md), which we consider when adding new images or new packages to existing images. ``` [Take a look at an example](https://github.com/jupyter/docker-stacks/pull/1936/files) of adding a new image. When there's a new stack definition, check the following before merging the PR: 1. PR includes an update to the stack overview diagram [in the documentation](../using/selecting.md#image-relationships). The image links to the [blockdiag source](http://interactive.blockdiag.com/) used to create it. 2. PR updates the [Makefile](https://github.com/jupyter/docker-stacks/blob/main/Makefile). 3. Necessary Tagger(s)/Manifest(s) are added for the new image in the [tagging](https://github.com/jupyter/docker-stacks/tree/main/tagging) folder. 4. A new repository is created in the `jupyter` organization in the Registry, and it's named after the stack folder in the git repo. 5. Robot `Write` permission is added in the `Repository Settings`. ## Adding a New Registry Owner Account 1. Visit <https://quay.io/organization/jupyter/teams/owners> 2. Add the maintainer's username. ## Restarting a failed build If an automated build in GitHub Actions fails, you can restart the failed jobs on GitHub. You can also download the artifacts and investigate them for any issues.
================================================ FILE: docs/requirements.txt ================================================ # ReadTheDocs environment contains old package versions preinstalled # So, to ensure we have modern packages, we pin minimum versions of the packages we need docutils>=0.17.1 myst-parser>=0.18.0 sphinx>=4.5.0 sphinx-book-theme>=1.0.0 sphinx-copybutton>=0.5.0 sphinx-last-updated-by-git>=0.3.4 ================================================ FILE: docs/using/changelog.md ================================================ ```{include} ../../CHANGELOG.md ``` ================================================ FILE: docs/using/common.md ================================================ # Common Features Except for `jupyter/docker-stacks-foundation`, a container launched from any Jupyter Docker Stacks image runs a Jupyter Server with the JupyterLab frontend. The container does so by executing a `start-notebook.py` script. This script configures the internal container environment and then runs `jupyter lab`, passing any command-line arguments received. This page describes the options supported by the startup script and how to bypass it to run alternative commands. ## Jupyter Server Options You can pass [Jupyter Server options](https://jupyter-server.readthedocs.io/en/latest/operators/public-server.html) to the `start-notebook.py` script when launching the container. 1. For example, to secure the Jupyter Server with a [custom password](https://jupyter-server.readthedocs.io/en/latest/operators/public-server.html#preparing-a-hashed-password) hashed using `jupyter_server.auth.passwd()` instead of the default token, you can run the following (this hash was generated for the `my-password` password): ```bash docker run -it --rm -p 8888:8888 quay.io/jupyter/base-notebook \ start-notebook.py --PasswordIdentityProvider.hashed_password='argon2:$argon2id$v=19$m=10240,t=10,p=8$JdAN3fe9J45NvK/EPuGCvA$O/tbxglbwRpOFuBNTYrymAEH6370Q2z+eS1eF4GM6Do' ``` 2. 
To set the [base URL](https://jupyter-server.readthedocs.io/en/latest/operators/public-server.html#running-the-notebook-with-a-customized-url-prefix) of the Jupyter Server, you can run the following: ```bash docker run -it --rm -p 8888:8888 quay.io/jupyter/base-notebook \ start-notebook.py --ServerApp.base_url=/customized/url/prefix/ ``` ## Docker Options You may instruct the `start-notebook.py` script to customize the container environment before launching the Server. You do so by passing arguments to the `docker run` command. ### User-related configurations - `-e NB_USER=<username>` - The desired username and associated home folder. The default value is `jovyan`. Setting `NB_USER` redefines the `jovyan` default user and ensures that the desired user has the correct file permissions for the new home directory created at `/home/<username>`. For this option to take effect, you **must** run the container with `--user root`, set the working directory `-w "/home/<username>"`, and set the environment variable `-e CHOWN_HOME=yes`. _Example usage:_ ```bash docker run -it --rm \ -p 8888:8888 \ --user root \ -e NB_USER="my-username" \ -e CHOWN_HOME=yes \ -w "/home/my-username" \ quay.io/jupyter/base-notebook ``` ```{note} If you set `NB_USER` to `root`, the `root` home dir will be set to `/home/root`. See discussion [here](https://github.com/jupyter/docker-stacks/issues/2042). ``` - `-e NB_UID=<numeric uid>` - Instructs the startup script to switch the numeric user ID of `${NB_USER}` to the given value. The default value is `1000`. This feature is useful when mounting host volumes with specific owner permissions. You **must** run the container with `--user root` for this option to take effect. (The startup script will `su ${NB_USER}` after adjusting the user ID.)
Instead, you might consider using the modern Docker-native options [`--user`](https://docs.docker.com/engine/containers/run/#user) and [`--group-add`](https://docs.docker.com/engine/containers/run/#additional-groups) - see the last bullet in this section (`--user 5000 --group-add users`) for more details. - `-e NB_GID=<numeric gid>` - Instructs the startup script to change the primary group of `${NB_USER}` to `${NB_GID}` (the new group is named `${NB_GROUP}` if it is defined; otherwise, it is named `${NB_USER}`). This feature is useful when mounting host volumes with specific group permissions. You **must** run the container with `--user root` for this option to take effect. (The startup script will `su ${NB_USER}` after adjusting the group ID.) Instead, you might consider using the modern Docker options `--user` and `--group-add` - see the last bullet in this section (`--user 5000 --group-add users`) for more details. The user is added to the supplemental group `users` (gid 100) to grant write access to the home directory and `/opt/conda`. If you override the user/group logic, ensure the user stays in the group `users` if you want them to be able to modify files in the image. - `-e NB_GROUP=<name>` - The name used for `${NB_GID}`, which defaults to `${NB_USER}`. This group name is only used if `${NB_GID}` is specified and is completely optional: it has only a cosmetic effect. - `--user 5000 --group-add users` - Launches the container with a specific user ID and adds that user to the `users` group so that it can modify files in the default home directory and `/opt/conda`. You can use these arguments as alternatives to setting `${NB_UID}` and `${NB_GID}`. ### Permission-specific configurations - `-e NB_UMASK=<umask>` - Configures Jupyter to use a `umask` value different from the default (`022`). For example, if setting `umask` to `002`, new files will be readable and writable by group members instead of the owner only. 
[Check this Wikipedia article](https://en.wikipedia.org/wiki/Umask) for an in-depth description of `umask` and suitable values for multiple needs. While the default `umask` value should be sufficient for most use cases, you can set the `NB_UMASK` value to fit your requirements. ```{note} When `NB_UMASK` is set, it only applies to the Jupyter process itself - you cannot use it to set a `umask` for additional files created during `run-hooks.sh` (for example, via `pip` or `conda`). If you need to set a `umask` for these, you **must** set the `umask` value for each command. ``` - `-e CHOWN_HOME=yes` - Instructs the startup script to change the `${NB_USER}` home directory owner and group to the current value of `${NB_UID}` and `${NB_GID}`. This change will take effect even if the user home directory is mounted from the host using `-v` as described below. The change is **not** applied recursively by default. You can modify the `chown` behavior by setting `CHOWN_HOME_OPTS` (e.g., `-e CHOWN_HOME_OPTS='-R'`). - `-e CHOWN_EXTRA="<some dir>,<some other dir>"` - Instructs the startup script to change the owner and group of each comma-separated container directory to the current value of `${NB_UID}` and `${NB_GID}`. The change is **not** applied recursively by default. You can modify the `chown` behavior by setting `CHOWN_EXTRA_OPTS` (e.g., `-e CHOWN_EXTRA_OPTS='-R'`). - `-e GRANT_SUDO=yes` - Instructs the startup script to grant the `NB_USER` user passwordless `sudo` capability. You do **not** need this option to allow the user to `conda` or `pip` install additional packages. This option is helpful for cases when you wish to give `${NB_USER}` the ability to install OS packages with `apt` or modify other root-owned files in the container. You **must** run the container with `--user root` for this option to take effect. (The `start-notebook.py` script will `su ${NB_USER}` after adding `${NB_USER}` to sudoers.) 
**You should only enable `sudo` if you trust the user or if the container runs on an isolated host.** ### Additional runtime configurations - `-e GEN_CERT=yes` - Instructs the startup script to generate a self-signed SSL certificate. Configures Jupyter Server to use it to accept encrypted HTTPS connections. - `-e DOCKER_STACKS_JUPYTER_CMD=<jupyter command>` - Instructs the startup script to run `jupyter ${DOCKER_STACKS_JUPYTER_CMD}` instead of the default `jupyter lab` command. See [Switching back to the classic notebook or using a different startup command][switch_back] for available options. This setting is helpful in container orchestration environments where setting environment variables is more straightforward than changing command line parameters. - `-e RESTARTABLE=yes` - Runs Jupyter in a loop so that quitting Jupyter does not cause the container to exit. This may be useful when installing extensions that require restarting Jupyter. - `-v /some/host/folder/for/work:/home/jovyan/work` - Mounts a host machine directory as a folder in the container. This configuration is useful for preserving notebooks and other work even after the container has been destroyed. **You must grant the within-container notebook user or group (`NB_UID` or `NB_GID`) write access to the host directory (e.g., `sudo chown 1000 /some/host/folder/for/work`).** - `-e JUPYTER_ENV_VARS_TO_UNSET=ADMIN_SECRET_1,ADMIN_SECRET_2` - Unsets specified environment variables in the default startup script. The variables are unset after the hooks have been executed but before the command provided to the startup script runs. - `-e NOTEBOOK_ARGS="--log-level='DEBUG' --dev-mode"` - Adds custom options to `jupyter` commands. This way, the user could use any option supported by the `jupyter` subcommand. - `-e JUPYTER_PORT=8117` - Changes the port in the container that Jupyter is using to the value of the `${JUPYTER_PORT}` environment variable. 
This may be useful if you run multiple instances of Jupyter in swarm mode and want to use a different port for each instance. ## Startup Hooks You can further customize the container environment by adding shell scripts (`*.sh`) to be sourced or executables (`chmod +x`) to be run to the paths below: - `/usr/local/bin/start-notebook.d/` - handled **before** any of the standard options noted above is applied - `/usr/local/bin/before-notebook.d/` - handled **after** all the standard options noted above are applied and run right before the Server launches See the [`run-hooks.sh` script](https://github.com/jupyter/docker-stacks/blob/main/images/docker-stacks-foundation/run-hooks.sh) and how it's used in the [`start.sh`](https://github.com/jupyter/docker-stacks/blob/main/images/docker-stacks-foundation/start.sh) script for execution details. ## SSL Certificates You may mount an SSL key and certificate file into a container and configure the Jupyter Server to use them to accept HTTPS connections. For example, to mount a host folder containing a `notebook.key` and `notebook.crt` and use them, you might run the following: ```bash docker run -it --rm -p 8888:8888 \ -v /some/host/folder:/etc/ssl/notebook \ quay.io/jupyter/base-notebook \ start-notebook.py \ --ServerApp.keyfile=/etc/ssl/notebook/notebook.key \ --ServerApp.certfile=/etc/ssl/notebook/notebook.crt ``` Alternatively, you may mount a single PEM file containing both the key and certificate. For example: ```bash docker run -it --rm -p 8888:8888 \ -v /some/host/folder/notebook.pem:/etc/ssl/notebook.pem \ quay.io/jupyter/base-notebook \ start-notebook.py \ --ServerApp.certfile=/etc/ssl/notebook.pem ``` In either case, Jupyter Server expects the key and certificate to be **base64-encoded text files**. The certificate file or PEM may contain one or more certificates (e.g., server, intermediate, and root). 
For additional information about using SSL, see the following: - The [docker-stacks/examples](https://github.com/jupyter/docker-stacks/tree/main/examples) for information about how to use [Let's Encrypt](https://letsencrypt.org/) certificates when you run these stacks on a publicly visible domain. - The [`jupyter_server_config.py`](https://github.com/jupyter/docker-stacks/blob/main/images/base-notebook/jupyter_server_config.py) file for how this Docker image generates a self-signed certificate. - The [Jupyter Server documentation](https://jupyter-server.readthedocs.io/en/latest/operators/public-server.html#securing-a-jupyter-server) for best practices about securing a public Server in general. ## Alternative Commands ### Switching back to the classic notebook or using a different startup command JupyterLab, built on top of Jupyter Server, is now the default for all the images of the stack. However, switching back to the classic notebook or using a different startup command is still possible. You can achieve this by setting the environment variable `DOCKER_STACKS_JUPYTER_CMD` at container startup. The table below shows some options. Since `Jupyter Notebook v7` `jupyter-server` is used as a backend. | `DOCKER_STACKS_JUPYTER_CMD` | Frontend | | --------------------------- | ---------------- | | `lab` (default) | JupyterLab | | `notebook` | Jupyter Notebook | | `nbclassic` | NbClassic | | `server` | None | | `retro`\* | RetroLab | ```{note} - Changing frontend for **JupyterHub singleuser image** is described in [JupyterHub docs](https://jupyterhub.readthedocs.io/en/latest/howto/configuration/config-user-env.html#switching-back-to-the-classic-notebook). - \* `retro` is not installed at this time, but it could be the case in the future or in a community stack. - Any other valid `jupyter` subcommand that starts the Jupyter Application can be used. 
``` Example: ```bash # Run Jupyter Server with the Jupyter Notebook frontend docker run -it --rm \ -p 8888:8888 \ -e DOCKER_STACKS_JUPYTER_CMD=notebook \ quay.io/jupyter/base-notebook # Executing the command: start-notebook.py # Executing: jupyter notebook # ... # Use Jupyter NBClassic frontend docker run -it --rm \ -p 8888:8888 \ -e DOCKER_STACKS_JUPYTER_CMD=nbclassic \ quay.io/jupyter/base-notebook # Executing the command: start-notebook.py # Executing: jupyter nbclassic # ... ``` ### `start.sh` Most of the configuration options in the `start-notebook.py` script are handled by an internal `start.sh` script that automatically runs before the command provided to the container (it's set as the container entrypoint). This allows you to specify an arbitrary command that takes advantage of all these features. For example, to run the text-based `ipython` console in a container, do the following: ```bash docker run -it --rm quay.io/jupyter/base-notebook ipython ``` This script is handy when you derive a new Dockerfile from this image and install additional Jupyter applications with subcommands like `jupyter console`, `jupyter kernelgateway`, etc. ## Conda Environments The default Python 3.x [Conda environment](https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/environments.html) resides in `/opt/conda`. The `/opt/conda/bin` directory is part of the default `jovyan` user's `${PATH}`. That directory is also searched for binaries when run using `sudo` (`sudo my_binary` will search for `my_binary` in `/opt/conda/bin/`). The `jovyan` user has full read/write access to the `/opt/conda` directory. You can use either `mamba`, `pip`, or `conda` (`mamba` is recommended) to install new packages without any additional permissions. 
```bash # install a package into the default (python 3.x) environment and cleanup it after # the installation mamba install --yes some-package && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" pip install --no-cache-dir some-package && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" conda install --yes some-package && \ conda clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ``` ### Using Alternative Channels Conda is configured by default to use only the [`conda-forge`](https://anaconda.org/conda-forge) channel. However, you can use alternative channels, either one-shot by overwriting the default channel in the installation command or by configuring `mamba` to use different channels. The examples below show how to use the [anaconda default channels](https://repo.anaconda.com/pkgs/main) instead of `conda-forge` to install packages. ```bash # using defaults channels to install a package mamba install --channel defaults humanize # configure conda to add default channels at the top of the list conda config --system --prepend channels defaults # install a package mamba install --yes humanize && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ``` [switch_back]: #switching-back-to-the-classic-notebook-or-using-a-different-startup-command ================================================ FILE: docs/using/custom-images.md ================================================ # Building a custom set of images This section describes how to build a custom set of images. It may be helpful if you need to change the Ubuntu or Python version, or to make a significant change to the build process itself. This project only builds one set of images at a time. If you want to use older images, [take a look here](../index.rst/#using-old-images). 
## Automating your build using template cookiecutter project If you wish to build your own image on top of one of our images and automate your build process, please, [take a look at cookiecutter template](../contributing/stacks.md). ## Custom arguments Our repository provides several customization points: - `ROOT_IMAGE` (docker argument) - the parent image for `docker-stacks-foundation` image - `PYTHON_VERSION` (docker argument) - the Python version to install in `docker-stacks-foundation` image - `REGISTRY`, `OWNER`, `BASE_IMAGE` (docker arguments) - they allow to specify parent image for all the other images - `REGISTRY`, `OWNER` (part of `env` in some GitHub workflows) - these allow to properly tag and refer to images during following steps: - [`build-test-upload`](https://github.com/jupyter/docker-stacks/blob/main/.github/workflows/docker-build-test-upload.yml) - [`contributed-recipes`](https://github.com/jupyter/docker-stacks/blob/main/.github/workflows/contributed-recipes.yml) - [`tag-push-merge`](https://github.com/jupyter/docker-stacks/blob/main/.github/workflows/docker-tag-push-merge.yml) These customization points can't be changed during runtime. Read more about [Docker build arguments](https://docs.docker.com/build/building/variables/#arg-usage-example) and [GitHub environment variables for a single workflow](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#defining-environment-variables-for-a-single-workflow). ## Building stack images with custom arguments A selection of prebuilt images are available from [Quay.io](https://quay.io/organization/jupyter), however, it's impossible to cater to everybody's needs. For extensive customization with an automated build pipeline, you may wish to create a [community-maintained stack](../contributing/stacks), however, for minor customizations, this may be overkill. 
For example, you may wish to use the same Jupyter stacks but built on a different base image, or built with a different Python version. To achieve this, you can use [Docker Bake](https://docs.docker.com/build/bake/) to build the stacks locally with custom arguments. ```{note} Custom arguments may result in build errors due to incompatibility. If so, your use case may require a fully customized stack. ``` As a basic example, if you want to build a custom image based on the `minimal-notebook` image using `Python 3.13`, start with a Dockerfile like: ```{code-block} Dockerfile :caption: Dockerfile ARG BASE_IMAGE=minimal-notebook FROM $BASE_IMAGE ... ``` Then include the file below in your project: ```{literalinclude} recipe_code/docker-bake.custom-python.hcl :force: :language: hcl :caption: docker-bake.hcl ``` To build this stack, in the same directory run: ```bash docker buildx bake ``` Docker Bake then determines the correct build order from the `contexts` parameters and builds the stack as requested. This image can then be run the same way as any other image provided by this project, for example: ```bash docker run -it --rm -p 8888:8888 custom-jupyter ``` or referenced in a Docker Compose file. ## Forking our repository If, for some reason, you need to change more things in our images, feel free to fork the repository and change it any way you want. If your customization is easy to backport to the main repo and might be helpful for other users, feel free to create a PR. It is almost always a great idea to keep your diff as small as possible and to merge/rebase the latest version of our repo in your project. ================================================ FILE: docs/using/faq.md ================================================ # Frequently Asked Questions (FAQ) ## How to persist user data There are two types of data you might want to persist. 1. If you want to persist your environment (i.e. 
packages installed by `mamba`, `conda`, `pip`, `apt-get`, and so on), then you should create an inherited image and install packages only once while building your Dockerfile. Take a look at [an example of using `mamba` and `pip`](./recipes.md#using-mamba-install-recommended-or-pip-install-in-a-child-docker-image) in a child image. ```{note} If you install a package inside a running container (for example, by running `pip install <package>` in a terminal), it won't be preserved when you next run your image. To make it work, install this package in your inherited image and rerun the `docker build` command. ``` 2. If you want to persist user data (files created by you, like `Python` scripts, notebooks, text files, and so on), then you should use a [Docker bind mount](https://docs.docker.com/engine/storage/bind-mounts/) or [Docker Volume](https://docs.docker.com/engine/storage/volumes/). You can find [an example of using a bind mount here](./running.md#example-2). There is also [a mount troubleshooting section](./troubleshooting.md#permission-denied-when-mounting-volumes) if you experience any issues. ## Why we don't add your favorite package We have lots of users with different packages they want to use. Adding them all is impossible, so we have several images to choose from. [Choose the image](selecting.md) that is closest to your needs, and feel free to [add your package on top of our images](recipes.md#using-mamba-install-recommended-or-pip-install-in-a-child-docker-image). ## Who is `jovyan` As described [in this issue comment](https://github.com/jupyter/docker-stacks/issues/358#issuecomment-288844834): ```text Jo·vy·an /ˈjōvēən/ noun – an inhabitant of Jupyter ``` `Jovyan` is a special term often used to describe members of the Jupyter community. It is also used as the default username in the Jupyter Docker stacks or referenced in conversations. 
You can find more information on [the Jupyter Community documentation](https://docs.jupyter.org/en/latest/community/content-community.html#what-is-a-jovyan). ## How to give root permissions to the user We have a [recipe for enabling root permissions](recipes.md#using-sudo-within-a-container). ================================================ FILE: docs/using/recipe_code/custom_environment.dockerfile ================================================ ARG BASE_IMAGE=quay.io/jupyter/minimal-notebook FROM $BASE_IMAGE # Name your environment and choose the Python version ARG env_name=python313 ARG py_ver=3.13 # You can add additional libraries here RUN mamba create --yes -p "${CONDA_DIR}/envs/${env_name}" \ python=${py_ver} \ 'ipykernel' \ 'jupyterlab' && \ mamba clean --all -f -y # Alternatively, you can comment out the lines above and uncomment those below # if you'd prefer to use a YAML file present in the docker build context # COPY --chown=${NB_UID}:${NB_GID} environment.yml /tmp/ # RUN mamba env create -p "${CONDA_DIR}/envs/${env_name}" -f /tmp/environment.yml && \ # mamba clean --all -f -y # Create Python kernel and link it to jupyter RUN "${CONDA_DIR}/envs/${env_name}/bin/python" -m ipykernel install --user --name="${env_name}" && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" # Any additional `pip` installs can be added by using the following line # Using `mamba` is highly recommended though RUN "${CONDA_DIR}/envs/${env_name}/bin/pip" install --no-cache-dir \ 'flake8' # This changes the custom Python kernel so that the custom environment will # be activated for the respective Jupyter Notebook and Jupyter Console # hadolint ignore=DL3059 RUN /opt/setup-scripts/activate_notebook_custom_env.py "${env_name}" # Comment the line above and uncomment the section below instead to activate the custom environment by default # Note: uncommenting this section makes "${env_name}" default both for Jupyter Notebook and Terminals # More information here: 
https://github.com/jupyter/docker-stacks/pull/2047 # USER root # RUN \ # # This changes a startup hook, which will activate the custom environment for the process # echo conda activate "${env_name}" >> /usr/local/bin/before-notebook.d/10activate-conda-env.sh && \ # # This makes the custom environment default in Jupyter Terminals for all users which might be created later # echo conda activate "${env_name}" >> /etc/skel/.bashrc && \ # # This makes the custom environment default in Jupyter Terminals for already existing NB_USER # echo conda activate "${env_name}" >> "/home/${NB_USER}/.bashrc" USER ${NB_UID} ================================================ FILE: docs/using/recipe_code/dask_jupyterlab.dockerfile ================================================ ARG BASE_IMAGE=quay.io/jupyter/base-notebook FROM $BASE_IMAGE # Install the Dask dashboard RUN mamba install --yes 'dask-labextension' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" # Dask Scheduler port EXPOSE 8787 ================================================ FILE: docs/using/recipe_code/docker-bake.custom-python.hcl ================================================ group "default" { targets = ["custom-notebook"] } target "foundation" { context = "https://github.com/jupyter/docker-stacks.git#main:images/docker-stacks-foundation" args = { PYTHON_VERSION = "3.13" } tags = ["docker-stacks-foundation"] } target "base-notebook" { context = "https://github.com/jupyter/docker-stacks.git#main:images/base-notebook" contexts = { docker-stacks-foundation = "target:foundation" } args = { BASE_IMAGE = "docker-stacks-foundation" } tags = ["base-notebook"] } target "minimal-notebook" { context = "https://github.com/jupyter/docker-stacks.git#main:images/minimal-notebook" contexts = { base-notebook = "target:base-notebook" } args = { BASE_IMAGE = "base-notebook" } tags = ["minimal-notebook"] } target "custom-notebook" { context = "." 
contexts = { minimal-notebook = "target:minimal-notebook" } args = { BASE_IMAGE = "minimal-notebook" } tags = ["custom-jupyter"] } ================================================ FILE: docs/using/recipe_code/generate_matrix.py ================================================ #!/usr/bin/env python3 # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. import json from pathlib import Path THIS_DIR = Path(__file__).parent.resolve() RUNS_ON = ["ubuntu-24.04", "ubuntu-24.04-arm"] ARM_INCOMPATIBLE_IMAGES = {"oracledb.dockerfile"} BASE_IMAGE_PREFIX = "ARG BASE_IMAGE=" def extract_base_image(dockerfile: Path) -> str: """Return the base image name declared on the first `ARG BASE_IMAGE=` line of the dockerfile: the part of the value after its last `/`, or an empty string when that part contains `:`. Raises RuntimeError if no line starts with `ARG BASE_IMAGE=`.""" for line in dockerfile.read_text().splitlines(): if line.startswith(BASE_IMAGE_PREFIX): full_image = line[len(BASE_IMAGE_PREFIX) :] image_name = full_image[full_image.rfind("/") + 1 :] return "" if ":" in image_name else image_name raise RuntimeError(f"Base image not found in {dockerfile}") def get_platform(runs_on: str) -> str: """Return the platform architecture for a runner label: `x86_64` for `ubuntu-24.04`, `aarch64` for any other value.""" return "x86_64" if runs_on == "ubuntu-24.04" else "aarch64" def generate_matrix() -> dict[str, list[dict[str, str]]]: """Generate the build matrix for GitHub Actions: a dict whose `include` list holds one entry per `*.dockerfile` in this directory (sorted) and per runner in RUNS_ON, skipping the `ubuntu-24.04-arm` runner for files in ARM_INCOMPATIBLE_IMAGES. Each entry has the keys `dockerfile`, `runs-on`, `platform` and `parent-image`.""" dockerfiles = sorted(THIS_DIR.glob("*.dockerfile")) configurations: list[dict[str, str]] = [] for dockerfile in dockerfiles: dockerfile_name = dockerfile.name for run in RUNS_ON: # Skip ARM builds for incompatible images if dockerfile_name in ARM_INCOMPATIBLE_IMAGES and run == "ubuntu-24.04-arm": continue configurations.append( { "dockerfile": dockerfile_name, "runs-on": run, "platform": get_platform(run), "parent-image": extract_base_image(dockerfile), } ) return {"include": configurations} if __name__ == "__main__": print(f"matrix={json.dumps(generate_matrix())}") ================================================ FILE: docs/using/recipe_code/ijavascript.dockerfile ================================================ ARG
BASE_IMAGE=quay.io/jupyter/base-notebook FROM $BASE_IMAGE USER root RUN apt-get update --yes && \ apt-get install --yes --no-install-recommends \ make \ g++ && \ apt-get clean && rm -rf /var/lib/apt/lists/* USER ${NB_UID} # NodeJS <= 20 is required # https://github.com/n-riesco/ijavascript/issues/184 RUN mamba install --yes nodejs=20.* && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" # hadolint ignore=DL3016 RUN npm install -g ijavascript # hadolint ignore=DL3059 RUN ijsinstall ================================================ FILE: docs/using/recipe_code/jupyterhub_version.dockerfile ================================================ ARG BASE_IMAGE=quay.io/jupyter/base-notebook FROM $BASE_IMAGE RUN mamba install --yes 'jupyterhub-singleuser==5.2.1' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ================================================ FILE: docs/using/recipe_code/mamba_install.dockerfile ================================================ ARG BASE_IMAGE=quay.io/jupyter/base-notebook FROM $BASE_IMAGE RUN mamba install --yes 'flake8' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" # Install from the requirements.txt file COPY --chown=${NB_UID}:${NB_GID} requirements.txt /tmp/ RUN mamba install --yes --file /tmp/requirements.txt && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ================================================ FILE: docs/using/recipe_code/manpage_install.dockerfile ================================================ ARG BASE_IMAGE=quay.io/jupyter/base-notebook FROM $BASE_IMAGE # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] USER root # `/etc/dpkg/dpkg.cfg.d/excludes` contains several `path-exclude`s, including man pages # 
Remove it, then install man, install docs RUN rm /etc/dpkg/dpkg.cfg.d/excludes && \ apt-get update --yes && \ dpkg -l | grep ^ii | cut -d' ' -f3 | xargs apt-get install --yes --no-install-recommends --reinstall man && \ apt-get clean && rm -rf /var/lib/apt/lists/* USER ${NB_UID} ================================================ FILE: docs/using/recipe_code/microsoft_odbc.dockerfile ================================================ ARG BASE_IMAGE=quay.io/jupyter/base-notebook FROM $BASE_IMAGE # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] USER root ENV MSSQL_DRIVER="ODBC Driver 18 for SQL Server" ENV PATH="/opt/mssql-tools18/bin:${PATH}" RUN apt-get update --yes && \ apt-get install --yes --no-install-recommends curl gnupg2 lsb-release && \ curl -fsSL "https://packages.microsoft.com/keys/microsoft.asc" | gpg --dearmor -o /usr/share/keyrings/microsoft-prod.gpg && \ curl "https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/prod.list" > /etc/apt/sources.list.d/mssql-release.list && \ apt-get update --yes && \ ACCEPT_EULA=Y apt-get install --yes --no-install-recommends msodbcsql18 && \ # optional: for bcp and sqlcmd ACCEPT_EULA=Y apt-get install --yes --no-install-recommends mssql-tools18 && \ # optional: for unixODBC development headers apt-get install --yes --no-install-recommends unixodbc-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* # Switch back to jovyan to avoid accidental container runs as root USER ${NB_UID} RUN mamba install --yes 'pyodbc' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ================================================ FILE: docs/using/recipe_code/oracledb.dockerfile ================================================ ARG BASE_IMAGE=quay.io/jupyter/base-notebook FROM $BASE_IMAGE USER root # Install Java & Oracle SQL Instant Client RUN apt-get update --yes && \ 
apt-get install --yes --no-install-recommends software-properties-common && \ add-apt-repository universe && \ apt-get update --yes && \ apt-get install --yes --no-install-recommends alien default-jre default-jdk openjdk-11-jdk libaio1t64 && \ apt-get clean && rm -rf /var/lib/apt/lists/* # Oracle ARG INSTANTCLIENT_MAJOR_VERSION=23 ARG INSTANTCLIENT_BIN_SUFFIX=${INSTANTCLIENT_MAJOR_VERSION}.6.0.24.10-1.el9.x86_64.rpm ARG INSTANTCLIENT_URL=https://download.oracle.com/otn_software/linux/instantclient/2360000 # Then install Oracle SQL Instant client, SQL+Plus, tools, and JDBC. # Note: You may need to change the URL to a newer version. # See: https://www.oracle.com/database/technologies/instant-client/linux-x86-64-downloads.html RUN mkdir "/opt/oracle" WORKDIR "/opt/oracle" # alien doesn't work well with sqlplus, so skipping it for now RUN wget --progress=dot:giga "${INSTANTCLIENT_URL}/oracle-instantclient-basiclite-${INSTANTCLIENT_BIN_SUFFIX}" && \ alien --install --scripts "oracle-instantclient-basiclite-${INSTANTCLIENT_BIN_SUFFIX}" && \ wget --progress=dot:giga "${INSTANTCLIENT_URL}/oracle-instantclient-sqlplus-${INSTANTCLIENT_BIN_SUFFIX}" && \ # alien --install --scripts "oracle-instantclient-sqlplus-${INSTANTCLIENT_BIN_SUFFIX}" && \ wget --progress=dot:giga "${INSTANTCLIENT_URL}/oracle-instantclient-tools-${INSTANTCLIENT_BIN_SUFFIX}" && \ alien --install --scripts "oracle-instantclient-tools-${INSTANTCLIENT_BIN_SUFFIX}" && \ wget --progress=dot:giga "${INSTANTCLIENT_URL}/oracle-instantclient-jdbc-${INSTANTCLIENT_BIN_SUFFIX}" && \ alien --install --scripts "oracle-instantclient-jdbc-${INSTANTCLIENT_BIN_SUFFIX}" && \ chown -R "${NB_UID}":"${NB_GID}" "${HOME}/.rpmdb" && \ rm -f ./*.rpm # And configure variables RUN echo "ORACLE_HOME=/usr/lib/oracle/${INSTANTCLIENT_MAJOR_VERSION}/client64" >> "${HOME}/.bashrc" && \ echo "PATH=\"${ORACLE_HOME}/bin:${PATH}\"" >> "${HOME}/.bashrc" && \ echo "LD_LIBRARY_PATH=\"${ORACLE_HOME}/lib:${LD_LIBRARY_PATH}\"" >> "${HOME}/.bashrc" 
&& \ echo "export ORACLE_HOME" >> "${HOME}/.bashrc" && \ echo "export PATH" >> "${HOME}/.bashrc" && \ echo "export LD_LIBRARY_PATH" >> "${HOME}/.bashrc" # Add credentials for /redacted/ using Oracle DB. WORKDIR /usr/lib/oracle/${INSTANTCLIENT_MAJOR_VERSION}/client64/lib/network/admin/ # Add a wildcard `[]` on the last letter of the filename to avoid throwing an error if the file does not exist. # See: https://stackoverflow.com/questions/31528384/conditional-copy-add-in-dockerfile COPY cwallet.ss[o] ./ COPY sqlnet.or[a] ./ COPY tnsnames.or[a] ./ # Switch back to jovyan to avoid accidental container runs as root USER "${NB_UID}" WORKDIR "${HOME}" # Install `oracledb` Python library to use Oracle SQL Instant Client RUN mamba install --yes 'oracledb' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ================================================ FILE: docs/using/recipe_code/pip_install.dockerfile ================================================ ARG BASE_IMAGE=quay.io/jupyter/base-notebook FROM $BASE_IMAGE # Install in the default python3 environment RUN pip install --no-cache-dir 'flake8' && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" # Install from the requirements.txt file COPY --chown=${NB_UID}:${NB_GID} requirements.txt /tmp/ RUN pip install --no-cache-dir --requirement /tmp/requirements.txt && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ================================================ FILE: docs/using/recipe_code/requirements.txt ================================================ autoflake ================================================ FILE: docs/using/recipe_code/rise_jupyterlab.dockerfile ================================================ ARG BASE_IMAGE=quay.io/jupyter/base-notebook FROM $BASE_IMAGE RUN mamba install --yes 'jupyterlab_rise' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" 
================================================ FILE: docs/using/recipe_code/spellcheck_notebook_v6.dockerfile ================================================ # Using Docker Hub here, because this image is old and not pushed to Quay.io ARG BASE_IMAGE=docker.io/jupyter/base-notebook:notebook-6.5.4 FROM $BASE_IMAGE RUN pip install --no-cache-dir 'jupyter_contrib_nbextensions' && \ jupyter contrib nbextension install --user && \ # can modify or enable additional extensions here jupyter nbclassic-extension enable spellchecker/main --user && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ================================================ FILE: docs/using/recipe_code/xgboost.dockerfile ================================================ ARG BASE_IMAGE=quay.io/jupyter/base-notebook FROM $BASE_IMAGE RUN mamba install --yes 'py-xgboost' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ================================================ FILE: docs/using/recipes.md ================================================ # Contributed Recipes Users sometimes share interesting ways of using the Jupyter Docker Stacks. We encourage users to [contribute these recipes](../contributing/recipes.md) to the documentation in case they prove helpful to other community members by submitting a pull request to `docs/using/recipes.md`. The sections below capture this knowledge. All the recipes here assume you would like to use an image built by this project and install some things on top of it. If you would like to build a custom set of images, [take a look at the docs](custom-images.md). ## Using `sudo` within a container Password authentication is disabled for the `NB_USER` (e.g., `jovyan`). We made this choice to avoid distributing images with a weak default password that users ~might~ will forget to change before running a container on a publicly accessible host. 
You can grant the within-container `NB_USER` passwordless `sudo` access by adding `--user root` and `-e GRANT_SUDO=yes` to your Docker command line or appropriate container orchestrator config. For example: ```bash docker run -it --rm \ --user root \ -e GRANT_SUDO=yes \ quay.io/jupyter/base-notebook ``` **You should only enable `sudo` if you trust the user and/or if the container is running on an isolated host.** See [Docker security documentation](https://docs.docker.com/engine/security/userns-remap/) for more information about running containers as `root`. ## Using `mamba install` (recommended) or `pip install` in a Child Docker image Create a new Dockerfile like the one shown below. To use a requirements.txt file, first, create your `requirements.txt` file with the listing of packages desired. ```{literalinclude} recipe_code/mamba_install.dockerfile :language: docker ``` `pip` usage is similar: ```{literalinclude} recipe_code/pip_install.dockerfile :language: docker ``` Then build a new image. ```bash docker build --rm --tag my-custom-image . ``` You can then run the image as follows: ```bash docker run -it --rm \ -p 8888:8888 \ my-custom-image ``` ## Add a custom conda environment and Jupyter kernel The default version of `Python` that ships with the image may not be the version you want. The instructions below permit adding a conda environment with a different `Python` version and making it accessible to Jupyter. You may also use older images like `jupyter/base-notebook:python-3.10`. We also maintain a [full build history](https://github.com/jupyter/docker-stacks/wiki). ```{literalinclude} recipe_code/custom_environment.dockerfile :language: docker ``` ## Dask JupyterLab Extension [Dask JupyterLab Extension](https://github.com/dask/dask-labextension) provides a JupyterLab extension to manage Dask clusters, as well as embed Dask's dashboard plots directly into JupyterLab panes. 
Create the Dockerfile as: ```{literalinclude} recipe_code/dask_jupyterlab.dockerfile :language: docker ``` And build the image as: ```bash docker build --rm --tag my-custom-image . ``` Once built, run using the command: ```bash docker run -it --rm \ -p 8888:8888 \ -p 8787:8787 \ my-custom-image ``` ## Let's Encrypt a Server ```{warning} This recipe is not tested and might be broken. ``` See the [README in `examples/make-deploy`](https://github.com/jupyter/docker-stacks/tree/main/examples/make-deploy) for basic automation, which includes steps for requesting and renewing a Let's Encrypt certificate. Ref: ## Slideshows with JupyterLab and RISE [RISE](https://github.com/jupyterlab-contrib/rise): "Live" Reveal.js JupyterLab Slideshow Extension. ```{note} We're providing the recipe to install the JupyterLab extension. You can find the original Jupyter Notebook extension [here](https://github.com/damianavila/RISE). ``` ```{literalinclude} recipe_code/rise_jupyterlab.dockerfile :language: docker ``` ## xgboost ```{literalinclude} recipe_code/xgboost.dockerfile :language: docker ``` ## Running behind an nginx proxy ```{warning} This recipe is not tested and might be broken. ``` Sometimes it is helpful to run the Jupyter instance behind an nginx proxy, for example: - you would prefer to access the notebook at a server URL with a path (`https://example.com/jupyter`) rather than a port (`https://example.com:8888`) - you may have many services in addition to Jupyter running on the same server and want nginx to help improve server performance in managing the connections Here is a [quick example of NGINX configuration](https://gist.github.com/cboettig/8643341bd3c93b62b5c2) to get started. You'll need a server, a `.crt`, and a `.key` file for your server, and `docker` & `docker-compose` installed. Then download the files at that gist and run `docker-compose up` to test it out. Customize the `nginx.conf` file to set the desired paths and add other services.
## Host volume mounts and notebook errors If you are mounting a host directory as `/home/jovyan/work` in your container, and you receive permission errors or connection errors when you create a notebook, be sure that the `jovyan` user (`UID=1000` by default) has read/write access to the directory on the host. Alternatively, specify the UID of the `jovyan` user on container startup using the `-e NB_UID` option described in the [Common Features, Docker Options section](common.md#docker-options) Ref: ## Manpage installation Most images, including our Ubuntu base image, ship without manpages installed to save space. You can use the following Dockerfile to inherit from one of our images to enable manpages: ```{literalinclude} recipe_code/manpage_install.dockerfile :language: docker ``` Adding the documentation on top of the existing image wastes a lot of space and requires reinstalling every system package, which can take additional time and bandwidth. Enabling manpages in the base Ubuntu layer prevents this image bloat. To achieve this, use the previous `Dockerfile`'s commands with the original `ubuntu` image as your base image: ```dockerfile FROM ubuntu:24.04 ``` Be sure to check the current base image in `jupyter/docker-stacks-foundation` before building. ## JupyterHub We also have contributed recipes for using JupyterHub. ### Use JupyterHub's DockerSpawner You can find [an example of using DockerSpawner](https://github.com/jupyterhub/jupyterhub-deploy-docker/tree/main/basic-example). ### Containers with a specific version of JupyterHub The version of `jupyterhub` in your image should match the version in JupyterHub itself. To use a specific version of JupyterHub, do the following: ```{literalinclude} recipe_code/jupyterhub_version.dockerfile :language: docker ``` ## Spark A few suggestions have been made regarding using Docker Stacks with Spark. ### Using PySpark with AWS S3 ```{warning} This recipe is not tested and might be broken. 
``` Using Spark session for Hadoop 2.7.3 ```python import os # To figure out what version of Hadoop, run: # ls /usr/local/spark/jars/hadoop* os.environ["PYSPARK_SUBMIT_ARGS"] = ( '--packages "org.apache.hadoop:hadoop-aws:2.7.3" pyspark-shell' ) import pyspark myAccessKey = input() mySecretKey = input() spark = ( pyspark.sql.SparkSession.builder.master("local[*]") .config("spark.hadoop.fs.s3a.access.key", myAccessKey) .config("spark.hadoop.fs.s3a.secret.key", mySecretKey) .getOrCreate() ) df = spark.read.parquet("s3://myBucket/myKey") ``` Using Spark context for Hadoop 2.6.0 ```python import os os.environ["PYSPARK_SUBMIT_ARGS"] = ( "--packages com.amazonaws:aws-java-sdk:1.10.34,org.apache.hadoop:hadoop-aws:2.6.0 pyspark-shell" ) import pyspark sc = pyspark.SparkContext("local[*]") from pyspark.sql import SQLContext sqlContext = SQLContext(sc) hadoopConf = sc._jsc.hadoopConfiguration() myAccessKey = input() mySecretKey = input() hadoopConf.set("fs.s3.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem") hadoopConf.set("fs.s3.awsAccessKeyId", myAccessKey) hadoopConf.set("fs.s3.awsSecretAccessKey", mySecretKey) df = sqlContext.read.parquet("s3://myBucket/myKey") ``` Ref: ### Using Local Spark JARs ```{warning} This recipe is not tested and might be broken. ``` ```python import os os.environ["PYSPARK_SUBMIT_ARGS"] = ( "--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell" ) import pyspark from pyspark.streaming.kafka import KafkaUtils from pyspark.streaming import StreamingContext sc = pyspark.SparkContext() ssc = StreamingContext(sc, 1) broker = "" directKafkaStream = KafkaUtils.createDirectStream( ssc, ["test1"], {"metadata.broker.list": broker} ) directKafkaStream.pprint() ssc.start() ``` Ref: ### Using spark-packages.org ```{warning} This recipe is not tested and might be broken. 
``` If you'd like to use packages from [spark-packages.org](https://spark-packages.org/), see [https://gist.github.com/parente/c95fdaba5a9a066efaab](https://gist.github.com/parente/c95fdaba5a9a066efaab) for an example of how to specify the package identifier in the environment before creating a SparkContext. Ref: ### Use jupyter/all-spark-notebooks with an existing Spark/YARN cluster ```{warning} This recipe is not tested and might be broken. ``` ```dockerfile FROM quay.io/jupyter/all-spark-notebook # Set env vars for pydoop ENV HADOOP_HOME=/usr/local/hadoop-2.7.3 ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 ENV HADOOP_CONF_HOME=/usr/local/hadoop-2.7.3/etc/hadoop ENV HADOOP_CONF_DIR=/usr/local/hadoop-2.7.3/etc/hadoop USER root # Add proper open-jdk-8 not the jre only, needed for pydoop RUN echo 'deb https://cdn-fastly.deb.debian.org/debian jessie-backports main' > /etc/apt/sources.list.d/jessie-backports.list && \ apt-get update --yes && \ apt-get install --yes --no-install-recommends -t jessie-backports openjdk-8-jdk && \ rm /etc/apt/sources.list.d/jessie-backports.list && \ apt-get clean && rm -rf /var/lib/apt/lists/* && \ # Add Hadoop binaries wget --progress=dot:giga https://mirrors.ukfast.co.uk/sites/ftp.apache.org/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz && \ tar -xvf hadoop-2.7.3.tar.gz -C /usr/local && \ chown -R "${NB_USER}:users" /usr/local/hadoop-2.7.3 && \ rm -f hadoop-2.7.3.tar.gz && \ # Install os dependencies required for pydoop, pyhive apt-get update --yes && \ apt-get install --yes --no-install-recommends build-essential python-dev libsasl2-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* && \ # Remove the example hadoop configs and replace # with those for our cluster. 
# Alternatively, this could be mounted as a volume rm -f /usr/local/hadoop-2.7.3/etc/hadoop/* # Download this from ambari/cloudera manager and copy it here COPY example-hadoop-conf/ /usr/local/hadoop-2.7.3/etc/hadoop/ # Spark-Submit doesn't work unless I set the following RUN echo "spark.driver.extraJavaOptions -Dhdp.version=2.5.3.0-37" >> /usr/local/spark/conf/spark-defaults.conf && \ echo "spark.yarn.am.extraJavaOptions -Dhdp.version=2.5.3.0-37" >> /usr/local/spark/conf/spark-defaults.conf && \ echo "spark.master=yarn" >> /usr/local/spark/conf/spark-defaults.conf && \ echo "spark.hadoop.yarn.timeline-service.enabled=false" >> /usr/local/spark/conf/spark-defaults.conf && \ chown -R "${NB_USER}:users" /usr/local/spark/conf/spark-defaults.conf && \ # Create an alternative HADOOP_CONF_HOME so we can mount as a volume and repoint # using ENV var if needed mkdir -p /etc/hadoop/conf/ && \ chown "${NB_USER}":users /etc/hadoop/conf/ USER ${NB_UID} # Install useful jupyter extensions and python libraries like : # - Dashboards # - PyDoop # - PyHive RUN pip install --no-cache-dir 'jupyter_dashboards' 'faker' && \ jupyter dashboards quick-setup --sys-prefix && \ pip2 install --no-cache-dir 'pyhive' 'pydoop' 'thrift' 'sasl' 'thrift_sasl' 'faker' && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" USER root # Ensure we overwrite the kernel config so that toree connects to cluster RUN jupyter toree install --sys-prefix --spark_opts="\ --master yarn \ --deploy-mode client \ --driver-memory 512m \ --executor-memory 512m \ --executor-cores 1 \ --driver-java-options \ -Dhdp.version=2.5.3.0-37 \ --conf spark.hadoop.yarn.timeline-service.enabled=false \ " USER ${NB_UID} ``` Credit: [britishbadger](https://github.com/britishbadger) from [docker-stacks/issues/369](https://github.com/jupyter/docker-stacks/issues/369) ## Run Server inside an already secured environment (i.e., with no token) The default security is very good. 
There are use cases, encouraged by containers, where the jupyter container and the system it runs within lie inside the security boundary. It is convenient to launch the server without a password or token in these use cases. In this case, you should use the `start-notebook.py` script to launch the server with no token: For JupyterLab: ```bash docker run -it --rm \ quay.io/jupyter/base-notebook \ start-notebook.py --IdentityProvider.token='' ``` For Jupyter Notebook: ```bash docker run -it --rm \ -e DOCKER_STACKS_JUPYTER_CMD=notebook \ quay.io/jupyter/base-notebook \ start-notebook.py --IdentityProvider.token='' ``` ## Enable nbclassic-extension spellchecker for markdown (or any other nbclassic-extension) ```{note} This recipe only works for NBClassic with Jupyter Notebook < 7. It is recommended to use [jupyterlab-spellchecker](https://github.com/jupyterlab-contrib/spellchecker) in modern environments. ``` ```{literalinclude} recipe_code/spellcheck_notebook_v6.dockerfile :language: docker ``` ## Enable Delta Lake in Spark notebooks ```{warning} This recipe is not tested and might be broken. ``` Please note that the [Delta Lake](https://delta.io/) packages are only available for Spark version > `3.0`. By adding the properties to `spark-defaults.conf`, the user no longer needs to enable Delta support in each notebook. 
```dockerfile FROM quay.io/jupyter/pyspark-notebook RUN mamba install --yes 'delta-spark' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" USER root RUN echo 'spark.sql.extensions io.delta.sql.DeltaSparkSessionExtension' >> "${SPARK_HOME}/conf/spark-defaults.conf" && \ echo 'spark.sql.catalog.spark_catalog org.apache.spark.sql.delta.catalog.DeltaCatalog' >> "${SPARK_HOME}/conf/spark-defaults.conf" USER ${NB_UID} # Trigger download of delta lake files RUN echo "from pyspark.sql import SparkSession" > /tmp/init-delta.py && \ echo "from delta import *" >> /tmp/init-delta.py && \ echo "spark = configure_spark_with_delta_pip(SparkSession.builder).getOrCreate()" >> /tmp/init-delta.py && \ python /tmp/init-delta.py && \ rm /tmp/init-delta.py ``` ## Add Custom Fonts in Scipy notebook ```{warning} This recipe is not tested and might be broken. ``` The example below is a Dockerfile to load Source Han Sans with normal weight, usually used for the web. ```dockerfile FROM quay.io/jupyter/scipy-notebook RUN PYV=$(ls "${CONDA_DIR}/lib" | grep ^python) && \ MPL_DATA="${CONDA_DIR}/lib/${PYV}/site-packages/matplotlib/mpl-data" && \ wget --progress=dot:giga -P "${MPL_DATA}/fonts/ttf/" https://mirrors.cloud.tencent.com/adobe-fonts/source-han-sans/SubsetOTF/CN/SourceHanSansCN-Normal.otf && \ sed -i 's/#font.family/font.family/g' "${MPL_DATA}/matplotlibrc" && \ sed -i 's/#font.sans-serif:/font.sans-serif: Source Han Sans CN,/g' "${MPL_DATA}/matplotlibrc" && \ sed -i 's/#axes.unicode_minus: True/axes.unicode_minus: False/g' "${MPL_DATA}/matplotlibrc" && \ rm -rf "/home/${NB_USER}/.cache/matplotlib" && \ python -c 'import matplotlib.font_manager;print("font loaded: ",("Source Han Sans CN" in [f.name for f in matplotlib.font_manager.fontManager.ttflist]))' ``` ## Enable clipboard in pandas on Linux systems ```{warning} This recipe is not tested and might be broken. 
``` ```{admonition} Additional notes This solution works on Linux host systems. It is not required on Windows and won't work on macOS. ``` To enable the `pandas.read_clipboard()` functionality, you need to have `xclip` installed (installed in `minimal-notebook` and all the inherited images) and add these options when running `docker`: `-e DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix`, i.e.: ```bash docker run -it --rm \ -e DISPLAY \ -v /tmp/.X11-unix:/tmp/.X11-unix \ quay.io/jupyter/minimal-notebook ``` ## Install ijavascript kernel in your image The example below is a Dockerfile to install the [IJavascript kernel](https://github.com/n-riesco/ijavascript). ```{literalinclude} recipe_code/ijavascript.dockerfile :language: docker ``` ## Add Microsoft SQL Server ODBC driver The following recipe demonstrates how to add functionality to read from and write to an instance of Microsoft SQL Server in your notebook. ```{literalinclude} recipe_code/microsoft_odbc.dockerfile :language: docker ``` You can now use `pyodbc` and `sqlalchemy` to interact with the database. Pre-built images are hosted in the [Realiserad/jupyter-docker-mssql](https://github.com/Realiserad/jupyter-docker-mssql) repository. ## Add Oracle SQL Instant client, SQL\*Plus, and other tools (Version 23.x) ```{note} This recipe only works for x86_64 architecture. ``` The following recipe demonstrates how to add functionality to connect to an Oracle Database using [Oracle Instant Client](https://www.oracle.com/database/technologies/instant-client.html) in your notebook. This recipe installs version `23.6.0.24.10`. See the [Oracle Instant Client Download page](https://www.oracle.com/database/technologies/instant-client/linux-x86-64-downloads.html) for the complete list of available versions. You may need to perform different steps for older versions; they may be explained in the "Installation instructions" section of the Downloads page.
```{literalinclude} recipe_code/oracledb.dockerfile :language: docker ``` ## Running Jupyter Docker Stacks with Singularity You can also start Jupyter Docker Stacks containers using **Singularity** instead of Docker. For example: ```bash singularity run --bind "${PWD}:/home/${USER}/work" --containall docker://quay.io/jupyter/datascience-notebook:2025-12-31 ``` - `--bind "${PWD}:/home/${USER}/work"` mounts your current working directory into the container at `/home/$USER/work`. When running the image with Singularity, the container uses your host username inside the container. Therefore, the bind target is `/home/${USER}/work` instead of the usual `/home/jovyan/work`. - `--containall` starts the container in a fully isolated environment, ignoring most of the host’s filesystem and environment except for explicitly bound paths. By default, Singularity would bind your home directory automatically. If you have Python packages installed there, this may cause conflicts with packages inside the container. Using `--containall` avoids such interference. ================================================ FILE: docs/using/running.md ================================================ # Running a Container Using one of the Jupyter Docker Stacks requires two choices: 1. Which Docker image you wish to use 2. How you wish to start Docker containers from that image This section provides details about the second. ## Using the Docker CLI You can launch a local Docker container from the Jupyter Docker Stacks using the [Docker command-line interface](https://docs.docker.com/reference/cli/docker/). There are numerous ways to configure containers using CLI. The following are some common patterns. ### Example 1 This command pulls the `jupyter/scipy-notebook` image tagged `2025-12-31` from Quay.io if it is not already present on the local host. It then starts a container running a Jupyter Server with the JupyterLab frontend and exposes the server on host port 8888. 
The server logs appear in the terminal and include a URL to the server. ```bash docker run -it -p 8888:8888 quay.io/jupyter/scipy-notebook:2025-12-31 # Entered start.sh with args: jupyter lab # ... # To access the server, open this file in a browser: # file:///home/jovyan/.local/share/jupyter/runtime/jpserver-7-open.html # Or copy and paste one of these URLs: # http://eca4aa01751c:8888/lab?token=d4ac9278f5f5388e88097a3a8ebbe9401be206cfa0b83099 # http://127.0.0.1:8888/lab?token=d4ac9278f5f5388e88097a3a8ebbe9401be206cfa0b83099 ``` Pressing `Ctrl-C` twice shuts down the Server but leaves the container intact on disk for later restart or permanent deletion using commands like the following: ```bash # list containers docker ps --all # CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES # eca4aa01751c quay.io/jupyter/scipy-notebook:2025-12-31 "tini -g -- start-no…" About a minute ago Exited (0) 5 seconds ago silly_panini # start the stopped container docker start --attach -i eca4aa01751c # Entered start.sh with args: jupyter lab # ... # remove the stopped container docker rm eca4aa01751c # eca4aa01751c ``` ### Example 2 This command pulls the `jupyter/r-notebook` image tagged `2025-12-31` from Quay.io if it is not already present on the local host. It then starts a container running a Jupyter Server and exposes the server on host port 10000. The server logs appear in the terminal and include a URL to the Server but with the internal container port (8888) instead of the correct host port (10000). ```bash docker run -it --rm -p 10000:8888 -v "${PWD}":/home/jovyan/work quay.io/jupyter/r-notebook:2025-12-31 ``` Pressing `Ctrl-C` twice shuts down the Server and immediately destroys the Docker container. New files and changes in `~/work` in the container will be preserved. Any other changes made in the container will be lost. ```{note} By default, [jupyter's root_dir](https://jupyter-server.readthedocs.io/en/latest/other/full-config.html) is `/home/jovyan`. 
So, new notebooks will be saved there, unless you change the directory in the file browser. To change the default directory, you will need to specify `ServerApp.root_dir` by adding this line to the previous command: `start-notebook.py --ServerApp.root_dir=/home/jovyan/work`. ``` ### Example 3 This command pulls the `jupyter/all-spark-notebook` image currently tagged `latest` from Quay.io if an image tagged `latest` is not already present on the local host. It then starts a container named `notebook` running a JupyterLab server and exposes the server on a randomly selected port. ```bash docker run --detach -P --name notebook quay.io/jupyter/all-spark-notebook ``` where: - `--detach`: will run the container in detached mode You can also use the following docker commands to see the port and Jupyter Server token: ```bash # get the random host port assigned to the container port 8888 docker port notebook 8888 # 0.0.0.0:49153 # :::49153 # get the notebook token from the logs docker logs --tail 3 notebook # Or copy and paste one of these URLs: # http://878f1a9b4dfa:8888/lab?token=d336fa63c03f064ff15ce7b269cab95b2095786cf9ab2ba3 # or http://127.0.0.1:8888/lab?token=d336fa63c03f064ff15ce7b269cab95b2095786cf9ab2ba3 ``` Together, the URL to visit on the host machine to access the server, in this case, is <http://127.0.0.1:49153/lab?token=d336fa63c03f064ff15ce7b269cab95b2095786cf9ab2ba3>. The container runs in the background until stopped and/or removed by additional Docker commands: ```bash # stop the container docker stop notebook # notebook # remove the container permanently docker rm notebook # notebook ``` ## Using the Podman CLI An alternative to using the Docker CLI is to use the Podman CLI. Podman is mostly compatible with Docker. ### Podman example If we use Podman instead of Docker in the situation given in _Example 2_, it will look like this: The example makes use of rootless Podman; in other words, the Podman command is run from a regular user account.
In a Bash shell, set the shell variables _uid_ and _gid_ to the UID and GID of the user _jovyan_ in the container. ```bash uid=1000 gid=100 ``` Set the shell variables _subuidSize_ and _subgidSize_ to the number of subordinate UIDs and GIDs, respectively. ```bash subuidSize=$(( $(podman info --format "{{ range .Host.IDMappings.UIDMap }}+{{.Size }}{{end }}" ) - 1 )) subgidSize=$(( $(podman info --format "{{ range .Host.IDMappings.GIDMap }}+{{.Size }}{{end }}" ) - 1 )) ``` This command pulls the `quay.io/jupyter/r-notebook` image tagged `2025-12-31` from Quay.io if it is not already present on the local host. It then starts a container running a Jupyter Server with the JupyterLab frontend and exposes the server on host port 10000. The server logs appear in the terminal and include a URL to the server but with the internal container port (8888) instead of the correct host port (10000). ```bash podman run -it --rm -p 10000:8888 \ -v "${PWD}":/home/jovyan/work --user $uid:$gid \ --uidmap $uid:0:1 --uidmap 0:1:$uid --uidmap $(($uid+1)):$(($uid+1)):$(($subuidSize-$uid)) \ --gidmap $gid:0:1 --gidmap 0:1:$gid --gidmap $(($gid+1)):$(($gid+1)):$(($subgidSize-$gid)) \ quay.io/jupyter/r-notebook:2025-12-31 ``` ```{warning} The `podman run` options `--uidmap` and `--gidmap` can be used to map the container user _jovyan_ to the regular user on the host when running rootless Podman. The same Podman command should not be run with sudo (i.e. running rootful Podman) because then the mapping would map the container user _jovyan_ to the root user on the host. It's a good security practice to run programs with as few privileges as possible. ``` ```{note} The `podman run` command in the example above maps all subuids and subgids of the user into the container. That works fine but is actually more than needed. The `podman run` option `--userns=auto` will, for instance, not be possible to use as long as there are no unused subuids and subgids available. 
The example could be improved by investigating in more detail which UIDs and GIDs need to be available in the container and then mapping only those. ``` Pressing `Ctrl-C` twice shuts down the Server and immediately destroys the container. New files and changes in `~/work` in the container will be preserved. Any other changes made in the container will be lost. ## Using Binder [Binder](https://mybinder.org/) is a service that allows you to create and share custom computing environments for projects in version control. You can use any of the Jupyter Docker Stacks images as a basis for a Binder-compatible Dockerfile. See the [docker-stacks example](https://mybinder.readthedocs.io/en/latest/examples/sample_repos.html#using-a-docker-image-from-the-jupyter-docker-stacks-repository) and [Using a Dockerfile](https://mybinder.readthedocs.io/en/latest/tutorials/dockerfile.html) section in the [Binder documentation](https://mybinder.readthedocs.io/en/latest/index.html) for instructions. ## Using JupyterHub You can configure JupyterHub to launch Docker containers from the Jupyter Docker Stacks images. If you've been following the [Zero to JupyterHub with Kubernetes](https://z2jh.jupyter.org/en/latest/) guide, see the [Use an existing Docker image](https://z2jh.jupyter.org/en/latest/jupyterhub/customizing/user-environment.html#choose-and-use-an-existing-docker-image) section for details. If you have a custom JupyterHub deployment, see the [Picking or building a Docker image](https://jupyterhub-dockerspawner.readthedocs.io/en/latest/docker-image.html) instructions for the [dockerspawner](https://github.com/jupyterhub/dockerspawner) instead. ## Using Other Tools and Services You can use the Jupyter Docker Stacks with any Docker-compatible technology (e.g., [Docker Compose](https://docs.docker.com/compose/), [docker-py](https://github.com/docker/docker-py), or your favorite cloud container service).
See the documentation of the tool, library, or service for details about how to reference, configure, and launch containers from these images. ================================================ FILE: docs/using/selecting.md ================================================ # Selecting an Image - [Core Stacks](#core-stacks) - [Image Relationships](#image-relationships) - [Community Stacks](#community-stacks) > **Pull vs Build: Quick Guidance** > > Images are published on **Quay.io** registry and most users should start by pulling an existing image that closely matches their needs. > Pulling a pre-built image is fast and suitable for common use cases such as running notebooks, teaching, or standard data science workflows. > > Building images locally is recommended only when additional customization is required, for example: > > - adding system-level packages that cannot be installed at runtime, > - preinstalling large or complex language-specific libraries, > - creating organization-specific images or heavy customization for CI. > > If unsure, try an existing Quay.io image first; if it falls short, follow the custom images guide to build a tailored image. Using one of the Jupyter Docker Stacks requires two choices: 1. Which Docker image you wish to use 2. How you wish to start Docker containers from that image This section provides details about the first. ## Core Stacks The Jupyter team maintains a set of Docker image definitions in the GitHub repository. The following sections describe these images, including their contents, relationships, and versioning strategy. 
### jupyter/docker-stacks-foundation [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/docker-stacks-foundation) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/docker-stacks-foundation/Dockerfile) | [Quay.io image tags](https://quay.io/repository/jupyter/docker-stacks-foundation?tab=tags) `jupyter/docker-stacks-foundation` is a small image supporting a majority of [options common across all core stacks](common.md). It is the basis for all other stacks on which Jupyter-related applications can be built (e.g., kernel-based containers, [nbclient](https://github.com/jupyter/nbclient) applications, etc.). As such, it does not contain application-level software like JupyterLab, Jupyter Notebook, or JupyterHub. It contains: - Package managers - [conda](https://github.com/conda/conda): "cross-platform, language-agnostic binary package manager". - [mamba](https://github.com/mamba-org/mamba): "reimplementation of the conda package manager in C++". We use this package manager by default when installing packages. - Unprivileged user `jovyan` (`uid=1000`, configurable, [see options in the common features section](./common.md) of this documentation) in group `users` (`gid=100`) with ownership over the `/home/jovyan` and `/opt/conda` paths - `tini` and a `start.sh` script as the container entry point - useful for running alternative commands in the container as applications are added (e.g. 
`ipython`, `jupyter kernelgateway`, `jupyter lab`) - A `run-hooks.sh` script, which can source/run files in a given directory - Options for a passwordless sudo - Common system libraries like `bzip2`, `ca-certificates`, `locales` - `wget` to download external files - No preinstalled scientific computing packages ### jupyter/base-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/base-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/base-notebook/Dockerfile) | [Quay.io image tags](https://quay.io/repository/jupyter/base-notebook?tab=tags) `jupyter/base-notebook` adds base Jupyter Applications like JupyterLab, Jupyter Notebook, JupyterHub, and NBClassic and serves as the basis for all other stacks besides `jupyter/docker-stacks-foundation`. It contains: - Everything in `jupyter/docker-stacks-foundation` - Minimally functional Server (e.g., no LaTeX support for saving notebooks as PDFs) - `notebook`, `jupyterhub-singleuser`, and `jupyterlab` packages - A `start-notebook.py` script as the default command - A `start-singleuser.py` script useful for launching containers in JupyterHub - Options for a self-signed HTTPS certificate ```{warning} `jupyter/base-notebook` also contains `start-notebook.sh` and `start-singleuser.sh` files to maintain backward compatibility. External config that explicitly refers to those files should instead update to refer to `start-notebook.py` and `start-singleuser.py`. The shim `.sh` files will be removed at some future date. ``` ### jupyter/minimal-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/minimal-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/minimal-notebook/Dockerfile) | [Quay.io image tags](https://quay.io/repository/jupyter/minimal-notebook?tab=tags) `jupyter/minimal-notebook` adds command-line tools useful when working in Jupyter applications. 
It contains: - Everything in `jupyter/base-notebook` - Common useful utilities like [curl](https://curl.se), [git](https://git-scm.com/), [nano](https://www.nano-editor.org/) (actually `nano-tiny`), [tzdata](https://www.iana.org/time-zones), [unzip](https://code.launchpad.net/ubuntu/+source/unzip), and [vi](https://www.vim.org) (actually `vim-tiny`), - [TeX Live](https://www.tug.org/texlive/) for notebook document conversion ### jupyter/r-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/r-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/r-notebook/Dockerfile) | [Quay.io image tags](https://quay.io/repository/jupyter/r-notebook?tab=tags) `jupyter/r-notebook` includes popular packages from the R ecosystem listed below: - Everything in `jupyter/minimal-notebook` and its ancestor images - The [R](https://www.r-project.org/) interpreter and base environment - [IRKernel](https://irkernel.github.io/) to support R code in Jupyter notebooks - [tidyverse](https://tidyverse.org/) packages from [conda-forge](https://conda-forge.org/feedstock-outputs/index.html) - [caret](https://topepo.github.io/caret/index.html), [crayon](https://cran.r-project.org/web/packages/crayon/index.html), [devtools](https://cran.r-project.org/web/packages/devtools/index.html), [forecast](https://cran.r-project.org/web/packages/forecast/index.html), [hexbin](https://cran.r-project.org/web/packages/hexbin/index.html), [htmltools](https://cran.r-project.org/web/packages/htmltools/index.html), [htmlwidgets](https://www.htmlwidgets.org), [nycflights13](https://cran.r-project.org/web/packages/nycflights13/index.html), [randomforest](https://cran.r-project.org/web/packages/randomForest/index.html), [rcurl](https://cran.r-project.org/web/packages/RCurl/index.html), [rmarkdown](https://rmarkdown.rstudio.com), [rodbc](https://cran.r-project.org/web/packages/RODBC/index.html), 
[rsqlite](https://cran.r-project.org/web/packages/RSQLite/index.html), [shiny](https://shiny.posit.co), [tidymodels](https://www.tidymodels.org/), [unixodbc](https://www.unixodbc.org) packages from [conda-forge](https://conda-forge.org/feedstock-outputs/index.html) ### jupyter/julia-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/julia-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/julia-notebook/Dockerfile) | [Quay.io image tags](https://quay.io/repository/jupyter/julia-notebook?tab=tags) `jupyter/julia-notebook` includes popular packages from the Julia ecosystem listed below: - Everything in `jupyter/minimal-notebook` and its ancestor images - The [Julia](https://julialang.org/) compiler and base environment - [IJulia](https://github.com/JuliaLang/IJulia.jl) to support Julia code in Jupyter notebook - [Pluto.jl](https://plutojl.org/) reactive Julia notebook interface, made accessible with [jupyter-pluto-proxy](https://github.com/yuvipanda/jupyter-pluto-proxy) - [HDF5](https://github.com/JuliaIO/HDF5.jl) package ### jupyter/scipy-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/scipy-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/scipy-notebook/Dockerfile) | [Quay.io image tags](https://quay.io/repository/jupyter/scipy-notebook?tab=tags) `jupyter/scipy-notebook` includes popular packages from the scientific Python ecosystem. 
- Everything in `jupyter/minimal-notebook` and its ancestor images - [altair](https://altair-viz.github.io), [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/), [bokeh](https://docs.bokeh.org/en/latest/), [bottleneck](https://bottleneck.readthedocs.io/en/latest/), [cloudpickle](https://github.com/cloudpipe/cloudpickle), [conda-forge::blas=\*=openblas](https://www.openblas.net), [cython](https://cython.org), [dask](https://www.dask.org/), [dill](https://pypi.org/project/dill/), [h5py](https://www.h5py.org), [jupyterlab-git](https://github.com/jupyterlab/jupyterlab-git), [matplotlib-base](https://matplotlib.org/), [numba](https://numba.pydata.org/), [numexpr](https://github.com/pydata/numexpr), [openpyxl](https://openpyxl.readthedocs.io/en/stable/), [pandas](https://pandas.pydata.org/), [patsy](https://patsy.readthedocs.io/en/latest/), [protobuf](https://protobuf.dev/getting-started/pythontutorial/), [pytables](https://www.pytables.org/), [scikit-image](https://scikit-image.org), [scikit-learn](https://scikit-learn.org/stable/), [scipy](https://scipy.org/), [seaborn](https://seaborn.pydata.org/), [sqlalchemy](https://www.sqlalchemy.org/), [statsmodels](https://www.statsmodels.org/stable/index.html), [sympy](https://www.sympy.org/en/index.html), [widgetsnbextension](https://ipywidgets.readthedocs.io/en/latest/user_install.html#installing-in-classic-jupyter-notebook), [xlrd](https://www.python-excel.org) packages - [ipympl](https://github.com/matplotlib/ipympl) and [ipywidgets](https://ipywidgets.readthedocs.io/en/stable/) for interactive visualizations and plots in Python notebooks ### jupyter/tensorflow-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/tensorflow-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/tensorflow-notebook/Dockerfile) | [Quay.io image tags](https://quay.io/repository/jupyter/tensorflow-notebook?tab=tags) `jupyter/tensorflow-notebook` includes
popular Python deep learning libraries. - Everything in `jupyter/scipy-notebook` and its ancestor images - [TensorFlow](https://www.tensorflow.org/) machine learning library - [Jupyter Server Proxy](https://jupyter-server-proxy.readthedocs.io/en/latest/) to support [TensorBoard](https://www.tensorflow.org/tensorboard) ### jupyter/pytorch-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/pytorch-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/pytorch-notebook/Dockerfile) | [Quay.io image tags](https://quay.io/repository/jupyter/pytorch-notebook?tab=tags) `jupyter/pytorch-notebook` includes popular Python deep learning libraries. - Everything in `jupyter/scipy-notebook` and its ancestor images - [pytorch](https://pytorch.org/) machine learning library ### jupyter/datascience-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/datascience-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/datascience-notebook/Dockerfile) | [Quay.io image tags](https://quay.io/repository/jupyter/datascience-notebook?tab=tags) `jupyter/datascience-notebook` includes libraries for data analysis from the Python, R, and Julia communities. - Everything in the `jupyter/scipy-notebook`, `jupyter/r-notebook`, and `jupyter/julia-notebook` images and their ancestor images - [rpy2](https://rpy2.github.io/doc/latest/html/index.html) package ### jupyter/pyspark-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/pyspark-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/pyspark-notebook/Dockerfile) | [Quay.io image tags](https://quay.io/repository/jupyter/pyspark-notebook?tab=tags) `jupyter/pyspark-notebook` includes Python support for Apache Spark. 
- Everything in `jupyter/scipy-notebook` and its ancestor images - [Apache Spark](https://spark.apache.org/) with Hadoop binaries - [grpcio-status](https://github.com/grpc/grpc/tree/master/src/python/grpcio_status) - [grpcio](https://grpc.io/docs/languages/python/quickstart/) - [pyarrow](https://arrow.apache.org/docs/python/) ### jupyter/all-spark-notebook [Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/all-spark-notebook) | [Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/all-spark-notebook/Dockerfile) | [Quay.io image tags](https://quay.io/repository/jupyter/all-spark-notebook?tab=tags) `jupyter/all-spark-notebook` includes Python and R support for Apache Spark. - Everything in `jupyter/pyspark-notebook` and its ancestor images - [IRKernel](https://irkernel.github.io/) to support R code in Jupyter notebooks - [rcurl](https://cran.r-project.org/web/packages/RCurl/index.html), [sparklyr](https://spark.posit.co), [ggplot2](https://ggplot2.tidyverse.org) packages ### CUDA enabled variants We provide CUDA accelerated versions of the `pytorch-notebook` and `tensorflow-notebook` images. Prepend a CUDA prefix (versioned prefix like `cuda12-` for `pytorch-notebook` or just `cuda-` for `tensorflow-notebook`) to the image tag to allow PyTorch or TensorFlow operations to use compatible NVIDIA GPUs for accelerated computation. We only build `pytorch-notebook` for the last two major versions of CUDA. The `tensorflow-notebook` image only supports the latest CUDA version listed in the [officially tested build configurations](https://www.tensorflow.org/install/source#gpu). For example, you could use the image `quay.io/jupyter/pytorch-notebook:cuda12-python-3.11.8` or `quay.io/jupyter/tensorflow-notebook:cuda-latest`. ### Image Relationships The following diagram depicts the build dependency tree of the core images. (i.e., the `FROM` statements in their Dockerfiles). 
Any given image inherits the complete content of all ancestor images pointing to it. [![Image inheritance diagram](../images/inherit.svg)](http://interactive.blockdiag.com/?compression=deflate&src=eJyFj8FKxDAQhu_7FKEnRYKsnmTRJ9ibe1yQaTp1x2ZnSjJxqeK7mywopFB6Cnzz_X_4Wy9u6AjezffGmA57SF7femGN9IXm2TztMpdAyApKwhmNEjQA6W6TT01qE2s68s3-8GoupKd8J1YT0CNEvG2MfTFd_gWDjQpuiLaXxN21rZQv3UquzQ2WRbEVGYpcgWKciekMvpLmrHjR0ThVVk2K0yhylNB7ufzzI9-51IH5hEDA2iwkx0kluNM8tn24vz6Pf_m4UJAnQ6bIDlenhFXjI3mClbHjFEcIQ2XNWfHAezs3f34BzoTGIw) ### Builds Every Monday and whenever a pull request is merged, images are rebuilt and pushed to [the public container registry](https://quay.io/organization/jupyter). ### Versioning via image tags Whenever a docker image is pushed to the container registry, it is tagged with: - the `latest` tag - a 12-character git commit SHA like `1ffe43816ba9` - a date formatted like `2023-01-30` - OS version like `ubuntu-22.04` - a set of software version tags like `python-3.10.8` and `lab-3.5.3` ```{warning} - Tags before `2022-07-05` were sometimes incorrect. Please, do not rely on them. - Single-platform images have either `aarch64-` or `x86_64-` tag prefixes, for example, `quay.io/jupyter/base-notebook:aarch64-python-3.11.6` ``` For stability and reproducibility, you should either reference a date formatted tag from a date before the current date (in UTC) or a git commit SHA older than the latest git commit SHA in the default branch of the [jupyter/docker-stacks GitHub repository](https://github.com/jupyter/docker-stacks/). ## Community Stacks The core stacks are but a tiny sample of what's possible when combining Jupyter with other technologies. We encourage members of the Jupyter community to create their own stacks based on the core images and link them below. See the [contributing guide](../contributing/stacks.md) for information about how to create your own Jupyter Docker Stack. 
| Flavor | Binder | Description | | -------------- | ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------- | | [csharp] | [![bb]][csharp_b] | More than 200 Jupyter Notebooks with example **C#** code | | [education] | [![bb]][education_b] | **`nbgrader`** and `RISE` on top of the `datascience-notebook` image | | [ihaskell] | [![bb]][ihaskell_b] | Based on [**IHaskell**][ihaskell_project]. Includes popular packages and example notebooks | | [java] | [![bb]][java_b] | [**IJava**][ijava] kernel on top of the `minimal-notebook` image | | [sage] | [![bb]][sage_b] | [**sagemath**][sagemath] kernel on top of the `minimal-notebook` image | | [cgspatial] | [![bb]][cgspatial_b] | Major **geospatial** Python & R libraries on top of the `datascience-notebook` image | | [kotlin] | [![bb]][kotlin_b] | [**Kotlin** kernel for Jupyter/IPython][kotlin_kernel] on top of the `base-notebook` image | | [transformers] | [![bb]][transformers_b] | [**Transformers**][transformers_lib] and NLP libraries such as `Tensorflow`, `Keras`, `Jax` and `PyTorch` | | [scraper] | [![bb]][scraper_b] | **Scraper** tools (`selenium`, `chromedriver`, `beautifulsoup4`, `requests`) on `minimal-notebook` image | | [almond] | [![bb]][almond_b] | Scala kernel for Jupyter using **Almond** on top of the `base-notebook` image | | [lisp-stat] | [![bb]][lisp-stat_b] | Common Lisp statistical computing environment on top of the `minimal-notebook` image | | [sequencing] | [![bb]][sequencing_b] | Collection for bioinformatics sequencing data analysis, covering bulk RNA-seq, single-cell RNA-seq, spatial sequencing, and multi-omics | [bb]: https://static.mybinder.org/badge_logo.svg [csharp]: https://github.com/tlinnet/csharp-notebook [csharp_b]: https://mybinder.org/v2/gh/tlinnet/csharp-notebook/master [education]: https://github.com/umsi-mads/education-notebook [education_b]:
https://mybinder.org/v2/gh/umsi-mads/education-notebook/master [ihaskell]: https://github.com/IHaskell/ihaskell-notebook [ihaskell_b]: https://mybinder.org/v2/gh/jamesdbrock/learn-you-a-haskell-notebook/master?urlpath=lab/tree/ihaskell_examples/ihaskell/IHaskell.ipynb [ihaskell_project]: https://github.com/IHaskell/IHaskell [java]: https://github.com/jbindinga/java-notebook [java_b]: https://mybinder.org/v2/gh/jbindinga/java-notebook/master [ijava]: https://github.com/SpencerPark/IJava [sage]: https://github.com/sharpTrick/sage-notebook [sage_b]: https://mybinder.org/v2/gh/sharpTrick/sage-notebook/master [sagemath]: https://www.sagemath.org [cgspatial]: https://github.com/SCiO-systems/cgspatial-notebook [cgspatial_b]: https://mybinder.org/v2/gh/SCiO-systems/cgspatial-notebook/master [kotlin]: https://github.com/knonm/kotlin-notebook [kotlin_b]: https://mybinder.org/v2/gh/knonm/kotlin-notebook/main [kotlin_kernel]: https://github.com/Kotlin/kotlin-jupyter [transformers]: https://github.com/ToluClassics/transformers_notebook [transformers_b]: https://mybinder.org/v2/gh/ToluClassics/transformers_notebook/main [transformers_lib]: https://huggingface.co/docs/transformers/index [scraper]: https://github.com/rgriffogoes/scraper-notebook [scraper_b]: https://mybinder.org/v2/gh/rgriffogoes/scraper-notebook/main [almond]: https://almond.sh [almond_b]: https://mybinder.org/v2/gh/almond-sh/examples/master?urlpath=lab%2Ftree%2Fnotebooks%2Findex.ipynb [lisp-stat]: https://lisp-stat.dev [lisp-stat_b]: https://mybinder.org/v2/gh/Lisp-Stat/IPS9/HEAD?urlpath=%2Fdoc%2Ftree%2Findex.ipynb [sequencing]: https://github.com/huchlab/sequencing-docker-stacks [sequencing_b]: https://mybinder.org/v2/gh/huchlab/sequencing-docker-stacks/main?urlpath=lab%2Ftree%2FREADME.ipynb ### Other GPU-accelerated notebooks | Flavor | Description | | --------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | [GPU-Jupyter][gpu] | Power of your NVIDIA GPU and GPU calculations using Tensorflow and Pytorch in collaborative notebooks. This is done by generating a Dockerfile that consists of the **nvidia/cuda** base image, the well-maintained **docker-stacks** that is integrated as a submodule, and GPU-able libraries like **Tensorflow**, **Keras** and **PyTorch** on top of it. | | [myLab TH Lübeck Images][gpu_thl] | Images based on the **jupyter/docker-stacks**, built and maintained at the [myLab TH Lübeck][gpu_mylab] using build scripts similar to iot-salzburg. Several images include GPU libraries. | | [PRP-GPU][prp_gpu] | PRP (Pacific Research Platform) maintained [registry][prp_reg] for jupyter stack based on NVIDIA CUDA-enabled image. Added the PRP image with Pytorch and some other Python packages and GUI Desktop notebook based on . | | [b-data][b-data] | GPU accelerated, multi-arch (`linux/amd64`, `linux/arm64/v8`) Docker images for [R][r_cuda], [Python][python_cuda] , [MAX][max_cuda] and [Julia][julia_cuda]. Derived from nvidia/cuda `devel`-flavored images. With [code-server][code-server] next to JupyterLab. Just Python – no [Conda][conda] / [Mamba][mamba]. 
| [gpu]: https://github.com/iot-salzburg/gpu-jupyter [gpu_thl]: https://hub.docker.com/r/hanseware/jlab-images [gpu_mylab]: https://mylab.th-luebeck.de [prp_gpu]: https://gitlab.nrp-nautilus.io/prp/jupyter-stack/-/tree/prp [prp_reg]: https://gitlab.nrp-nautilus.io/prp/jupyter-stack/container_registry [b-data]: https://github.com/b-data [r_cuda]: https://github.com/b-data/jupyterlab-r-docker-stack/blob/main/CUDA.md [python_cuda]: https://github.com/b-data/jupyterlab-python-docker-stack/blob/main/CUDA.md [max_cuda]: https://github.com/b-data/jupyterlab-mojo-docker-stack/blob/main/CUDA.md [julia_cuda]: https://github.com/b-data/jupyterlab-julia-docker-stack/blob/main/CUDA.md [code-server]: https://github.com/coder/code-server [conda]: https://github.com/conda/conda [mamba]: https://github.com/mamba-org/mamba ================================================ FILE: docs/using/specifics.md ================================================ # Image Specifics This page provides details about features specific to one or more images. ## Apache Spark™ ### Specific Docker Image Options - `-p 4040:4040` - The `jupyter/pyspark-notebook` and `jupyter/all-spark-notebook` images open [SparkUI (Spark Monitoring and Instrumentation UI)](https://spark.apache.org/docs/latest/monitoring.html) at default port `4040`, this option maps the `4040` port inside the docker container to the `4040` port on the host machine. ```{note} Every new spark context that is created is put onto an incrementing port (i.e. 4040, 4041, 4042, etc.), and it might be necessary to open multiple ports. ``` For example, `docker run --detach -p 8888:8888 -p 4040:4040 -p 4041:4041 quay.io/jupyter/pyspark-notebook`. #### IPython low-level output capture and forward Spark images (`pyspark-notebook` and `all-spark-notebook`) have been configured to disable IPython low-level output capture and forward system-wide. 
The rationale behind this choice is that Spark logs can be verbose, especially at startup when Ivy is used to load additional jars. Those logs are still available but only in the container's logs. If you want to make them appear in the notebook, you can overwrite the configuration in a user-level IPython kernel profile. To do that, you have to uncomment the following line in your `~/.ipython/profile_default/ipython_kernel_config.py` and restart the kernel. ```python c.IPKernelApp.capture_fd_output = True ``` If you have no IPython profile, you can initiate a fresh one by running the following command. ```bash ipython profile create # [ProfileCreate] Generating default config file: '/home/jovyan/.ipython/profile_default/ipython_config.py' # [ProfileCreate] Generating default config file: '/home/jovyan/.ipython/profile_default/ipython_kernel_config.py' ``` ### Build an Image with a Different Version of Spark You can build a `pyspark-notebook` image with a different `Spark` version by overriding the default value of the following arguments at build time. `all-spark-notebook` is inherited from `pyspark-notebook`, so you have to first build `pyspark-notebook` and then `all-spark-notebook` to get the same version in `all-spark-notebook`. - Spark distribution is defined by the combination of Spark, Hadoop, and Scala versions, see [Download Apache Spark](https://spark.apache.org/downloads.html) and the [archive repo](https://archive.apache.org/dist/spark/) for more information. - `openjdk_version`: The version of the OpenJDK (JRE headless) distribution (`17` by default). - This version needs to match the version supported by the Spark distribution used above. - See [Spark Overview](https://spark.apache.org/docs/latest/#downloading) and [Ubuntu packages](https://packages.ubuntu.com/search?keywords=openjdk). - `spark_version` (optional): The Spark version to install, for example `3.5.0`. If not specified (this is the default), latest Spark will be installed. 
Note: to support Python 3.12, we currently install Spark v4 preview versions. - `hadoop_version`: The Hadoop version (`3` by default). Note that _Spark < 3.3_ requires specifying the `major.minor` Hadoop version (e.g., `3.2`). - `scala_version` (optional): The Scala version, for example `2.13` (not specified by default). Starting with _Spark >= 3.2_, the distribution file might contain the Scala version. - `spark_download_url`: URL to use for Spark downloads. You may need to use the <https://archive.apache.org/dist/spark/> URL if you want to download old Spark versions. For example, here is how to build a `pyspark-notebook` image with Spark `3.2.0`, Hadoop `3.2`, and OpenJDK `11`. ```{warning} This recipe is not tested and might be broken. ``` ```bash # From the root of the project # Build the image with different arguments docker build --rm --force-rm \ -t my-pyspark-notebook ./images/pyspark-notebook \ --build-arg openjdk_version=11 \ --build-arg spark_version=3.2.0 \ --build-arg hadoop_version=3.2 \ --build-arg spark_download_url="https://archive.apache.org/dist/spark/" # Check the newly built image docker run -it --rm my-pyspark-notebook pyspark --version # Welcome to # ____ __ # / __/__ ___ _____/ /__ # _\ \/ _ \/ _ `/ __/ '_/ # /___/ .__/\_,_/_/ /_/\_\ version 3.2.0 # /_/ # Using Scala version 2.12.15, OpenJDK 64-Bit Server VM, 11.0.21 # Branch HEAD # Compiled by user ubuntu on 2021-10-06T12:46:30Z # Revision 5d45a415f3a29898d92380380cfd82bfc7f579ea # Url https://github.com/apache/spark # Type --help for more information. ``` ### Usage Examples The `jupyter/pyspark-notebook` and `jupyter/all-spark-notebook` images support the use of [Apache Spark](https://spark.apache.org/) in Python and R notebooks. The following sections provide some examples of how to get started using them. #### Using Spark Local Mode Spark **local mode** is useful for experimentation on small data when you do not have a Spark cluster available.
```{warning} In these examples, Spark spawns all the main execution components in the same single JVM. You can read additional info about local mode [here](https://books.japila.pl/apache-spark-internals/local/). If you want to use all the CPU, one of the simplest ways is to set up a [Spark Standalone Cluster](https://spark.apache.org/docs/latest/spark-standalone.html). ``` ##### Local Mode in Python In a Python notebook. ```python from pyspark.sql import SparkSession # Spark session & context spark = SparkSession.builder.master("local").getOrCreate() sc = spark.sparkContext # Sum of the first 100 whole numbers rdd = sc.parallelize(range(100 + 1)) rdd.sum() # 5050 ``` ##### Local Mode in R In an R notebook with [SparkR][sparkr]. ```R library(SparkR) # Spark session & context sc <- sparkR.session("local") # Sum of the first 100 whole numbers sdf <- createDataFrame(list(1:100)) dapplyCollect(sdf, function(x) { x <- sum(x)} ) # 5050 ``` In an R notebook with [sparklyr][sparklyr]. ```R library(sparklyr) # Spark configuration conf <- spark_config() # Set the catalog implementation in-memory conf$spark.sql.catalogImplementation <- "in-memory" # Spark session & context sc <- spark_connect(master = "local", config = conf) # Sum of the first 100 whole numbers sdf_len(sc, 100, repartition = 1) %>% spark_apply(function(e) sum(e)) # 5050 ``` #### Connecting to a Spark Cluster in Standalone Mode Connection to Spark Cluster on **[Standalone Mode](https://spark.apache.org/docs/latest/spark-standalone.html)** requires the following set of steps: 0. Verify that the docker image (check the Dockerfile) and the Spark Cluster, which is being deployed, run the same version of Spark. 1. [Deploy Spark in Standalone Mode](https://spark.apache.org/docs/latest/spark-standalone.html). 2. Run the Docker container with `--net=host` in a location that is network-addressable by all of your Spark workers. 
(This is a [Spark networking requirement](https://spark.apache.org/docs/latest/cluster-overview.html#components).) ```{note} When using `--net=host`, you must also use the flags `--pid=host -e TINI_SUBREAPER=true`. See <https://github.com/jupyter/docker-stacks/issues/64> for details. ``` **Note**: In the following examples, we are using the Spark master URL `spark://master:7077` which shall be replaced by the URL of the Spark master. ##### Standalone Mode in Python The **same Python version** needs to be used on the notebook (where the driver is located) and on the Spark workers. The Python version used on the driver and worker side can be adjusted by setting the environment variables `PYSPARK_PYTHON` and/or `PYSPARK_DRIVER_PYTHON`, see [Spark Configuration][spark-conf] for more information. ```python from pyspark.sql import SparkSession # Spark session & context spark = SparkSession.builder.master("spark://master:7077").getOrCreate() sc = spark.sparkContext # Sum of the first 100 whole numbers rdd = sc.parallelize(range(100 + 1)) rdd.sum() # 5050 ``` ##### Standalone Mode in R In an R notebook with [SparkR][sparkr]. ```R library(SparkR) # Spark session & context sc <- sparkR.session("spark://master:7077") # Sum of the first 100 whole numbers sdf <- createDataFrame(list(1:100)) dapplyCollect(sdf, function(x) { x <- sum(x)} ) # 5050 ``` In an R notebook with [sparklyr][sparklyr]. ```R library(sparklyr) # Spark session & context # Spark configuration conf <- spark_config() # Set the catalog implementation in-memory conf$spark.sql.catalogImplementation <- "in-memory" sc <- spark_connect(master = "spark://master:7077", config = conf) # Sum of the first 100 whole numbers sdf_len(sc, 100, repartition = 1) %>% spark_apply(function(e) sum(e)) # 5050 ``` ### Define Spark Dependencies ```{note} This example is given for [Elasticsearch](https://www.elastic.co/docs/reference/elasticsearch-hadoop/installation).
``` Spark dependencies can be declared thanks to the `spark.jars.packages` property (see [Spark Configuration](https://spark.apache.org/docs/latest/configuration.html#runtime-environment) for more information). They can be defined as a comma-separated list of Maven coordinates at the creation of the Spark session. ```python from pyspark.sql import SparkSession spark = ( SparkSession.builder.appName("elasticsearch") .config( "spark.jars.packages", "org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0" ) .getOrCreate() ) ``` Dependencies can also be defined in the `spark-defaults.conf`. However, it has to be done by `root`, so it should only be considered to build custom images. ```dockerfile USER root RUN echo "spark.jars.packages org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0" >> "${SPARK_HOME}/conf/spark-defaults.conf" USER ${NB_UID} ``` Jars will be downloaded dynamically at the creation of the Spark session and stored by default in `${HOME}/.ivy2/jars` (can be changed by setting `spark.jars.ivy`). ## Tensorflow The `jupyter/tensorflow-notebook` image supports the use of [Tensorflow](https://www.tensorflow.org/) in a single machine or distributed mode. 
### Single Machine Mode ```python import tensorflow as tf hello = tf.Variable("Hello World!") sess = tf.Session() init = tf.global_variables_initializer() sess.run(init) sess.run(hello) ``` ### Distributed Mode ```python import tensorflow as tf hello = tf.Variable("Hello Distributed World!") server = tf.train.Server.create_local_server() sess = tf.Session(server.target) init = tf.global_variables_initializer() sess.run(init) sess.run(hello) ``` [sparkr]: https://spark.apache.org/docs/latest/sparkr.html [sparklyr]: https://spark.posit.co [spark-conf]: https://spark.apache.org/docs/latest/configuration.html ================================================ FILE: docs/using/troubleshooting.md ================================================ # Troubleshooting Common Problems When troubleshooting, you may see unexpected behaviors or receive an error message. This section provides advice on how to identify and fix some of the most commonly encountered issues. Most of the `docker run` flags used in this document are explained in detail in the [Common Features, Docker Options section](common.md#docker-options) of the documentation. ## Permission denied when mounting volumes If you are running a Docker container while mounting a local volume or host directory using the `-v` flag like so: ```bash docker run -it --rm \ -p 8888:8888 \ -v : \ quay.io/jupyter/minimal-notebook:latest ``` you might face permissions issues when trying to access the mounted volume: ```bash # assuming we mounted the volume in /home/jovyan/stagingarea # root is the owner of the mounted volume ls -ld ~/stagingarea/ # drwxr-xr-x 2 root root 4096 Feb 1 12:55 stagingarea/ touch stagingarea/kale.txt # touch: cannot touch 'stagingarea/kale.txt': Permission denied ``` In this case, the user of the container (`jovyan`) and the owner of the mounted volume (`root`) have different permission levels and ownership over the container's directories and mounts. 
The following sections cover a few of these scenarios and how to fix them. **Some things to try:** 1. **Change ownership of the volume mount** You can change the ownership of the volume mount using the `chown` command. In the case of the docker-stacks images, you can set the `CHOWN_EXTRA` and `CHOWN_EXTRA_OPTS` environment variables. For example, to change the ownership of the volume mount to the `jovyan` user (non-privileged default user in the Docker images): ```bash # running in detached mode - can also be run in interactive mode docker run --detach \ -v <my-vol>:<container-dir> \ -p 8888:8888 \ --user root \ -e CHOWN_EXTRA="<container-dir>" \ -e CHOWN_EXTRA_OPTS="-R" \ quay.io/jupyter/minimal-notebook ``` where: - `CHOWN_EXTRA=<some-dir>,<some-other-dir>`: will change the ownership and group of the specified container directory (non-recursive by default). You need to provide full paths starting with `/`. - `CHOWN_EXTRA_OPTS="-R"`: will recursively change the ownership and group of the directory specified in `CHOWN_EXTRA`. - `--user root`: you **must** run the container with the root user to change ownership at runtime. Now accessing the mount should work as expected: ```bash # assuming we mounted the volume in /home/jovyan/stagingarea ls -ld ~/stagingarea # drwxr-xr-x 2 jovyan users 4096 Feb 1 12:55 stagingarea/ touch stagingarea/kale.txt # jovyan is now the owner of /home/jovyan/stagingarea # ls -la ~/stagingarea/ # -rw-r--r-- 1 jovyan users 0 Feb 1 14:41 kale.txt ``` ```{admonition} Additional notes - If you are mounting your volume inside the `/home/` directory, you can use the `-e CHOWN_HOME=yes` and `-e CHOWN_HOME_OPTS="-R"` flags instead of the `-e CHOWN_EXTRA` and `-e CHOWN_EXTRA_OPTS` in the example above. - This solution should work in most cases where you have created a docker volume (i.e. using the [`docker volume create --name <my-volume>` command](https://docs.docker.com/engine/storage/volumes/#create-and-manage-volumes)) and mounted it using the `-v` flag in `docker run`. ``` 2.
**Matching the container's UID/GID with the host's** Docker handles mounting host directories differently from mounting volumes, even though the syntax is essentially the same (i.e. `-v`). When you initialize a Docker container using the `-v` flag, the host directories are bind-mounted directly into the container. Therefore, the permissions and ownership are copied over and will be **the same** as the ones in your local host (including user ids) which may result in permissions errors when trying to access directories or create/modify files inside. Suppose your local user has a `UID` and `GID` of `1234` and `5678`, respectively. To fix the UID discrepancies between your local directories and the container's directories, you can run the container with an explicit `NB_UID` and `NB_GID` to match that of the local user: ```bash docker run -it --rm \ --user root \ -p 8888:8888 \ -e NB_UID=1234 \ -e NB_GID=5678 \ -v "${PWD}"/test:/home/jovyan/work \ quay.io/jupyter/minimal-notebook:latest # you should see an output similar to this # Update jovyan's UID:GID to 1234:5678 # Running as jovyan: bash ``` where: - `NB_UID` and `NB_GID` should match the local user's UID and GID. - You **must** use `--user root` to ensure that the `UID` and `GID` are updated at runtime. ````{admonition} Additional notes - The caveat with this approach is that since these changes are applied at runtime, you will need to re-run the same command with the appropriate flags and environment variables if you need to recreate the container (i.e. after removing/destroying it). - If you pass a numeric UID, it **must** be in the range of 0-2147483647. - This approach only updates the UID and GID of the **existing `jovyan` user** instead of creating a new user.
From the above example: ```bash id # uid=1234(jovyan) gid=5678(jovyan) groups=5678(jovyan),100(users) ``` ```` ## Permission issues after changing the UID/GID and USER in the container If you have also **created a new user**, you might be experiencing any of the following issues: - the `root` user is the owner of `/home` or a mounted volume - when starting the container, you get an error such as `Failed to change ownership of the home directory.` - getting permission denied when trying to `conda install` packages **Some things to try:** 1. **Ensure the new user has ownership of `/home` and volume mounts** For example, say you want to create a user `callisto` with a `GID` and `UID` of `1234`. You will have to add the following flags to the docker run command: ```bash docker run -it --rm \ -p 8888:8888 \ --user root \ -e NB_USER=callisto \ -e NB_UID=1234 \ -e NB_GID=1234 \ -e CHOWN_HOME=yes \ -e CHOWN_HOME_OPTS="-R" \ -w "/home/callisto" \ -v "${PWD}"/test:/home/callisto/work \ quay.io/jupyter/minimal-notebook # Updated the jovyan user: # - username: jovyan -> callisto # - home dir: /home/jovyan -> /home/callisto # Update callisto UID:GID to 1234:1234 # Attempting to copy /home/jovyan to /home/callisto... # Success! # Ensuring /home/callisto is owned by 1234:1234 # Running as callisto: bash ``` where: - `-e NB_USER=callisto`: will create a new user `callisto` and automatically add it to the `users` group (does not delete jovyan) - `-e NB_UID=1234` and `-e NB_GID=1234`: will set the `UID` and `GID` of the new user (`callisto`) to `1234` - `-e CHOWN_HOME_OPTS="-R"` and `-e CHOWN_HOME=yes`: ensure that the new user is the owner of the `/home` directory and subdirectories (setting `CHOWN_HOME_OPTS="-R"` will ensure this change is applied recursively) - `-w "/home/callisto"`: sets the working directory to be the new user's home ```{admonition} Additional notes In the example above, the `-v` flag is used to mount the local volume onto the new user's `/home` directory.
However, if you are mounting a volume elsewhere, you also need to use the `-e CHOWN_EXTRA=<some-dir>` flag to avoid any permission issues (see the section [Permission denied when mounting volumes](#permission-denied-when-mounting-volumes) on this page). ``` 2. **Dynamically assign the user ID and GID** The above case ensures that the `/home` directory is owned by a newly created user with a specific `UID` and `GID`, but if you want to assign the `UID` and `GID` of the new user dynamically, you can make the following adjustments: ```bash docker run -it --rm \ -p 8888:8888 \ --user root \ -e NB_USER=callisto \ -e NB_UID="$(id -u)" \ -e NB_GID="$(id -g)" \ -e CHOWN_HOME=yes \ -e CHOWN_HOME_OPTS="-R" \ -w "/home/callisto" \ -v "${PWD}"/test:/home/callisto/work \ quay.io/jupyter/minimal-notebook ``` where: - `"$(id -u)"` and `"$(id -g)"` will dynamically assign the `UID` and `GID` of the user executing the `docker run` command to the new user (`callisto`) ## Additional tips and troubleshooting commands for permission-related errors - Pass absolute paths to the `-v` flag: ```bash -v "${PWD}"/<my-vol>:/home/jovyan/work ``` This example uses the syntax `${PWD}`, which is replaced with the full path to the current directory at runtime. The destination path should also be an absolute path starting with a `/` such as `/home/jovyan/work`. - You might want to consider using the Docker native `--user <UID>` and `--group-add users` flags instead of `-e NB_UID` and `-e NB_GID`: ```bash # note this will use the same UID from # the user calling the command, thus matching the local host docker run -it --rm \ -p 8888:8888 \ --user "$(id -u)" --group-add users \ -v <my-vol>:/home/jovyan/work quay.io/jupyter/datascience-notebook ``` This command will launch the container with a specific user UID and add that user to the `users` group to modify the files in the default `/home` and `/opt/conda` directories. This also avoids issues when trying to `conda install` additional packages.
- Use `docker inspect <container-id>` and look for the [`Mounts` section](https://docs.docker.com/engine/storage/volumes/#start-a-container-with-a-volume) to verify that the volume was created and mounted accordingly: ```json { "Mounts": [ { "Type": "volume", "Name": "my-vol", "Source": "/var/lib/docker/volumes/stagingarea/_data", "Destination": "/home/jovyan/stagingarea", "Driver": "local", "Mode": "z", "RW": true, "Propagation": "" } ] } ``` ## Problems installing conda packages from specific channels By default, the docker-stacks images have the conda channels priority set to `strict`. This may cause problems when trying to install packages from a channel with lower priority. ```bash conda config --show | grep priority # channel_priority: strict # to see its meaning conda config --describe channel_priority # checking the current channels conda config --show default_channels # default_channels: # - https://repo.anaconda.com/pkgs/main # - https://repo.anaconda.com/pkgs/r ``` **Installing packages from alternative channels:** You can install packages from other conda channels (e.g. `bioconda`) by disabling the `channel_priority` setting: ```bash # install by disabling channel priority at the command level conda install --no-channel-priority -c bioconda bioconductor-geoquery ``` Additional details are provided in the [Using Alternative Channels](../using/common.md#using-alternative-channels) section of the [Common Features](common.md) page. ## Tokens are being rejected If you are a regular user of VSCode and the Jupyter extension, you might experience either of these issues when using any of the docker-stacks images: - when clicking on the URL displayed on your command line logs, you face a "This site cannot be reached" page on your web browser - using the produced token and/or URL results in an "Invalid credentials" error on the Jupyter "Token authentication is enabled" page ```bash # example log output from the docker run command # [...]
# Or copy and paste one of these URLs: # http://3d4cf3809e3f:8888/?token=996426e890f8dc22fa6835a44442b6026cba02ee61fee6a2 # or http://127.0.0.1:8888/?token=996426e890f8dc22fa6835a44442b6026cba02ee61fee6a2 ``` **Some things to try:** 1. **Find lingering Jupyter processes in the background** The first thing you want to try is to check that no other Jupyter processes are running in the background: ```bash ps aux | grep jupyter ``` If there are existing processes running, you can kill them with: ```bash # example output from the above command # my-user 3412 ... --daemon-module=vscode_datascience_helpers.jupyter_daemon # using the pid from the above log kill 3412 ``` 2. **Turn off Jupyter auto-start in VSCode** Alternatively - you might want to ensure that the `Jupyter: Disable Jupyter Auto Start` setting is turned on to avoid this issue in the future. You can achieve this from the `Settings > Jupyter` menu in VSCode: ![VSCode Settings UI - Jupyter: Disable Jupyter Auto Start checkbox checked](../_static/using/troubleshooting/vscode-jupyter-settings.png) 3. **Route container to unused local port** Instead of mapping Docker port `8888` to local port `8888`, map to another unused local port. You can see an example of mapping to local port `8001`: ```bash docker run -it --rm -p 8001:8888 quay.io/jupyter/datascience-notebook ``` When the terminal provides the link to access Jupyter: <http://127.0.0.1:8888/lab?token=...>, change the default port value of `8888` in the URL to the port value mapped with the `docker run` command. In this example, we use 8001, so the edited link would be: <http://127.0.0.1:8001/lab?token=...>. Note: Port mapping for Jupyter has other applications outside of Docker. For example, it can be used to allow multiple Jupyter instances when using SSH to control cloud devices. ================================================ FILE: examples/README.md ================================================ # Examples These examples are not tested and might not work.
Please, send PRs if you start using these examples and see some issues. ================================================ FILE: examples/docker-compose/README.md ================================================ # Docker Compose example This example demonstrates how to deploy docker-stack notebook containers to any Docker Machine-controlled host using Docker Compose. ## Prerequisites - [Docker Engine](https://docs.docker.com/engine/) 1.10.0+ - [Docker Machine](https://docs.docker.com/machine/) 0.6.0+ - [Docker Compose](https://docs.docker.com/compose/) 1.6.0+ See the [installation instructions](https://docs.docker.com/engine/installation/) for your environment. ## Quickstart Build and run a `jupyter/minimal-notebook` image on a VirtualBox VM on local desktop. ```bash # create a Docker Machine-controlled VirtualBox VM bin/vbox.sh mymachine # activate the docker machine eval "$(docker-machine env mymachine)" # build the notebook image on the machine notebook/build.sh # bring up the notebook container notebook/up.sh ``` To stop and remove the container: ```bash notebook/down.sh ``` ## FAQ ### How do I specify which docker-stack notebook image to deploy? You can customize the docker-stack notebook image to deploy by modifying the `notebook/Dockerfile`. For example, you can build and deploy a `jupyter/all-spark-notebook` by modifying the Dockerfile like so: ```dockerfile FROM quay.io/jupyter/all-spark-notebook # Your RUN commands and so on ``` Once you modify the Dockerfile, don't forget to rebuild the image. ```bash # activate the docker machine eval "$(docker-machine env mymachine)" notebook/build.sh ``` ### Can I run multiple notebook containers on the same VM? Yes. Set environment variables to specify unique names and ports when running the `up.sh` command.
```bash NAME=my-notebook PORT=9000 notebook/up.sh NAME=your-notebook PORT=9001 notebook/up.sh ``` To stop and remove the containers: ```bash NAME=my-notebook notebook/down.sh NAME=your-notebook notebook/down.sh ``` ### Where are my notebooks stored? The `up.sh` creates a Docker volume named after the notebook container with a `-work` suffix, e.g., `my-notebook-work`. ### Can multiple notebook containers share the same notebook volume? Yes. Set the `WORK_VOLUME` environment variable to the same value for each notebook. ```bash NAME=my-notebook PORT=9000 WORK_VOLUME=our-work notebook/up.sh NAME=your-notebook PORT=9001 WORK_VOLUME=our-work notebook/up.sh ``` ### How do I run over HTTPS? To run the Jupyter Server with a self-signed certificate, pass the `--secure` option to the `up.sh` script. You must also provide a password, which will be used to secure the Jupyter Server. You can specify the password by setting the `PASSWORD` environment variable, or by passing it to the `up.sh` script. ```bash PASSWORD=a_secret notebook/up.sh --secure # or notebook/up.sh --secure --password a_secret ``` ### Can I use Let's Encrypt certificate chains? Sure. If you want to secure access to publicly addressable notebook containers, you can generate a free certificate using the [Let's Encrypt](https://letsencrypt.org) service. This example includes the `bin/letsencrypt.sh` script, which runs the `letsencrypt` client to create a full-chain certificate and private key, and stores them in a Docker volume. ```{note} The script hard codes several `letsencrypt` options, one of which automatically agrees to the Let's Encrypt Terms of Service. ``` The following command will create a certificate chain and store it in a Docker volume named `mydomain-secrets`. ```bash FQDN=host.mydomain.com EMAIL=myemail@somewhere.com \ SECRETS_VOLUME=mydomain-secrets \ bin/letsencrypt.sh ``` Now run `up.sh` with the `--letsencrypt` option. You must also provide the name of the secrets volume and a password. 
```bash PASSWORD=a_secret SECRETS_VOLUME=mydomain-secrets notebook/up.sh --letsencrypt # or notebook/up.sh --letsencrypt --password a_secret --secrets mydomain-secrets ``` Be aware that Let's Encrypt has a pretty [low rate limit per domain](https://community.letsencrypt.org/t/public-beta-rate-limits/4772/3) at the moment. You can avoid exhausting your limit by testing against the Let's Encrypt staging servers. To hit their staging servers, set the environment variable `CERT_SERVER=--staging`. ```bash FQDN=host.mydomain.com EMAIL=myemail@somewhere.com \ CERT_SERVER=--staging \ bin/letsencrypt.sh ``` Also, be aware that Let's Encrypt certificates are short-lived (90 days). If you need them for a longer period of time, you'll need to manually set up a cron job to run the renewal steps. (You can reuse the command above.) ### Can I deploy to any Docker Machine host? Yes, you should be able to deploy to any Docker Machine-controlled host. To make it easier to get up and running, this example includes scripts to provision Docker Machines to VirtualBox and IBM SoftLayer, but more scripts are welcome! To create a Docker machine using a VirtualBox VM on local desktop: ```bash bin/vbox.sh mymachine ``` To create a Docker machine using a virtual device on IBM SoftLayer: ```bash export SOFTLAYER_USER=my_softlayer_username export SOFTLAYER_API_KEY=my_softlayer_api_key export SOFTLAYER_DOMAIN=my.domain # Create virtual device bin/softlayer.sh myhost # Add DNS entry (SoftLayer DNS zone must exist for SOFTLAYER_DOMAIN) bin/sl-dns.sh myhost ``` ## Troubleshooting ### Unable to connect to VirtualBox VM on Mac OS X when using Cisco VPN client The Cisco VPN client blocks access to IP addresses that it does not know about, and may block access to a new VM if it is created while the Cisco VPN client is running. 1. Stop Cisco VPN client. (It does not allow modifications to route table). 2. Run `ifconfig` to list `vboxnet` virtual network devices. 3. 
Run `sudo route -nv add -net 192.168.99 -interface vboxnetX`, where X is the number of the virtual device assigned to the VirtualBox VM.
4. Start Cisco VPN client.

================================================
FILE: examples/docker-compose/bin/letsencrypt.sh
================================================
#!/bin/bash
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# Use https://letsencrypt.org to create a certificate for a single domain
# and store it in a Docker volume.
#
# Environment:
#   FQDN            (required) domain to request the certificate for
#   EMAIL           (required) contact address passed to the letsencrypt client
#   CERT_SERVER     (optional) extra server flag, e.g. `--staging`
#   SECRETS_VOLUME  (optional) Docker volume that receives the certificate
#                   (default: my-notebook-secrets)

set -e

# Get domain and email from environment
[ -z "${FQDN}" ] && \
    echo "ERROR: Must set FQDN environment variable" && \
    exit 1

[ -z "${EMAIL}" ] && \
    echo "ERROR: Must set EMAIL environment variable" && \
    exit 1

# letsencrypt certificate server type (default is production).
# Set `CERT_SERVER=--staging` for staging.
# Default to an empty string: single quotes are literal inside double
# quotes, so a default written as '' would be stored as two quote characters.
: "${CERT_SERVER:=}"

# Create Docker volume to contain the cert
: "${SECRETS_VOLUME:=my-notebook-secrets}"
docker volume create --name "${SECRETS_VOLUME}" 1>/dev/null

# Generate the cert and save it to the Docker volume.
# ${CERT_SERVER:+...} expands to nothing when CERT_SERVER is empty, so the
# client never receives a stray empty argument.
docker run -it --rm \
    -p 80:80 \
    -v "${SECRETS_VOLUME}":/etc/letsencrypt \
    quay.io/letsencrypt/letsencrypt:latest \
    certonly \
    --non-interactive \
    --keep-until-expiring \
    --standalone \
    --standalone-supported-challenges http-01 \
    --agree-tos \
    --domain "${FQDN}" \
    --email "${EMAIL}" \
    ${CERT_SERVER:+"${CERT_SERVER}"}

# Make the certificate directories world-traversable (755) so that a
# container mounting this volume can read the cert and key.
# Also symlink the certs into the root of the /etc/letsencrypt
# directory so that the FQDN doesn't have to be known later.
# `ln -sf` keeps re-runs (e.g. with --keep-until-expiring) from failing on
# symlinks that already exist.
docker run -it --rm \
    -v "${SECRETS_VOLUME}":/etc/letsencrypt \
    ubuntu \
    bash -c "ln -sf /etc/letsencrypt/live/${FQDN}/* /etc/letsencrypt/ && \
        find /etc/letsencrypt -type d -exec chmod 755 {} +"

================================================
FILE: examples/docker-compose/bin/sl-dns.sh
================================================
#!/bin/bash
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

set -e

# User must have slcli installed
which slcli > /dev/null || (echo "SoftLayer cli not found (pip install softlayer)"; exit 1)

USAGE="Usage: $(basename "${0}") machine_name [domain]"
E_BADARGS=85

# Machine name is first command line arg
MACHINE_NAME="${1}" && [ -z "${MACHINE_NAME}" ] && echo "${USAGE}" && exit ${E_BADARGS}

# Use SOFTLAYER_DOMAIN env var if domain name not set as second arg
DOMAIN="${2:-$SOFTLAYER_DOMAIN}" && [ -z "${DOMAIN}" ] && \
    echo "Must specify domain or set SOFTLAYER_DOMAIN environment variable" && \
    echo "${USAGE}" && exit ${E_BADARGS}

IP=$(docker-machine ip "${MACHINE_NAME}")

slcli dns record-add "${DOMAIN}" "${MACHINE_NAME}" A "${IP}"

================================================
FILE: examples/docker-compose/bin/softlayer.sh
================================================
#!/bin/bash
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# Set default SoftLayer VM settings
: "${SOFTLAYER_CPU:=4}"
export SOFTLAYER_CPU
: "${SOFTLAYER_DISK_SIZE:=100}"
export SOFTLAYER_DISK_SIZE
: "${SOFTLAYER_MEMORY:=4096}"
export SOFTLAYER_MEMORY
: "${SOFTLAYER_REGION:=wdc01}"
export SOFTLAYER_REGION

docker-machine create --driver softlayer "$@"

================================================
FILE: examples/docker-compose/bin/vbox.sh
================================================
#!/bin/bash
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# Create a Docker Machine-controlled VirtualBox VM; all arguments are
# forwarded to `docker-machine create`.

# Set reasonable default VM settings
: "${VIRTUALBOX_CPUS:=4}"
export VIRTUALBOX_CPUS
# The docker-machine virtualbox driver reads the CPU count from
# VIRTUALBOX_CPU_COUNT (the name virtualbox.makefile uses as well).
# Keep VIRTUALBOX_CPUS for existing callers and forward its value unless
# VIRTUALBOX_CPU_COUNT is already set.
: "${VIRTUALBOX_CPU_COUNT:=${VIRTUALBOX_CPUS}}"
export VIRTUALBOX_CPU_COUNT
: "${VIRTUALBOX_MEMORY_SIZE:=4096}"
export VIRTUALBOX_MEMORY_SIZE

docker-machine create --driver virtualbox "$@"

================================================
FILE: examples/docker-compose/notebook/Dockerfile
================================================
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
# Pick your favorite docker-stacks image FROM quay.io/jupyter/minimal-notebook USER root # Add permanent apt-get installs and other root commands here # e.g., RUN apt-get install --yes --no-install-recommends npm nodejs USER ${NB_UID} # Switch back to jovyan to avoid accidental container runs as root # Add permanent mamba/pip/conda installs, data files, other user libs here # e.g., RUN pip install --no-cache-dir flake8 ================================================ FILE: examples/docker-compose/notebook/build.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # Setup environment # shellcheck source=examples/docker-compose/notebook/env.sh source "${DIR}/env.sh" # Build the notebook image docker-compose -f "${DIR}/notebook.yml" build ================================================ FILE: examples/docker-compose/notebook/down.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # Setup environment # shellcheck source=examples/docker-compose/notebook/env.sh source "${DIR}/env.sh" # Bring down the notebook container, using container name as project name docker-compose -f "${DIR}/notebook.yml" -p "${NAME}" down ================================================ FILE: examples/docker-compose/notebook/env.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. # Set default values for environment variables required by notebook compose # configuration file. 
# Container name : "${NAME:=my-notebook}" export NAME # Exposed container port : "${PORT:=80}" export PORT # Container work volume name : "${WORK_VOLUME:=${NAME}-work}" export WORK_VOLUME # Container secrets volume name : "${SECRETS_VOLUME:=${NAME}-secrets}" export SECRETS_VOLUME ================================================ FILE: examples/docker-compose/notebook/letsencrypt-notebook.yml ================================================ --- # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. version: "2" services: notebook: build: . image: my-notebook container_name: ${NAME} volumes: - "work:/home/jovyan/work" - "secrets:/etc/letsencrypt" ports: - "${PORT}:8888" environment: USE_HTTPS: "yes" PASSWORD: ${PASSWORD} command: > start-notebook.py --ServerApp.certfile=/etc/letsencrypt/fullchain.pem --ServerApp.keyfile=/etc/letsencrypt/privkey.pem volumes: work: external: name: ${WORK_VOLUME} secrets: external: name: ${SECRETS_VOLUME} ================================================ FILE: examples/docker-compose/notebook/notebook.yml ================================================ --- # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. version: "2" services: notebook: build: . image: my-notebook container_name: ${NAME} volumes: - "work:/home/jovyan/work" ports: - "${PORT}:8888" volumes: work: external: name: ${WORK_VOLUME} ================================================ FILE: examples/docker-compose/notebook/secure-notebook.yml ================================================ --- # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. version: "2" services: notebook: build: . 
image: my-notebook container_name: ${NAME} volumes: - "work:/home/jovyan/work" ports: - "${PORT}:8888" environment: USE_HTTPS: "yes" PASSWORD: ${PASSWORD} volumes: work: external: name: ${WORK_VOLUME} ================================================ FILE: examples/docker-compose/notebook/up.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. set -e DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" USAGE="Usage: $(basename "${0}") [--secure | --letsencrypt] [--password PASSWORD] [--secrets SECRETS_VOLUME]" # Parse args to determine security settings SECURE=${SECURE:=no} LETSENCRYPT=${LETSENCRYPT:=no} while [[ $# -gt 0 ]]; do key="${1}" case "${key}" in --secure) SECURE=yes ;; --letsencrypt) LETSENCRYPT=yes ;; --secrets) SECRETS_VOLUME="${2}" shift # past argument ;; --password) PASSWORD="${2}" export PASSWORD shift # past argument ;; *) # unknown option ;; esac shift # past argument or value done if [[ "${LETSENCRYPT}" == yes || "${SECURE}" == yes ]]; then if [ -z "${PASSWORD:+x}" ]; then echo "ERROR: Must set PASSWORD if running in secure mode" echo "${USAGE}" exit 1 fi if [ "${LETSENCRYPT}" == yes ]; then CONFIG=letsencrypt-notebook.yml if [ -z "${SECRETS_VOLUME:+x}" ]; then echo "ERROR: Must set SECRETS_VOLUME if running in letsencrypt mode" echo "${USAGE}" exit 1 fi else CONFIG=secure-notebook.yml fi export PORT=${PORT:=443} else CONFIG=notebook.yml export PORT=${PORT:=80} fi # Setup environment # shellcheck source=examples/docker-compose/notebook/env.sh source "${DIR}/env.sh" # Create a Docker volume to store notebooks docker volume create --name "${WORK_VOLUME}" # Bring up a notebook container, using container name as project name echo "Bringing up notebook '${NAME}'" docker-compose -f "${DIR}/${CONFIG}" -p "${NAME}" up -d IP=$(docker-machine ip "$(docker-machine active)") echo "Notebook ${NAME} listening on ${IP}:${PORT}" 
================================================ FILE: examples/make-deploy/Dockerfile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. # Pick your favorite docker-stacks image FROM quay.io/jupyter/minimal-notebook USER root # Add permanent apt-get installs and other root commands here # e.g., RUN apt-get install --yes --no-install-recommends npm nodejs USER ${NB_UID} # Switch back to jovyan to avoid accidental container runs as root # Add permanent mamba/pip/conda installs, data files, other user libs here # e.g., RUN pip install --no-cache-dir flake8 ================================================ FILE: examples/make-deploy/Makefile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. .PHONY: help check image notebook IMAGE:=my-notebook # Common, extensible docker run command define RUN_NOTEBOOK @docker volume create --name $(WORK_VOLUME) > /dev/null -@docker rm --force $(NAME) 2> /dev/null @docker run --detach -p $(PORT):8888 \ --name $(NAME) \ -v $(WORK_VOLUME):/home/jovyan/work \ $(DOCKER_ARGS) \ $(IMAGE) bash -c "$(PRE_CMD) chown jovyan /home/jovyan/work && start-notebook.py $(ARGS)" > /dev/null @echo "DONE: Notebook '$(NAME)' listening on $$(docker-machine ip $$(docker-machine active)):$(PORT)" endef help: @cat README.md check: @which docker-machine > /dev/null || (echo "ERROR: docker-machine not found (brew install docker-machine)"; exit 1) @which docker > /dev/null || (echo "ERROR: docker not found (brew install docker)"; exit 1) @docker | grep volume > /dev/null || (echo "ERROR: docker 1.9.0+ required"; exit 1) image: DOCKER_ARGS?= image: @docker build --rm $(DOCKER_ARGS) --tag $(IMAGE) . 
notebook: PORT?=80 notebook: NAME?=notebook notebook: WORK_VOLUME?=$(NAME)-data notebook: check $(RUN_NOTEBOOK) # docker-machine drivers include virtualbox.makefile include softlayer.makefile # Preset notebook configurations include self-signed.makefile include letsencrypt.makefile ================================================ FILE: examples/make-deploy/README.md ================================================ # Make deploy example This folder contains a Makefile and a set of supporting files demonstrating how to run a docker-stack notebook container on a docker-machine controlled host. ## Prerequisites - make 3.81+ - Ubuntu users: Be aware of [make 3.81 defect 483086](https://bugs.launchpad.net/ubuntu/+source/make-dfsg/+bug/483086) which exists in 14.04 LTS but is fixed in 15.04+ - docker-machine 0.5.0+ - docker 1.9.0+ ## Quickstart To show what's possible, here's how to run the `jupyter/minimal-notebook` on a brand-new local virtualbox. ```bash # create a new VM make virtualbox-vm NAME=dev # make the new VM the active docker machine eval $(docker-machine env dev) # pull a docker stack and build a local image from it make image # start a Server in a container make notebook ``` The last command will log the IP address and port to visit in your browser. ## FAQ ### Can I run multiple notebook containers on the same VM? Yes. Specify a unique name and port on the `make notebook` command. ```bash make notebook NAME=my-notebook PORT=9000 make notebook NAME=your-notebook PORT=9001 ``` ### Can multiple notebook containers share their notebook directory? Yes. ```bash make notebook NAME=my-notebook PORT=9000 WORK_VOLUME=our-work make notebook NAME=your-notebook PORT=9001 WORK_VOLUME=our-work ``` ### How do I run over HTTPS? Instead of `make notebook`, run `make self-signed-notebook PASSWORD=your_desired_password`. This target gives you a notebook with a self-signed certificate. ### That self-signed certificate is a pain. Let's Encrypt? Yes. Please. 
```bash make letsencrypt FQDN=host.mydomain.com EMAIL=myemail@somewhere.com make letsencrypt-notebook ``` The first command creates a Docker volume named after the notebook container with a `-secrets` suffix. It then runs the `letsencrypt` client with a slew of options (one of which has you automatically agreeing to the Let's Encrypt Terms of Service, see the Makefile). The second command mounts the secrets volume and configures Jupyter to use the full-chain certificate and private key. Be aware: Let's Encrypt has a pretty [low rate limit per domain](https://community.letsencrypt.org/t/public-beta-rate-limits/4772/3) at the moment. You can avoid exhausting your limit by testing against the Let's Encrypt staging servers. To hit their staging servers, set the environment variable `CERT_SERVER=--staging`. ```bash make letsencrypt FQDN=host.mydomain.com EMAIL=myemail@somewhere.com CERT_SERVER=--staging ``` Also, keep in mind Let's Encrypt certificates are short-lived: 90 days at the moment. You'll need to manually set up a cron job to run the renewal steps at the moment. (You can reuse the first command above.) ### My pip/conda/apt-get installs disappear every time I restart the container. Can I make them permanent? ```bash # add your pip, conda, apt-get, etc. permanent features to the Dockerfile where # indicated by the comments in the Dockerfile vi Dockerfile make image make notebook ``` ### How do I upgrade my Docker container? ```bash make image DOCKER_ARGS=--pull make notebook ``` The first line pulls the latest version of the Docker image used in the local Dockerfile. Then it rebuilds the local Docker image containing any customizations you may have added to it. The second line kills your currently running notebook container, and starts a fresh one using the new image. ### Can I run on another VM provider other than VirtualBox? Yes. As an example, there's a `softlayer.makefile` included in this repo as an example. 
You would use it like so: ```bash make softlayer-vm NAME=myhost \ SOFTLAYER_DOMAIN=your_desired_domain \ SOFTLAYER_USER=your_user_id \ SOFTLAYER_API_KEY=your_api_key eval $(docker-machine env myhost) # optional, creates a real DNS entry for the VM using the machine name as the hostname make softlayer-dns SOFTLAYER_DOMAIN=your_desired_domain make image make notebook ``` If you'd like to add support for another docker-machine driver, use the `softlayer.makefile` as a template. ### Where are my notebooks stored? `make notebook` creates a Docker volume named after the notebook container with a `-data` suffix. ### Uh ... make? Yes, sorry Windows users. It got the job done for a simple example. We can certainly accept other deployment mechanism examples in the parent folder or in other repos. ### Are there any other options? Yes indeed. `cat` the Makefiles and look at the target parameters. ================================================ FILE: examples/make-deploy/letsencrypt.makefile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. # BE CAREFUL when using Docker engine <1.10 because running a container with # `--rm` option while mounting a docker volume may wipe out the volume. # See issue: https://github.com/moby/moby/issues/17907 # Use letsencrypt production server by default to get a real cert. # Use CERT_SERVER=--staging to hit the staging server (not a real cert). 
# `letsencrypt` requests a certificate for $(FQDN) and stores it in the
# $(SECRETS_VOLUME) Docker volume; FQDN and EMAIL are required.
letsencrypt: NAME?=notebook
letsencrypt: SECRETS_VOLUME?=$(NAME)-secrets
letsencrypt: TMP_CONTAINER?=$(NAME)-tmp
letsencrypt: CERT_SERVER?=
# NOTEBOOK_IMAGE was referenced below but never defined, which left the
# second `docker run` without an image. Default it to the image built by
# `make image` (IMAGE in the main Makefile).
letsencrypt: NOTEBOOK_IMAGE?=$(IMAGE)
letsencrypt:
	@test -n "$(FQDN)" || \
		(echo "ERROR: FQDN not defined or blank"; exit 1)
	@test -n "$(EMAIL)" || \
		(echo "ERROR: EMAIL not defined or blank"; exit 1)
	@docker volume create --name $(SECRETS_VOLUME) > /dev/null
	@docker run -it -p 80:80 \
		--name=$(TMP_CONTAINER) \
		-v $(SECRETS_VOLUME):/etc/letsencrypt \
		quay.io/letsencrypt/letsencrypt:latest \
		certonly \
		$(CERT_SERVER) \
		--keep-until-expiring \
		--standalone \
		--standalone-supported-challenges http-01 \
		--agree-tos \
		--domain '$(FQDN)' \
		--email '$(EMAIL)'; \
		docker rm --force $(TMP_CONTAINER) > /dev/null
# The letsencrypt image has an entrypoint, so we use the notebook image
# instead so we can run arbitrary commands.
# Here we make the certificate directories world-traversable (755) so that
# a container mounting this volume can read the cert and key.
# We also symlink the certs into the root of the /etc/letsencrypt
# directory so that the FQDN doesn't have to be known later.
# `ln -sf` keeps re-runs from failing on symlinks that already exist.
	@docker run -it \
		--name=$(TMP_CONTAINER) \
		-v $(SECRETS_VOLUME):/etc/letsencrypt \
		$(NOTEBOOK_IMAGE) \
		bash -c "ln -sf /etc/letsencrypt/live/$(FQDN)/* /etc/letsencrypt/ && \
			find /etc/letsencrypt -type d -exec chmod 755 {} +"; \
		docker rm --force $(TMP_CONTAINER) > /dev/null

# `letsencrypt-notebook` starts a notebook over HTTPS using the certificate
# stored in $(SECRETS_VOLUME); PASSWORD is required.
letsencrypt-notebook: PORT?=443
letsencrypt-notebook: NAME?=notebook
letsencrypt-notebook: WORK_VOLUME?=$(NAME)-data
letsencrypt-notebook: SECRETS_VOLUME?=$(NAME)-secrets
letsencrypt-notebook: DOCKER_ARGS:=-e USE_HTTPS=yes \
	-e PASSWORD=$(PASSWORD) \
	-v $(SECRETS_VOLUME):/etc/letsencrypt
letsencrypt-notebook: ARGS:=\
	--ServerApp.certfile=/etc/letsencrypt/fullchain.pem \
	--ServerApp.keyfile=/etc/letsencrypt/privkey.pem
letsencrypt-notebook: check
	@test -n "$(PASSWORD)" || \
		(echo "ERROR: PASSWORD not defined or blank"; exit 1)
	$(RUN_NOTEBOOK)

================================================
FILE: examples/make-deploy/self-signed.makefile
================================================
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

self-signed-notebook: PORT?=443
self-signed-notebook: NAME?=notebook
self-signed-notebook: WORK_VOLUME?=$(NAME)-data
self-signed-notebook: DOCKER_ARGS:=-e USE_HTTPS=yes \
	-e PASSWORD=$(PASSWORD)
self-signed-notebook: check
	@test -n "$(PASSWORD)" || \
		(echo "ERROR: PASSWORD not defined or blank"; exit 1)
	$(RUN_NOTEBOOK)

================================================
FILE: examples/make-deploy/softlayer.makefile
================================================
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
# softlayer-vm: create a docker-machine host named $(NAME) with the
# softlayer driver.
#
# Required variables: NAME, SOFTLAYER_API_KEY, SOFTLAYER_USER, SOFTLAYER_DOMAIN.
# The SOFTLAYER_* sizing variables below are exported to the recipe's
# environment; presumably the docker-machine softlayer driver reads them
# from there (they are not passed on the command line) -- verify against
# the driver documentation.
softlayer-vm: export SOFTLAYER_CPU?=4
softlayer-vm: export SOFTLAYER_DISK_SIZE?=100
softlayer-vm: export SOFTLAYER_MEMORY?=4096
softlayer-vm: export SOFTLAYER_REGION?=wdc01
softlayer-vm: check
	@test -n "$(NAME)" || \
		(echo "ERROR: NAME not defined (make help)"; exit 1)
	@test -n "$(SOFTLAYER_API_KEY)" || \
		(echo "ERROR: SOFTLAYER_API_KEY not defined (make help)"; exit 1)
	@test -n "$(SOFTLAYER_USER)" || \
		(echo "ERROR: SOFTLAYER_USER not defined (make help)"; exit 1)
	@test -n "$(SOFTLAYER_DOMAIN)" || \
		(echo "ERROR: SOFTLAYER_DOMAIN not defined (make help)"; exit 1)
	@docker-machine create -d softlayer $(NAME)
	@echo "DONE: Docker host '$(NAME)' up at $$(docker-machine ip $(NAME))"

# softlayer-dns: add an A record in $(SOFTLAYER_DOMAIN) that maps the
# active docker-machine host name to its IP address.
#
# HOST_NAME and IP hold shell command substitutions ($$(...)), so they
# are evaluated by the shell when the recipe runs, not by make.
softlayer-dns: HOST_NAME:=$$(docker-machine active)
softlayer-dns: IP:=$$(docker-machine ip $(HOST_NAME))
softlayer-dns: check
# `command -v` is the POSIX shell builtin; unlike `which` it does not
# depend on an external program being installed.
	@command -v slcli > /dev/null || (echo "softlayer cli not found (pip install softlayer)"; exit 1)
	@test -n "$(SOFTLAYER_DOMAIN)" || \
		(echo "ERROR: SOFTLAYER_DOMAIN not defined (make help)"; exit 1)
	@slcli dns record-add $(SOFTLAYER_DOMAIN) $(HOST_NAME) A $(IP)

================================================
FILE: examples/make-deploy/virtualbox.makefile
================================================
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# virtualbox-vm: create a local docker-machine host named $(NAME) with the
# virtualbox driver.
#
# Required variables: NAME.
# The VIRTUALBOX_* sizing variables are exported to the recipe's environment.
virtualbox-vm: export VIRTUALBOX_CPU_COUNT?=4
virtualbox-vm: export VIRTUALBOX_DISK_SIZE?=100000
virtualbox-vm: export VIRTUALBOX_MEMORY_SIZE?=4096
virtualbox-vm: check
	@test -n "$(NAME)" || \
		(echo "ERROR: NAME not defined (make help)"; exit 1)
	@docker-machine create -d virtualbox $(NAME)

================================================
FILE: examples/openshift/README.md
================================================
# OpenShift example

This example provides templates for deploying the Jupyter Project docker-stacks images to OpenShift.

## Prerequisites

Any OpenShift 3 environment. The templates were tested with OpenShift 3.7.
It is believed they should work with OpenShift 3.6 or later. Do be aware that the Jupyter Project docker-stacks images are very large. The OpenShift environment you are using must provide sufficient quota on the per-user space for images and the file system for running containers. If the quota is too small, pulling the images to a node in the OpenShift cluster when deploying them will fail due to lack of space. Even if the image is able to run, if the quota is only just larger than the space required for the image, you will not be able to install many packages into the container before running out of space. OpenShift Online, the public hosted version of OpenShift from Red Hat, has a quota of only 3GB for the image and container file system. As a result, only the `minimal-notebook` can be started and there is little space remaining to install additional packages. Although OpenShift Online is suitable for demonstrating these templates work, what you can do in that environment will be limited due to the size of the images. If you want to experiment with using Jupyter Notebooks in an OpenShift environment, you should instead use [Minishift](https://www.openshift.org/minishift/). Minishift provides you the ability to run OpenShift in a virtual machine on your own local computer. ## Loading the Templates To load the templates, log in to OpenShift from the command line and run: ```bash oc create -f https://raw.githubusercontent.com/jupyter-on-openshift/docker-stacks/master/examples/openshift/templates.json ``` This should create the `jupyter-notebook` template. The template can be used from the command line using the `oc new-app` command, or from the OpenShift web console by selecting _Add to Project_. This `README` is only going to explain deploying from the command line.
## Deploying a Notebook To deploy a notebook from the command line using the template, run: ```bash oc new-app --template jupyter-notebook ``` The output will be similar to: ```lang-none --> Deploying template "jupyter/jupyter-notebook" to project jupyter Jupyter Notebook --------- Template for deploying Jupyter Notebook images. * With parameters: * APPLICATION_NAME=notebook * NOTEBOOK_IMAGE=docker.io/jupyter/minimal-notebook:latest * NOTEBOOK_PASSWORD=ded4d7cada554aa48e0db612e1ed1080 # generated --> Creating resources ... configmap "notebook-cfg" created deploymentconfig "notebook" created route "notebook" created service "notebook" created --> Success Access your application via route 'notebook-jupyter.b9ad.pro-us-east-1.openshiftapps.com' Run 'oc status' to view your app. ``` When no template parameters are provided, the name of the deployed notebook will be `notebook`. The image used will be: ```lang-none docker.io/jupyter/minimal-notebook:latest ``` A password you can use when accessing the notebook will be auto generated and is displayed in the output from running `oc new-app`. To see the hostname for accessing the notebook run: ```bash oc get routes ``` The output will be similar to: ```lang-none NAME HOST/PORT PATH SERVICES PORT TERMINATION WILDCARD notebook notebook-jupyter.abcd.pro-us-east-1.openshiftapps.com notebook 8888-tcp edge/Redirect None ``` A secure route will be used to expose the notebook outside the OpenShift cluster, so in this case the URL would be: ```lang-none https://notebook-jupyter.abcd.pro-us-east-1.openshiftapps.com/ ``` When prompted, enter the password for the notebook. ## Passing Template Parameters To override the name for the notebook, the image used, and the password, you can pass template parameters using the `--param` option. 
```bash oc new-app --template jupyter-notebook \ --param APPLICATION_NAME=mynotebook \ --param NOTEBOOK_IMAGE=docker.io/jupyter/scipy-notebook:latest \ --param NOTEBOOK_PASSWORD=mypassword ``` You can deploy any of the Jupyter Project docker-stacks images. If you don't care what version of the image is used, add the `:latest` tag at the end of the image name, otherwise use the hash corresponding to the image version you want to use. ## Deleting the Notebook Instance To delete the notebook instance, run `oc delete` using a label selector for the application name. ```bash oc delete all,configmap --selector app=mynotebook ``` ## Adding Persistent Storage You can upload notebooks and other files using the web interface of the notebook. Any uploaded files or changes you make to them will be lost when the notebook instance is restarted. If you want to save your work, you need to add persistent storage to the notebook. To add persistent storage run: ```bash oc set volume dc/mynotebook --add \ --type=pvc --claim-size=1Gi --claim-mode=ReadWriteOnce \ --claim-name mynotebook-data --name data \ --mount-path /home/jovyan ``` When you have deleted the notebook instance, if using a persistent volume, you will need to delete it in a separate step. ```bash oc delete pvc/mynotebook-data ``` ## Customizing the Configuration If you want to set any custom configuration for the notebook, you can edit the config map created by the template. ```bash oc edit configmap/mynotebook-cfg ``` The `data` field of the config map contains Python code used as the `jupyter_server_config.py` file. If you are using a persistent volume, you can also create a configuration file at: ```lang-none /home/jovyan/.jupyter/jupyter_server_config.py ``` This will be merged at the end of the configuration from the config map. Because the configuration is Python code, ensure any indenting is correct. Any errors in the configuration file will cause the notebook to fail when starting. 
If the error is in the config map, edit it again to fix it and trigger a new deployment if necessary by running: ```bash oc rollout latest dc/mynotebook ``` If you make an error in the configuration file stored in the persistent volume, you will need to scale down the notebook, so it isn't running. ```bash oc scale dc/mynotebook --replicas 0 ``` Then run: ```bash oc debug dc/mynotebook ``` to run the notebook in debug mode. This will provide you with an interactive terminal session inside a running container, but the notebook will not have been started. Edit the configuration file in the volume to fix any errors and exit the terminal session. Start up the notebook again. ```bash oc scale dc/mynotebook --replicas 1 ``` ## Changing the Notebook Password The password for the notebook is supplied as a template parameter, or if not supplied will be automatically generated by the template. It will be passed into the container through an environment variable. If you want to change the password, you can do so by editing the environment variable on the deployment configuration. ```bash oc set env dc/mynotebook JUPYTER_NOTEBOOK_PASSWORD=mypassword ``` This will trigger a new deployment so ensure you have downloaded any work if not using a persistent volume. If using a persistent volume, you could instead set up a password in the file `/home/jovyan/.jupyter/jupyter_server_config.py` as per guidelines in <https://jupyter-server.readthedocs.io/en/latest/operators/public-server.html>. ## Deploying from a Custom Image If you want to deploy a custom variant of the Jupyter Project docker-stacks images, you can replace the image name with that of your own. If the image is not stored on Docker Hub, but some other public image registry, prefix the name of the image with the image registry host details.
If the image is in your OpenShift project, because you imported the image into OpenShift, or used the docker build strategy of OpenShift to build a derived custom image, you can use the name of the image stream for the image name, including any image tag if necessary. This can be illustrated by first importing an image into the OpenShift project. ```bash oc import-image docker.io/jupyter/datascience-notebook:latest --confirm ``` Then deploy it using the name of the image stream created. ```bash oc new-app --template jupyter-notebook \ --param APPLICATION_NAME=mynotebook \ --param NOTEBOOK_IMAGE=datascience-notebook \ --param NOTEBOOK_PASSWORD=mypassword ``` Importing an image into OpenShift before deploying it means that when a notebook is started, the image need only be pulled from the internal OpenShift image registry rather than Docker Hub for each deployment. Because the images are so large, this can speed up deployments when the image hasn't previously been deployed to a node in the OpenShift cluster. 
================================================ FILE: examples/openshift/templates.json ================================================ { "kind": "Template", "apiVersion": "v1", "metadata": { "name": "jupyter-notebook", "annotations": { "openshift.io/display-name": "Jupyter Notebook", "description": "Template for deploying Jupyter Notebook images.", "iconClass": "icon-python", "tags": "python,jupyter" } }, "parameters": [ { "name": "APPLICATION_NAME", "value": "notebook", "required": true }, { "name": "NOTEBOOK_IMAGE", "value": "docker.io/jupyter/minimal-notebook:latest", "required": true }, { "name": "NOTEBOOK_PASSWORD", "from": "[a-f0-9]{32}", "generate": "expression" } ], "objects": [ { "kind": "ConfigMap", "apiVersion": "v1", "metadata": { "name": "${APPLICATION_NAME}-cfg", "labels": { "app": "${APPLICATION_NAME}" } }, "data": { "jupyter_server_config.py": "import os\n\npassword = os.environ.get('JUPYTER_NOTEBOOK_PASSWORD')\n\nif password:\n    from jupyter_server.auth import passwd\n    c.ServerApp.password = passwd(password)\n    del password\n    del os.environ['JUPYTER_NOTEBOOK_PASSWORD']\n\nimage_config_file = '/home/jovyan/.jupyter/jupyter_server_config.py'\n\nif os.path.exists(image_config_file):\n    with open(image_config_file) as fp:\n        exec(compile(fp.read(), image_config_file, 'exec'), globals())\n" } }, { "kind": "DeploymentConfig", "apiVersion": "v1", "metadata": { "name": "${APPLICATION_NAME}", "labels": { "app": "${APPLICATION_NAME}" } }, "spec": { "strategy": { "type": "Recreate" }, "triggers": [ { "type": "ConfigChange" } ], "replicas": 1, "selector": { "app": "${APPLICATION_NAME}", "deploymentconfig": "${APPLICATION_NAME}" }, "template": { "metadata": { "annotations": { "alpha.image.policy.openshift.io/resolve-names": "*" }, "labels": { "app": "${APPLICATION_NAME}", "deploymentconfig": "${APPLICATION_NAME}" } }, "spec": { "containers": [ { "name": "jupyter-notebook", "image": "${NOTEBOOK_IMAGE}", "command": [ "start-notebook.py",
"--config=/etc/jupyter/openshift/jupyter_server_config.py", "--no-browser", "--ip=0.0.0.0" ], "ports": [ { "containerPort": 8888, "protocol": "TCP" } ], "env": [ { "name": "JUPYTER_NOTEBOOK_PASSWORD", "value": "${NOTEBOOK_PASSWORD}" } ], "volumeMounts": [ { "mountPath": "/etc/jupyter/openshift", "name": "configs" } ] } ], "automountServiceAccountToken": false, "securityContext": { "supplementalGroups": [100] }, "volumes": [ { "configMap": { "name": "${APPLICATION_NAME}-cfg" }, "name": "configs" } ] } } } }, { "kind": "Route", "apiVersion": "v1", "metadata": { "name": "${APPLICATION_NAME}", "labels": { "app": "${APPLICATION_NAME}" } }, "spec": { "host": "", "to": { "kind": "Service", "name": "${APPLICATION_NAME}", "weight": 100 }, "port": { "targetPort": "8888-tcp" }, "tls": { "termination": "edge", "insecureEdgeTerminationPolicy": "Redirect" } } }, { "kind": "Service", "apiVersion": "v1", "metadata": { "name": "${APPLICATION_NAME}", "labels": { "app": "${APPLICATION_NAME}" } }, "spec": { "ports": [ { "name": "8888-tcp", "protocol": "TCP", "port": 8888, "targetPort": 8888 } ], "selector": { "app": "${APPLICATION_NAME}", "deploymentconfig": "${APPLICATION_NAME}" }, "type": "ClusterIP" } } ] } ================================================ FILE: examples/source-to-image/README.md ================================================ # Custom Jupyter Notebook images This example provides scripts for building custom Jupyter Notebook images containing notebooks, data files, and with Python packages required by the notebooks already installed. The scripts provided work with the Source-to-Image tool, and you can create the images from the command line on your own computer. Templates are also provided to enable running builds in OpenShift, as well as deploying the resulting image to OpenShift to make it available. The build scripts, when used with the Source-to-Image tool, provide similar capabilities to `repo2docker`. 
When builds are run under OpenShift with the supplied templates, it provides similar capabilities to `mybinder.org`, but where notebook instances are deployed in your existing OpenShift project and JupyterHub is not required. For separate examples of using JupyterHub with OpenShift, see the project: - <https://github.com/jupyter-on-openshift/jupyterhub-quickstart> ## Source-to-Image Project Source-to-Image (S2I) is an open source project which provides a tool for creating container images. It works by taking a base image, injecting additional source code or files into a running container created from the base image, and running a builder script in the container to process the source code or files to prepare the new image. Details on the S2I tool, and executable binaries for Linux, macOS and Windows, can be found on GitHub at: - <https://github.com/openshift/source-to-image> The tool is standalone, and can be used on any system which provides a docker daemon for running containers. To provide an end-to-end capability to build and deploy applications in containers, support for S2I is also integrated into container platforms such as OpenShift. ## Getting Started with S2I As an example of how S2I can be used to create a custom image with a bundled set of notebooks, run: ```bash s2i build \ --scripts-url https://raw.githubusercontent.com/jupyter/docker-stacks/main/examples/source-to-image \ --context-dir docs/source/examples/Notebook \ https://github.com/jupyter/notebook \ docker.io/jupyter/minimal-notebook:latest \ notebook-examples ``` This example command will pull down the Git repository and build the image `notebook-examples` using the files contained in the `docs/source/examples/Notebook` directory of that Git repository. The base image which the files will be combined with is `docker.io/jupyter/minimal-notebook:latest`, but you can specify any of the Jupyter Project `docker-stacks` images as the base image.
The resulting image from running the command can be seen by running `docker images` command: ```bash docker images # REPOSITORY TAG IMAGE ID CREATED SIZE # notebook-examples latest f5899ed1241d 2 minutes ago 2.59GB ``` You can now run the image. ```bash docker run --rm -p 8888:8888 notebook-examples ``` Open your browser on the URL displayed, and you will find the notebooks from the Git repository and can work with them. ## The S2I Builder Scripts Normally when using S2I, the base image would be S2I enabled and contain the builder scripts needed to prepare the image and define how the application in the image should be run. As the Jupyter Project `docker-stacks` images are not S2I enabled (although they could be), in the above example the `--scripts-url` option has been used to specify that the example builder scripts contained in this directory of this Git repository should be used. Using the `--scripts-url` option, the builder scripts can be hosted on any HTTP server, or you could also use builder scripts local to your computer file using an appropriate `file://` format URI argument to `--scripts-url`. The builder scripts in this directory of this repository are `assemble` and `run` and are provided as examples of what can be done. You can use the scripts as is, or create your own. The supplied `assemble` script performs a few key steps. The first steps copy files into the location they need to be when the image is run, from the directory where they are initially placed by the `s2i` command. ```bash cp -Rf /tmp/src/. 
"/home/${NB_USER}" rm -rf /tmp/src ``` The next steps are: ```bash if [ -f "/home/${NB_USER}/environment.yml" ]; then mamba env update --name root --file "/home/${NB_USER}/environment.yml" mamba clean --all -f -y else if [ -f "/home/${NB_USER}/requirements.txt" ]; then pip --no-cache-dir install -r "/home/${NB_USER}/requirements.txt" fi fi ``` This determines whether a `environment.yml` or `requirements.txt` file exists with the files and if so, runs the appropriate package management tool to install any Python packages listed in those files. This means that so long as a set of notebook files provides one of these files listing what Python packages they need, those packages will be automatically installed into the image, so they are available when the image is run. A final step is: ```bash fix-permissions "${CONDA_DIR}" fix-permissions "/home/${NB_USER}" ``` This fixes up permissions on any new files created by the build. This is necessary to ensure that when the image is run, you can still install additional files. This is important for when an image is run in `sudo` mode, or it is hosted in a more secure container platform such as Kubernetes/OpenShift where it will be run as a set user ID that isn't known in advance. As long as you preserve the first and last set of steps, you can do whatever you want in the `assemble` script to install packages, create files etc. Do be aware though that S2I builds do not run as `root` and so you cannot install additional system packages. If you need to install additional system packages, use a `Dockerfile` and normal `docker build` to first create a new custom base image from the Jupyter Project `docker-stacks` images, with the extra system packages, and then use that image with the S2I build to combine your notebooks and have Python packages installed. The `run` script in this directory is very simple and just runs the notebook application. 
```bash exec start-notebook.py "$@" ``` ## Integration with OpenShift The OpenShift platform provides integrated support for S2I type builds. Templates are provided for using the S2I build mechanism with the scripts in this directory. To load the templates run: ```bash oc create -f https://raw.githubusercontent.com/jupyter/docker-stacks/main/examples/source-to-image/templates.json ``` This will create the templates: ```bash jupyter-notebook-builder jupyter-notebook-quickstart ``` The templates can be used from the OpenShift web console or command line. This `README` is only going to explain deploying from the command line. To use the OpenShift command line to build into an image, and deploy, the set of notebooks used above, run: ```bash oc new-app --template jupyter-notebook-quickstart \ --param APPLICATION_NAME=notebook-examples \ --param GIT_REPOSITORY_URL=https://github.com/jupyter/notebook \ --param CONTEXT_DIR=docs/source/examples/Notebook \ --param BUILDER_IMAGE=docker.io/jupyter/minimal-notebook:latest \ --param NOTEBOOK_PASSWORD=mypassword ``` You can provide a password using the `NOTEBOOK_PASSWORD` parameter. If you don't set that parameter, a password will be generated, with it being displayed by the `oc new-app` command. Once the image has been built, it will be deployed. To see the hostname for accessing the notebook, run `oc get routes`. ```lang-none NAME HOST/PORT PATH SERVICES PORT TERMINATION WILDCARD notebook-examples notebook-examples-jupyter.abcd.pro-us-east-1.openshiftapps.com notebook-examples 8888-tcp edge/Redirect None ``` As the deployment will use a secure connection, the URL for accessing the notebook in this case would be <https://notebook-examples-jupyter.abcd.pro-us-east-1.openshiftapps.com>. If you only want to build an image but not deploy it, you can use the `jupyter-notebook-builder` template. You can then deploy it using the `jupyter-notebook` template provided with the [openshift](../openshift) examples directory.
See the `openshift` examples directory for further information on customizing configuration for a Jupyter Notebook deployment and deleting a deployment.

================================================
FILE: examples/source-to-image/assemble
================================================
#!/bin/bash

# S2I `assemble` script: moves the injected sources into the notebook
# user's home directory, installs the Python packages they declare and
# repairs file permissions afterwards.

# Trace every command.
set -x

# Abort on the first failing command, including failures inside a pipeline.
set -eo pipefail

# Drop dependency manifests that may already sit in the home directory.
# They can be left over when the builder image was itself produced by an
# earlier S2I build; removing them keeps already-installed packages from
# being installed a second time.

rm -f "/home/${NB_USER}/environment.yml"
rm -f "/home/${NB_USER}/requirements.txt"

# Move the sources that `s2i` placed in /tmp/src into the home directory.

cp -Rf /tmp/src/. "/home/${NB_USER}"

rm -rf /tmp/src

# Install the declared Python packages. An 'environment.yml' takes
# precedence and is applied with 'mamba'; otherwise a 'requirements.txt',
# if present, is installed with 'pip'.

if [[ -f "/home/${NB_USER}/environment.yml" ]]; then
    mamba env update --name root --file "/home/${NB_USER}/environment.yml"
    mamba clean --all -f -y
elif [[ -f "/home/${NB_USER}/requirements.txt" ]]; then
    pip --no-cache-dir install -r "/home/${NB_USER}/requirements.txt"
fi

# Repair permissions on the Python installation and the home directory so
# that everything stays writable by the 'users' group.

fix-permissions "${CONDA_DIR}"
fix-permissions "/home/${NB_USER}"

================================================
FILE: examples/source-to-image/run
================================================
#!/bin/bash

# Start up the notebook instance.
exec start-notebook.py "$@" ================================================ FILE: examples/source-to-image/save-artifacts ================================================ #!/bin/bash tar cf - --files-from /dev/null ================================================ FILE: examples/source-to-image/templates.json ================================================ { "kind": "List", "apiVersion": "v1", "items": [ { "kind": "Template", "apiVersion": "v1", "metadata": { "name": "jupyter-notebook-builder", "annotations": { "openshift.io/display-name": "Jupyter Notebook Builder", "description": "Template for building Jupyter Notebook images with bundled notebooks and files.", "iconClass": "icon-python", "tags": "python,jupyter" } }, "parameters": [ { "name": "IMAGE_NAME", "value": "notebook", "required": true }, { "name": "BUILDER_IMAGE", "value": "docker.io/jupyter/minimal-notebook:latest", "required": true }, { "name": "BUILDER_SCRIPTS", "value": "https://raw.githubusercontent.com/jupyter/docker-stacks/main/examples/source-to-image", "required": true }, { "name": "GIT_REPOSITORY_URL", "value": "", "required": true }, { "name": "GIT_REFERENCE", "value": "main", "required": true }, { "name": "CONTEXT_DIR", "value": "", "required": false } ], "objects": [ { "apiVersion": "v1", "kind": "ImageStream", "metadata": { "name": "${IMAGE_NAME}", "labels": { "app": "${IMAGE_NAME}" } } }, { "apiVersion": "v1", "kind": "BuildConfig", "metadata": { "name": "${IMAGE_NAME}", "labels": { "app": "${IMAGE_NAME}" } }, "spec": { "output": { "to": { "kind": "ImageStreamTag", "name": "${IMAGE_NAME}:latest" } }, "resources": { "limits": { "memory": "1Gi" } }, "source": { "type": "Git", "git": { "uri": "${GIT_REPOSITORY_URL}", "ref": "${GIT_REFERENCE}" }, "contextDir": "${CONTEXT_DIR}" }, "strategy": { "type": "Source", "sourceStrategy": { "from": { "kind": "DockerImage", "name": "${BUILDER_IMAGE}" }, "scripts": "${BUILDER_SCRIPTS}" } }, "triggers": [ { "type": "ConfigChange" } ] } } ] }, { "kind":
"Template", "apiVersion": "v1", "metadata": { "name": "jupyter-notebook-quickstart", "annotations": { "openshift.io/display-name": "Jupyter Notebook QuickStart", "description": "Template for deploying Jupyter Notebook images with bundled notebooks and files.", "iconClass": "icon-python", "tags": "python,jupyter" } }, "parameters": [ { "name": "APPLICATION_NAME", "value": "notebook", "required": true }, { "name": "BUILDER_IMAGE", "value": "docker.io/jupyter/minimal-notebook:latest", "required": true }, { "name": "BUILDER_SCRIPTS", "value": "https://raw.githubusercontent.com/jupyter/docker-stacks/main/examples/source-to-image", "required": true }, { "name": "GIT_REPOSITORY_URL", "value": "", "required": true }, { "name": "GIT_REFERENCE", "value": "main", "required": true }, { "name": "CONTEXT_DIR", "value": "", "required": false }, { "name": "NOTEBOOK_PASSWORD", "from": "[a-f0-9]{32}", "generate": "expression" } ], "objects": [ { "apiVersion": "v1", "kind": "ImageStream", "metadata": { "name": "${APPLICATION_NAME}", "labels": { "app": "${APPLICATION_NAME}" } } }, { "apiVersion": "v1", "kind": "BuildConfig", "metadata": { "name": "${APPLICATION_NAME}", "labels": { "app": "${APPLICATION_NAME}" } }, "spec": { "output": { "to": { "kind": "ImageStreamTag", "name": "${APPLICATION_NAME}:latest" } }, "resources": { "limits": { "memory": "1Gi" } }, "source": { "type": "Git", "git": { "uri": "${GIT_REPOSITORY_URL}", "ref": "${GIT_REFERENCE}" }, "contextDir": "${CONTEXT_DIR}" }, "strategy": { "type": "Source", "sourceStrategy": { "from": { "kind": "DockerImage", "name": "${BUILDER_IMAGE}" }, "scripts": "${BUILDER_SCRIPTS}" } }, "triggers": [ { "type": "ConfigChange" } ] } }, { "kind": "ConfigMap", "apiVersion": "v1", "metadata": { "name": "${APPLICATION_NAME}-cfg", "labels": { "app": "${APPLICATION_NAME}" } }, "data": { "jupyter_server_config.py": "import os\n\npassword = os.environ.get('JUPYTER_NOTEBOOK_PASSWORD')\n\nif password:\n    from jupyter_server.auth import passwd\n    c.ServerApp.password = passwd(password)\n    del password\n    del os.environ['JUPYTER_NOTEBOOK_PASSWORD']\n\nimage_config_file = '/home/jovyan/.jupyter/jupyter_server_config.py'\n\nif os.path.exists(image_config_file):\n    with open(image_config_file) as fp:\n        exec(compile(fp.read(), image_config_file, 'exec'), globals())\n" } }, { "kind": "DeploymentConfig", "apiVersion": "v1", "metadata": { "name": "${APPLICATION_NAME}", "labels": { "app": "${APPLICATION_NAME}" } }, "spec": { "strategy": { "type": "Recreate" }, "triggers": [ { "type": "ConfigChange" }, { "type": "ImageChange", "imageChangeParams": { "automatic": true, "containerNames": ["jupyter-notebook"], "from": { "kind": "ImageStreamTag", "name": "${APPLICATION_NAME}:latest" } } } ], "replicas": 1, "selector": { "app": "${APPLICATION_NAME}", "deploymentconfig": "${APPLICATION_NAME}" }, "template": { "metadata": { "annotations": { "alpha.image.policy.openshift.io/resolve-names": "*" }, "labels": { "app": "${APPLICATION_NAME}", "deploymentconfig": "${APPLICATION_NAME}" } }, "spec": { "containers": [ { "name": "jupyter-notebook", "image": "${APPLICATION_NAME}:latest", "command": [ "start-notebook.py", "--config=/etc/jupyter/openshift/jupyter_server_config.py", "--no-browser", "--ip=0.0.0.0" ], "ports": [ { "containerPort": 8888, "protocol": "TCP" } ], "env": [ { "name": "JUPYTER_NOTEBOOK_PASSWORD", "value": "${NOTEBOOK_PASSWORD}" } ], "volumeMounts": [ { "mountPath": "/etc/jupyter/openshift", "name": "configs" } ] } ], "automountServiceAccountToken": false, "securityContext": { "supplementalGroups": [100] }, "volumes": [ { "configMap": { "name": "${APPLICATION_NAME}-cfg" }, "name": "configs" } ] } } } }, { "kind": "Route", "apiVersion": "v1", "metadata": { "name": "${APPLICATION_NAME}", "labels": { "app": "${APPLICATION_NAME}" } }, "spec": { "host": "", "to": { "kind": "Service", "name": "${APPLICATION_NAME}", "weight": 100 }, "port": { "targetPort": "8888-tcp" }, "tls": { "termination": "edge",
"insecureEdgeTerminationPolicy": "Redirect" } } }, { "kind": "Service", "apiVersion": "v1", "metadata": { "name": "${APPLICATION_NAME}", "labels": { "app": "${APPLICATION_NAME}" } }, "spec": { "ports": [ { "name": "8888-tcp", "protocol": "TCP", "port": 8888, "targetPort": 8888 } ], "selector": { "app": "${APPLICATION_NAME}", "deploymentconfig": "${APPLICATION_NAME}" }, "type": "ClusterIP" } } ] } ] }

================================================
FILE: images/all-spark-notebook/.dockerignore
================================================
# Documentation
README.md

================================================
FILE: images/all-spark-notebook/Dockerfile
================================================
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# Build arguments that select the parent image; the default resolves to
# quay.io/jupyter/pyspark-notebook.
ARG REGISTRY=quay.io
ARG OWNER=jupyter
ARG BASE_IMAGE=$REGISTRY/$OWNER/pyspark-notebook
FROM $BASE_IMAGE

LABEL maintainer="Jupyter Project <jupyter@googlegroups.com>"

# Fix: https://github.com/hadolint/hadolint/wiki/DL4006
# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# The steps below (fix-permissions on the R library path, apt-get) run as root.
USER root

# RSpark config
ENV R_LIBS_USER="${SPARK_HOME}/R/lib"
RUN fix-permissions "${R_LIBS_USER}"

# R pre-requisites
# The apt cache and package lists are removed in the same layer as the install.
RUN apt-get update --yes && \
    apt-get install --yes --no-install-recommends \
    fonts-dejavu \
    gfortran \
    gcc && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# macOS Rosetta virtualization creates junk directory which gets owned by root further up.
# It'll get re-created, but as USER runner after the next directive so hopefully should not cause permission issues.
#
# More info: https://github.com/jupyter/docker-stacks/issues/2296
RUN rm -rf "/home/${NB_USER}/.cache/"

# Switch from root to the notebook user for the package installation.
USER ${NB_UID}

# R packages including IRKernel which gets installed globally.
RUN mamba install --yes \
    'r-base' \
    'r-ggplot2' \
    'r-irkernel' \
    'r-rcurl' \
    'r-sparklyr' && \
    mamba clean --all -f -y && \
    fix-permissions "${CONDA_DIR}" && \
    fix-permissions "/home/${NB_USER}"

================================================
FILE: images/all-spark-notebook/README.md
================================================
# Jupyter Notebook Python, R, Spark Stack

GitHub Actions in the project builds and pushes this image to the Registry.

Please visit the project documentation site for help to use and contribute to this image and others.

- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html)
- [Selecting an Image :: Core Stacks :: jupyter/all-spark-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-all-spark-notebook)
- [Image Specifics :: Apache Spark](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/specifics.html#apache-spark)

================================================
FILE: images/base-notebook/.dockerignore
================================================
# Documentation
README.md

================================================
FILE: images/base-notebook/Dockerfile
================================================
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
ARG REGISTRY=quay.io ARG OWNER=jupyter ARG BASE_IMAGE=$REGISTRY/$OWNER/docker-stacks-foundation FROM $BASE_IMAGE LABEL maintainer="Jupyter Project " # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] USER root # Install all OS dependencies for the Server that starts # but lacks all features (e.g., download as all possible file formats) RUN apt-get update --yes && \ apt-get install --yes --no-install-recommends \ # - Add necessary fonts for matplotlib/seaborn # See https://github.com/jupyter/docker-stacks/pull/380 for details fonts-liberation \ # - `pandoc` is used to convert notebooks to html files # it's not present in the aarch64 Ubuntu image, so we install it here pandoc \ # - `run-one` - a wrapper script that runs no more # than one unique instance of some command with a unique set of arguments, # we use `run-one-constantly` to support the `RESTARTABLE` option run-one && \ apt-get clean && rm -rf /var/lib/apt/lists/* # macOS Rosetta virtualization creates junk directory which gets owned by root further up. # It'll get re-created, but as USER runner after the next directive so hopefully should not cause permission issues. 
# # More info: https://github.com/jupyter/docker-stacks/issues/2296 RUN rm -rf "/home/${NB_USER}/.cache/" USER ${NB_UID} # Install JupyterHub, JupyterLab, NBClassic and Jupyter Notebook # Generate a Jupyter Server config # Cleanup temporary files # Correct permissions # Do all this in a single RUN command to avoid duplicating all of the # files across image layers when the permissions change WORKDIR /tmp RUN mamba install --yes \ 'jupyterhub-singleuser' \ 'jupyterlab' \ 'nbclassic' \ # Sometimes, when the new version of `jupyterlab` is released, latest `notebook` might not support it for some time # Old versions of `notebook` (<v7) didn't have a restriction on the `jupyterlab` version, and old `notebook` is getting installed # That's why we have to pin the minimum notebook version 'notebook>=7.2.2' && \ jupyter server --generate-config && \ mamba clean --all -f -y && \ jupyter lab clean && \ rm -rf "/home/${NB_USER}/.cache/yarn" && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ENV JUPYTER_PORT=8888 EXPOSE $JUPYTER_PORT # Configure container startup CMD ["start-notebook.py"] # Copy local files as late as possible to avoid cache busting COPY start-notebook.py start-notebook.sh start-singleuser.py start-singleuser.sh /usr/local/bin/ COPY jupyter_server_config.py docker_healthcheck.py /etc/jupyter/ # Fix permissions on /etc/jupyter as root USER root RUN fix-permissions /etc/jupyter/ # HEALTHCHECK documentation: https://docs.docker.com/engine/reference/builder/#healthcheck # This healthcheck works well for `lab`, `notebook`, `nbclassic`, `server`, and `retro` jupyter commands # https://github.com/jupyter/docker-stacks/issues/915#issuecomment-1068528799 HEALTHCHECK --interval=3s --timeout=1s --start-period=3s --retries=3 \ CMD /etc/jupyter/docker_healthcheck.py || exit 1 # macOS Rosetta virtualization creates junk directory which gets owned by root further up. # It'll get re-created, but as USER runner after the next directive so hopefully should not cause permission issues.
# # More info: https://github.com/jupyter/docker-stacks/issues/2296 RUN rm -rf "/home/${NB_USER}/.cache/" # Switch back to jovyan to avoid accidental container runs as root USER ${NB_UID} WORKDIR "${HOME}" ================================================ FILE: images/base-notebook/README.md ================================================ # Base Jupyter Notebook Stack GitHub Actions in the project builds and pushes this image to the Registry. Please visit the project documentation site for help to use and contribute to this image and others. - [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) - [Selecting an Image :: Core Stacks :: jupyter/base-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-base-notebook) ================================================ FILE: images/base-notebook/docker_healthcheck.py ================================================ #!/usr/bin/env python3 # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. import json import os import subprocess from pathlib import Path import requests # Several operations below deliberately don't check for possible errors # As this is a health check, it should succeed or raise an exception on error # Docker runs health checks using an exec # It uses the default user configured when running the image: root for the case of a custom NB_USER or jovyan for the case of the default image user. 
# We manually change HOME to make `jupyter --runtime-dir` report a correct path # More information: result = subprocess.run( ["jupyter", "--runtime-dir"], check=True, capture_output=True, text=True, env=dict(os.environ) | {"HOME": "/home/" + os.environ["NB_USER"]}, ) runtime_dir = Path(result.stdout.rstrip()) json_file = next(runtime_dir.glob("*server-*.json")) url = json.loads(json_file.read_bytes())["url"] url = url + "api" proxies = { "http": "", "https": "", } r = requests.get(url, proxies=proxies, verify=False) # request without SSL verification r.raise_for_status() print(r.content) ================================================ FILE: images/base-notebook/jupyter_server_config.py ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. # mypy: ignore-errors import os import stat import subprocess from pathlib import Path from jupyter_core.paths import jupyter_data_dir c = get_config() # noqa: F821 # Listen on all interfaces (ipv4 and ipv6) c.ServerApp.ip = "" c.ServerApp.open_browser = False # to output both image/svg+xml and application/pdf plot formats in the notebook file c.InlineBackend.figure_formats = {"png", "jpeg", "svg", "pdf"} # https://github.com/jupyter/notebook/issues/3130 c.FileContentsManager.delete_to_trash = False # Generate a self-signed certificate OPENSSL_CONFIG = """\ [req] distinguished_name = req_distinguished_name [req_distinguished_name] """ if "GEN_CERT" in os.environ: dir_name = Path(jupyter_data_dir()) dir_name.mkdir(parents=True, exist_ok=True) pem_file = dir_name / "notebook.pem" # Generate an openssl.cnf file to set the distinguished name cnf_file = Path(os.getenv("CONDA_DIR", "/usr/lib")) / "ssl/openssl.cnf" if not cnf_file.exists(): cnf_file.write_text(OPENSSL_CONFIG) # Generate a certificate if one doesn't exist on a disk subprocess.check_call( [ "openssl", "req", "-new", "-newkey=rsa:2048", "-days=365", "-nodes", "-x509", 
"-subj=/C=XX/ST=XX/L=XX/O=generated/CN=generated", f"-keyout={pem_file}", f"-out={pem_file}", ] ) # Restrict access to the file pem_file.chmod(stat.S_IRUSR | stat.S_IWUSR) c.ServerApp.certfile = str(pem_file) # Change default umask for all subprocesses of the Server if set in the environment if "NB_UMASK" in os.environ: os.umask(int(os.environ["NB_UMASK"], 8)) ================================================ FILE: images/base-notebook/start-notebook.py ================================================ #!/usr/bin/env python # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. import os import shlex import sys # If we are in a JupyterHub, we pass on to `start-singleuser.py` instead so it does the right thing if "JUPYTERHUB_API_TOKEN" in os.environ: print( "WARNING: using start-singleuser.py instead of start-notebook.py to start a server associated with JupyterHub.", flush=True, ) command = ["/usr/local/bin/start-singleuser.py"] + sys.argv[1:] os.execvp(command[0], command) # Entrypoint is start.sh command = [] # If we want to survive restarts, launch the command using `run-one-constantly` if os.environ.get("RESTARTABLE") == "yes": command.append("run-one-constantly") # We always launch a jupyter subcommand from this script command.append("jupyter") # Launch the configured subcommand. # Note that this should be a single string, so we don't split it. # We default to `lab`. jupyter_command = os.environ.get("DOCKER_STACKS_JUPYTER_CMD", "lab") command.append(jupyter_command) # Append any optional NOTEBOOK_ARGS we were passed in. # This is supposed to be multiple args passed on to the notebook command, # so we split it correctly with shlex if "NOTEBOOK_ARGS" in os.environ: command += shlex.split(os.environ["NOTEBOOK_ARGS"]) # Pass through any other args we were passed on the command line command += sys.argv[1:] # Execute the command! 
print("Executing: " + " ".join(command), flush=True) os.execvp(command[0], command) ================================================ FILE: images/base-notebook/start-notebook.sh ================================================ #!/bin/bash # Shim to emit warning and call start-notebook.py echo "WARNING: Use start-notebook.py instead" exec /usr/local/bin/start-notebook.py "$@" ================================================ FILE: images/base-notebook/start-singleuser.py ================================================ #!/usr/bin/env python # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. import os import shlex import sys # Entrypoint is start.sh command = ["jupyterhub-singleuser"] # JupyterHub singleuser arguments are set using environment variables # Append any optional NOTEBOOK_ARGS we were passed in. # This is supposed to be multiple args passed on to the notebook command, # so we split it correctly with shlex if "NOTEBOOK_ARGS" in os.environ: command += shlex.split(os.environ["NOTEBOOK_ARGS"]) # Pass any other args we have been passed through command += sys.argv[1:] # Execute the command! print("Executing: " + " ".join(command), flush=True) os.execvp(command[0], command) ================================================ FILE: images/base-notebook/start-singleuser.sh ================================================ #!/bin/bash # Shim to emit warning and call start-singleuser.py echo "WARNING: Use start-singleuser.py instead" exec /usr/local/bin/start-singleuser.py "$@" ================================================ FILE: images/datascience-notebook/.dockerignore ================================================ # Documentation README.md ================================================ FILE: images/datascience-notebook/Dockerfile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
ARG REGISTRY=quay.io ARG OWNER=jupyter ARG BASE_IMAGE=$REGISTRY/$OWNER/scipy-notebook FROM $BASE_IMAGE LABEL maintainer="Jupyter Project " # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] USER root # R pre-requisites RUN apt-get update --yes && \ apt-get install --yes --no-install-recommends \ fonts-dejavu \ gfortran \ gcc && \ apt-get clean && rm -rf /var/lib/apt/lists/* # Julia dependencies # install Julia packages in /opt/julia instead of ${HOME} ENV JULIA_DEPOT_PATH=/opt/julia \ JULIA_PKGDIR=/opt/julia # Setup Julia RUN /opt/setup-scripts/setup_julia.py # macOS Rosetta virtualization creates junk directory which gets owned by root further up. # It'll get re-created, but as USER runner after the next directive so hopefully should not cause permission issues. # # More info: https://github.com/jupyter/docker-stacks/issues/2296 # hadolint ignore=DL3059 RUN rm -rf "/home/${NB_USER}/.cache/" USER ${NB_UID} # Setup IJulia kernel & other packages RUN /opt/setup-scripts/setup-julia-packages.bash # R packages including IRKernel which gets installed globally. # r-e1071: dependency of the caret R package RUN mamba install --yes \ 'r-base' \ 'r-caret' \ 'r-crayon' \ 'r-devtools' \ 'r-e1071' \ 'r-forecast' \ 'r-hexbin' \ 'r-htmltools' \ 'r-htmlwidgets' \ 'r-irkernel' \ 'r-nycflights13' \ 'r-randomforest' \ 'r-rcurl' \ 'r-rmarkdown' \ 'r-rodbc' \ 'r-rsqlite' \ 'r-shiny' \ 'r-tidymodels' \ 'r-tidyverse' \ 'rpy2' \ 'unixodbc' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ================================================ FILE: images/datascience-notebook/README.md ================================================ # Jupyter Notebook Data Science Stack GitHub Actions in the project builds and pushes this image to the Registry. 
Please visit the project documentation site for help to use and contribute to this image and others. - [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) - [Selecting an Image :: Core Stacks :: jupyter/datascience-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-datascience-notebook) ================================================ FILE: images/docker-stacks-foundation/.dockerignore ================================================ # Documentation README.md ================================================ FILE: images/docker-stacks-foundation/10activate-conda-env.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. # This registers the initialization code for the conda shell code # It also activates default environment in the end, so we don't need to activate it manually # Documentation: https://docs.conda.io/projects/conda/en/latest/dev-guide/deep-dives/activation.html eval "$(conda shell.bash hook)" ================================================ FILE: images/docker-stacks-foundation/Dockerfile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
# Ubuntu 24.04 (noble) # https://hub.docker.com/_/ubuntu/tags?page=1&name=noble ARG ROOT_IMAGE=ubuntu:24.04 FROM $ROOT_IMAGE LABEL maintainer="Jupyter Project " ARG NB_USER="jovyan" ARG NB_UID="1000" ARG NB_GID="100" # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] USER root # Install all OS dependencies for the Server that starts # but lacks all features (e.g., download as all possible file formats) ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update --yes && \ # - `apt-get upgrade` is run to patch known vulnerabilities in system packages # as the Ubuntu base image is rebuilt too seldom sometimes (less than once a month) apt-get upgrade --yes && \ apt-get install --yes --no-install-recommends \ # - bzip2 is necessary to extract the micromamba executable. bzip2 \ ca-certificates \ locales \ # - `netbase` provides /etc/{protocols,rpc,services}, part of POSIX # and required by various C functions like getservbyname and getprotobyname # https://github.com/jupyter/docker-stacks/pull/2129 netbase \ sudo \ # - `tini` is installed as a helpful container entrypoint, # that reaps zombie processes and such of the actual executable we want to start # See https://github.com/krallin/tini#why-tini for details tini \ wget && \ apt-get clean && rm -rf /var/lib/apt/lists/* && \ echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ echo "C.UTF-8 UTF-8" >> /etc/locale.gen && \ locale-gen # Configure environment ENV CONDA_DIR=/opt/conda \ SHELL=/bin/bash \ NB_USER="${NB_USER}" \ NB_UID=${NB_UID} \ NB_GID=${NB_GID} \ LC_ALL=C.UTF-8 \ LANG=C.UTF-8 \ LANGUAGE=C.UTF-8 ENV PATH="${CONDA_DIR}/bin:${PATH}" \ HOME="/home/${NB_USER}" # Copy a script that we will use to correct permissions after running certain commands COPY fix-permissions /usr/local/bin/fix-permissions RUN chmod a+rx /usr/local/bin/fix-permissions # Enable prompt color in the skeleton .bashrc before creating the default 
NB_USER # hadolint ignore=SC2016 RUN sed -i 's/^#force_color_prompt=yes/force_color_prompt=yes/' /etc/skel/.bashrc && \ # More information in: https://github.com/jupyter/docker-stacks/pull/2047 # and docs: https://docs.conda.io/projects/conda/en/latest/dev-guide/deep-dives/activation.html echo 'eval "$(conda shell.bash hook)"' >> /etc/skel/.bashrc # Delete existing user with UID="${NB_UID}" if it exists # hadolint ignore=SC2046 RUN if grep -q "${NB_UID}" /etc/passwd; then \ userdel --remove $(id -un "${NB_UID}"); \ fi # Create "${NB_USER}" user (`jovyan` by default) with UID="${NB_UID}" (`1000` by default) and in the 'users' group # and make sure these dirs are writable by the `users` group. RUN echo "auth requisite pam_deny.so" >> /etc/pam.d/su && \ sed -i.bak -e 's/^%admin/#%admin/' /etc/sudoers && \ sed -i.bak -e 's/^%sudo/#%sudo/' /etc/sudoers && \ useradd --no-log-init --create-home --shell /bin/bash --uid "${NB_UID}" --no-user-group "${NB_USER}" && \ mkdir -p "${CONDA_DIR}" && \ chown "${NB_USER}:${NB_GID}" "${CONDA_DIR}" && \ chmod g+w /etc/passwd && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" # macOS Rosetta virtualization creates junk directory which gets owned by root further up. # It'll get re-created, but as USER runner after the next directive so hopefully should not cause permission issues. # # More info: https://github.com/jupyter/docker-stacks/issues/2296 RUN rm -rf "/home/${NB_USER}/.cache/" USER ${NB_UID} # Pin the Python version here, or set it to "default" ARG PYTHON_VERSION=3.13 # Setup work directory for backward-compatibility RUN mkdir "/home/${NB_USER}/work" && \ fix-permissions "/home/${NB_USER}" # Download and install Micromamba, and initialize the Conda prefix. 
# # Similar projects using Micromamba: # - Micromamba-Docker: <https://github.com/mamba-org/micromamba-docker> # - repo2docker: <https://github.com/jupyterhub/repo2docker> # Install Python, Mamba, and jupyter_core # Cleanup temporary files and remove Micromamba # Correct permissions # Do all this in a single RUN command to avoid duplicating all of the # files across image layers when the permissions change COPY --chown="${NB_UID}:${NB_GID}" initial-condarc "${CONDA_DIR}/.condarc" WORKDIR /tmp RUN set -x && \ arch=$(uname -m) && \ if [ "${arch}" = "x86_64" ]; then \ # Should be simpler, see <https://github.com/mamba-org/mamba/issues/1437> arch="64"; \ fi && \ # https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html#linux-and-macos wget --progress=dot:giga -O - \ "https://micro.mamba.pm/api/micromamba/linux-${arch}/latest" | tar -xvj bin/micromamba && \ PYTHON_SPECIFIER="python=${PYTHON_VERSION}" && \ if [[ "${PYTHON_VERSION}" == "default" ]]; then PYTHON_SPECIFIER="python"; fi && \ # Install the packages ./bin/micromamba install \ --root-prefix="${CONDA_DIR}" \ --prefix="${CONDA_DIR}" \ --yes \ 'jupyter_core' \ 'conda' \ 'mamba' \ "${PYTHON_SPECIFIER}" && \ rm -rf /tmp/bin/ && \ # Pin major.minor version of python # https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-pkgs.html#preventing-packages-from-updating-pinning mamba list --full-name 'python' | awk 'END{sub("[^.]*$", "*", $2); print $1 " " $2}' >> "${CONDA_DIR}/conda-meta/pinned" && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" # Copy local files as late as possible to avoid cache busting COPY run-hooks.sh start.sh /usr/local/bin/ # Configure container entrypoint ENTRYPOINT ["tini", "-g", "--", "start.sh"] USER root # Create dirs for startup hooks RUN mkdir /usr/local/bin/start-notebook.d && \ mkdir /usr/local/bin/before-notebook.d COPY 10activate-conda-env.sh /usr/local/bin/before-notebook.d/ # macOS Rosetta virtualization creates junk directory which gets owned by root further up.
# It'll get re-created, but as USER runner after the next directive so hopefully should not cause permission issues. # # More info: https://github.com/jupyter/docker-stacks/issues/2296 RUN rm -rf "/home/${NB_USER}/.cache/" # Switch back to jovyan to avoid accidental container runs as root USER ${NB_UID} WORKDIR "${HOME}" ================================================ FILE: images/docker-stacks-foundation/README.md ================================================ # Foundation Jupyter Stack GitHub Actions in the project builds and pushes this image to the Registry. Please visit the project documentation site for help to use and contribute to this image and others. - [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) - [Selecting an Image :: Core Stacks :: jupyter/docker-stacks-foundation](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-docker-stacks-foundation) ================================================ FILE: images/docker-stacks-foundation/fix-permissions ================================================ #!/bin/bash # Set permissions on a directory # After any installation, if a directory needs to be (human) user-writable, run this script on it. # It will make everything in the directory owned by the group ${NB_GID} and writable by that group. # Deployments that want to set a specific user id can preserve permissions # by adding the `--group-add users` line to `docker run`. # Uses find to avoid touching files that already have the right permissions, # which would cause a massive image explosion # Right permissions are: # group=${NB_GID} # AND permissions include group rwX (directory-execute) # AND directories have setuid,setgid bits set set -e for d in "$@"; do find "${d}" \ ! \( \ -group "${NB_GID}" \ -a -perm -g+rwX \ \) \ -exec chgrp "${NB_GID}" -- {} \+ \ -exec chmod g+rwX -- {} \+ # setuid, setgid *on directories only* find "${d}" \ \( \ -type d \ -a ! 
-perm -6000 \ \) \ -exec chmod +6000 -- {} \+ done ================================================ FILE: images/docker-stacks-foundation/initial-condarc ================================================ # Conda configuration see https://conda.io/projects/conda/en/latest/configuration.html auto_update_conda: false show_channel_urls: true channels: - conda-forge ================================================ FILE: images/docker-stacks-foundation/run-hooks.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. # identical _log to start.sh # only used when _not_ sourced from start.sh (i.e. unittests) if ! declare -F _log > /dev/null; then _log () { if [[ "$*" == "ERROR:"* ]] || [[ "$*" == "WARNING:"* ]] || [[ "${JUPYTER_DOCKER_STACKS_QUIET}" == "" ]]; then echo "$@" >&2 fi } fi # The run-hooks.sh script looks for *.sh scripts to source # and executable files to run within a passed directory if [ "$#" -ne 1 ]; then _log "ERROR: Should pass exactly one directory" return 1 fi if [[ ! -d "${1}" ]]; then _log "ERROR: Directory ${1} doesn't exist or is not a directory" return 1 fi _log "Running hooks in: ${1} as uid: $(id -u) gid: $(id -g)" for f in "${1}/"*; do # Handling a case when the directory is empty [ -e "${f}" ] || continue case "${f}" in *.sh) _log "Sourcing shell script: ${f}" # shellcheck disable=SC1090 source "${f}" # shellcheck disable=SC2181 if [ $? -ne 0 ]; then _log "ERROR: ${f} has failed, continuing execution" fi ;; *) if [ -x "${f}" ]; then _log "Running executable: ${f}" "${f}" # shellcheck disable=SC2181 if [ $? 
-ne 0 ]; then _log "ERROR: ${f} has failed, continuing execution" fi else _log "Ignoring non-executable: ${f}" fi ;; esac done _log "Done running hooks in: ${1}" ================================================ FILE: images/docker-stacks-foundation/start.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. set -e # The _log function is used for everything this script wants to log. # It will always log errors and warnings but can be silenced for other messages # by setting the JUPYTER_DOCKER_STACKS_QUIET environment variable. _log () { if [[ "$*" == "ERROR:"* ]] || [[ "$*" == "WARNING:"* ]] || [[ "${JUPYTER_DOCKER_STACKS_QUIET}" == "" ]]; then echo "$@" >&2 fi } _log "Entered start.sh with args:" "$@" # A helper function to unset env vars listed in the value of the env var # JUPYTER_ENV_VARS_TO_UNSET. unset_explicit_env_vars () { if [ -n "${JUPYTER_ENV_VARS_TO_UNSET}" ]; then for env_var_to_unset in $(echo "${JUPYTER_ENV_VARS_TO_UNSET}" | tr ',' ' '); do _log "Unset ${env_var_to_unset} due to JUPYTER_ENV_VARS_TO_UNSET" unset "${env_var_to_unset}" done unset JUPYTER_ENV_VARS_TO_UNSET fi } # Default to starting bash if no command was specified if [ $# -eq 0 ]; then cmd=( "bash" ) else cmd=( "$@" ) fi # Backwards compatibility: `start.sh` is executed by default in ENTRYPOINT # so it should no longer be specified in CMD if [ "${_START_SH_EXECUTED}" = "1" ]; then _log "WARNING: start.sh is the default ENTRYPOINT, do not include it in CMD" _log "Executing the command:" "${cmd[@]}" exec "${cmd[@]}" else export _START_SH_EXECUTED=1 fi # NOTE: This hook will run as the user the container was started with! 
# shellcheck source=images/docker-stacks-foundation/run-hooks.sh source /usr/local/bin/run-hooks.sh /usr/local/bin/start-notebook.d # If the container started as the root user, then we have permission to refit # the jovyan user, and ensure file permissions, grant sudo rights, and such # things before we run the command passed to start.sh as the desired user # (NB_USER). # if [ "$(id -u)" == 0 ]; then # Environment variables: # - NB_USER: the desired username and associated home folder # - NB_UID: the desired user id # - NB_GID: a group id we want our user to belong to # - NB_GROUP: a group name we want for the group # - GRANT_SUDO: a boolean ("1" or "yes") to grant the user sudo rights # - CHOWN_HOME: a boolean ("1" or "yes") to chown the user's home folder # - CHOWN_EXTRA: a comma-separated list of paths to chown # - CHOWN_HOME_OPTS / CHOWN_EXTRA_OPTS: arguments to the chown commands # Refit the jovyan user to the desired user (NB_USER) if id jovyan &> /dev/null; then if ! usermod --home "/home/${NB_USER}" --login "${NB_USER}" jovyan 2>&1 | grep "no changes" > /dev/null; then _log "Updated the jovyan user:" _log "- username: jovyan -> ${NB_USER}" _log "- home dir: /home/jovyan -> /home/${NB_USER}" fi elif ! id -u "${NB_USER}" &> /dev/null; then _log "ERROR: Neither the jovyan user nor '${NB_USER}' exists. This could be the result of stopping and starting, the container with a different NB_USER environment variable." 
exit 1 fi # Ensure the desired user (NB_USER) gets its desired user id (NB_UID) and is # a member of the desired group (NB_GROUP, NB_GID) if [ "${NB_UID}" != "$(id -u "${NB_USER}")" ] || [ "${NB_GID}" != "$(id -g "${NB_USER}")" ]; then _log "Update ${NB_USER}'s UID:GID to ${NB_UID}:${NB_GID}" # Ensure the desired group's existence if [ "${NB_GID}" != "$(id -g "${NB_USER}")" ]; then groupadd --force --gid "${NB_GID}" --non-unique "${NB_GROUP:-${NB_USER}}" fi # Recreate the desired user as we want it userdel "${NB_USER}" useradd --no-log-init --home "/home/${NB_USER}" --shell /bin/bash --uid "${NB_UID}" --gid "${NB_GID}" --groups 100 "${NB_USER}" fi # Update the home directory if the desired user (NB_USER) is root and the # desired user id (NB_UID) is 0 and the desired group id (NB_GID) is 0. if [ "${NB_USER}" = "root" ] && [ "${NB_UID}" = "$(id -u "${NB_USER}")" ] && [ "${NB_GID}" = "$(id -g "${NB_USER}")" ]; then sed -i "s|/root|/home/root|g" /etc/passwd # Do not preserve ownership in rootless mode CP_OPTS="-a --no-preserve=ownership" fi # Move or symlink the jovyan home directory to the desired user's home # directory if it doesn't already exist, and update the current working # directory to the new location if needed. if [[ "${NB_USER}" != "jovyan" ]]; then if [[ ! -e "/home/${NB_USER}" ]]; then _log "Attempting to copy /home/jovyan to /home/${NB_USER}..." mkdir "/home/${NB_USER}" # shellcheck disable=SC2086 if cp ${CP_OPTS:--a} /home/jovyan/. "/home/${NB_USER}/"; then _log "Success!" else _log "Failed to copy data from /home/jovyan to /home/${NB_USER}!" _log "Attempting to symlink /home/jovyan to /home/${NB_USER}..." if ln -s /home/jovyan "/home/${NB_USER}"; then _log "Success creating symlink!" else _log "ERROR: Failed copy data from /home/jovyan to /home/${NB_USER} or to create symlink!" 
exit 1 fi fi fi # Ensure the current working directory is updated to the new path if [[ "${PWD}/" == "/home/jovyan/"* ]]; then new_wd="/home/${NB_USER}/${PWD:13}" _log "Changing working directory to ${new_wd}" cd "${new_wd}" fi fi # Optionally ensure the desired user gets filesystem ownership of its home # folder and/or additional folders if [[ "${CHOWN_HOME}" == "1" || "${CHOWN_HOME}" == "yes" ]]; then _log "Ensuring /home/${NB_USER} is owned by ${NB_UID}:${NB_GID} ${CHOWN_HOME_OPTS:+(chown options: ${CHOWN_HOME_OPTS})}" # shellcheck disable=SC2086 chown ${CHOWN_HOME_OPTS} "${NB_UID}:${NB_GID}" "/home/${NB_USER}" fi if [ -n "${CHOWN_EXTRA}" ]; then for extra_dir in $(echo "${CHOWN_EXTRA}" | tr ',' ' '); do _log "Ensuring ${extra_dir} is owned by ${NB_UID}:${NB_GID} ${CHOWN_EXTRA_OPTS:+(chown options: ${CHOWN_EXTRA_OPTS})}" # shellcheck disable=SC2086 chown ${CHOWN_EXTRA_OPTS} "${NB_UID}:${NB_GID}" "${extra_dir}" done fi # Prepend ${CONDA_DIR}/bin to sudo secure_path sed -r "s#Defaults\s+secure_path\s*=\s*\"?([^\"]+)\"?#Defaults secure_path=\"${CONDA_DIR}/bin:\1\"#" /etc/sudoers | grep secure_path > /etc/sudoers.d/path # Optionally grant passwordless sudo rights for the desired user if [[ "${GRANT_SUDO}" == "1" || "${GRANT_SUDO}" == "yes" ]]; then _log "Granting ${NB_USER} passwordless sudo rights!" echo "${NB_USER} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers.d/added-by-start-script fi # NOTE: This hook is run as the root user! 
# shellcheck source=images/docker-stacks-foundation/run-hooks.sh source /usr/local/bin/run-hooks.sh /usr/local/bin/before-notebook.d unset_explicit_env_vars _log "Running as ${NB_USER}:" "${cmd[@]}" if [ "${NB_USER}" = "root" ] && [ "${NB_UID}" = "$(id -u "${NB_USER}")" ] && [ "${NB_GID}" = "$(id -g "${NB_USER}")" ]; then HOME="/home/root" exec "${cmd[@]}" else exec sudo --preserve-env --set-home --user "${NB_USER}" \ LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \ PATH="${PATH}" \ PYTHONPATH="${PYTHONPATH:-}" \ "${cmd[@]}" # Notes on how we ensure that the environment that this container is started # with is preserved (except vars listed in JUPYTER_ENV_VARS_TO_UNSET) when # we transition from running as root to running as NB_USER. # # - We use `sudo` to execute the command as NB_USER. What then # happens to the environment will be determined by configuration in # /etc/sudoers and /etc/sudoers.d/* as well as flags we pass to the sudo # command. The behavior can be inspected with `sudo -V` run as root. # # ref: `man sudo` https://linux.die.net/man/8/sudo # ref: `man sudoers` https://www.sudo.ws/docs/man/sudoers.man/ # # - We use the `--preserve-env` flag to pass through most environment # variables, but understand that exceptions are caused by the sudoers # configuration: `env_delete` and `env_check`. # # - We use the `--set-home` flag to set the HOME variable appropriately. # # - To reduce the default list of variables deleted by sudo, we could have # used `env_delete` from /etc/sudoers. It has a higher priority than the # `--preserve-env` flag and the `env_keep` configuration. # # - We preserve LD_LIBRARY_PATH, PATH and PYTHONPATH explicitly. Note however that sudo # resolves `${cmd[@]}` using the "secure_path" variable we modified # above in /etc/sudoers.d/path. Thus PATH is irrelevant to how the above # sudo command resolves the path of `${cmd[@]}`. The PATH will be relevant # for resolving paths of any subprocesses spawned by `${cmd[@]}`. 
fi # The container didn't start as the root user, so we will have to act as the # user we started as. else # Warn about misconfiguration of: granting sudo rights if [[ "${GRANT_SUDO}" == "1" || "${GRANT_SUDO}" == "yes" ]]; then _log "WARNING: container must be started as root to grant sudo permissions!" fi JOVYAN_UID="$(id -u jovyan 2>/dev/null)" # The default UID for the jovyan user JOVYAN_GID="$(id -g jovyan 2>/dev/null)" # The default GID for the jovyan user # Attempt to ensure the user uid we currently run as has a named entry in # the /etc/passwd file, as it avoids software crashing on hard assumptions # on such entry. Writing to the /etc/passwd was allowed for the root group # from the Dockerfile during the build. # # ref: https://github.com/jupyter/docker-stacks/issues/552 if ! whoami &> /dev/null; then _log "There is no entry in /etc/passwd for our UID=$(id -u). Attempting to fix..." if [[ -w /etc/passwd ]]; then _log "Renaming old jovyan user to nayvoj ($(id -u jovyan):$(id -g jovyan))" # We cannot use "sed --in-place" since sed tries to create a temp file in # /etc/ and we may not have write access. Apply sed on our own temp file: sed --expression="s/^jovyan:/nayvoj:/" /etc/passwd > /tmp/passwd echo "${NB_USER}:x:$(id -u):$(id -g):,,,:/home/jovyan:/bin/bash" >> /tmp/passwd cat /tmp/passwd > /etc/passwd rm /tmp/passwd _log "Added new ${NB_USER} user ($(id -u):$(id -g)). Fixed UID!" if [[ "${NB_USER}" != "jovyan" ]]; then _log "WARNING: user is ${NB_USER} but home is /home/jovyan. You must run as root to rename the home directory!" fi else _log "WARNING: unable to fix missing /etc/passwd entry because we don't have write permission. Try setting gid=0 with \"--user=$(id -u):0\"." fi fi # Warn about misconfiguration of: desired username, user id, or group id. # A misconfiguration occurs when the user modifies the default values of # NB_USER, NB_UID, or NB_GID, but we cannot update those values because we # are not root. 
if [[ "${NB_USER}" != "jovyan" && "${NB_USER}" != "$(id -un)" ]]; then _log "WARNING: container must be started as root to change the desired user's name with NB_USER=\"${NB_USER}\"!" fi if [[ "${NB_UID}" != "${JOVYAN_UID}" && "${NB_UID}" != "$(id -u)" ]]; then _log "WARNING: container must be started as root to change the desired user's id with NB_UID=\"${NB_UID}\"!" fi if [[ "${NB_GID}" != "${JOVYAN_GID}" && "${NB_GID}" != "$(id -g)" ]]; then _log "WARNING: container must be started as root to change the desired user's group id with NB_GID=\"${NB_GID}\"!" fi # Warn if the user isn't able to write files to ${HOME} if [[ ! -w /home/jovyan ]]; then _log "WARNING: no write access to /home/jovyan. Try starting the container with group 'users' (100), e.g. using \"--group-add=users\"." fi # NOTE: This hook is run as the user we started the container as! # shellcheck source=images/docker-stacks-foundation/run-hooks.sh source /usr/local/bin/run-hooks.sh /usr/local/bin/before-notebook.d unset_explicit_env_vars _log "Executing the command:" "${cmd[@]}" exec "${cmd[@]}" fi ================================================ FILE: images/julia-notebook/.dockerignore ================================================ # Documentation README.md ================================================ FILE: images/julia-notebook/Dockerfile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
ARG REGISTRY=quay.io ARG OWNER=jupyter ARG BASE_IMAGE=$REGISTRY/$OWNER/minimal-notebook FROM $BASE_IMAGE LABEL maintainer="Jupyter Project " # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] USER root # Julia dependencies # install Julia packages in /opt/julia instead of ${HOME} ENV JULIA_DEPOT_PATH=/opt/julia \ JULIA_PKGDIR=/opt/julia # Setup Julia RUN /opt/setup-scripts/setup_julia.py # macOS Rosetta virtualization creates junk directory which gets owned by root further up. # It'll get re-created, but as USER runner after the next directive so hopefully should not cause permission issues. # # More info: https://github.com/jupyter/docker-stacks/issues/2296 # hadolint ignore=DL3059 RUN rm -rf "/home/${NB_USER}/.cache/" USER ${NB_UID} # Setup IJulia kernel & other packages RUN /opt/setup-scripts/setup-julia-packages.bash ================================================ FILE: images/julia-notebook/README.md ================================================ # Jupyter Notebook Julia Stack GitHub Actions in the project builds and pushes this image to the Registry. Please visit the project documentation site for help to use and contribute to this image and others. - [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) - [Selecting an Image :: Core Stacks :: jupyter/julia-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-julia-notebook) ================================================ FILE: images/minimal-notebook/.dockerignore ================================================ # Documentation README.md ================================================ FILE: images/minimal-notebook/Dockerfile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
ARG REGISTRY=quay.io ARG OWNER=jupyter ARG BASE_IMAGE=$REGISTRY/$OWNER/base-notebook FROM $BASE_IMAGE LABEL maintainer="Jupyter Project " # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] USER root # Install all OS dependencies for a fully functional Server RUN apt-get update --yes && \ apt-get install --yes --no-install-recommends \ # Common useful utilities curl \ git \ nano-tiny \ tzdata \ unzip \ vim-tiny \ # git-over-ssh openssh-client \ # `less` is needed to run help in R # see: https://github.com/jupyter/docker-stacks/issues/1588 less \ # `nbconvert` dependencies # https://nbconvert.readthedocs.io/en/latest/install.html#installing-tex texlive-xetex \ texlive-fonts-recommended \ texlive-plain-generic \ # Enable clipboard on Linux host systems xclip && \ apt-get clean && rm -rf /var/lib/apt/lists/* # Create alternative for nano -> nano-tiny RUN update-alternatives --install /usr/bin/nano nano /bin/nano-tiny 10 # macOS Rosetta virtualization creates junk directory which gets owned by root further up. # It'll get re-created, but as USER runner after the next directive so hopefully should not cause permission issues. # # More info: https://github.com/jupyter/docker-stacks/issues/2296 # hadolint ignore=DL3059 RUN rm -rf "/home/${NB_USER}/.cache/" # Switch back to jovyan to avoid accidental container runs as root USER ${NB_UID} # Add an R mimetype option to specify how the plot returns from R to the browser COPY --chown=${NB_UID}:${NB_GID} Rprofile.site /opt/conda/lib/R/etc/ # Add setup scripts that may be used by downstream images or inherited images COPY setup-scripts/ /opt/setup-scripts/ ================================================ FILE: images/minimal-notebook/README.md ================================================ # Minimal Jupyter Notebook Stack GitHub Actions in the project builds and pushes this image to the Registry. 
Please visit the project documentation site for help to use and contribute to this image and others. - [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) - [Selecting an Image :: Core Stacks :: jupyter/minimal-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-minimal-notebook) ================================================ FILE: images/minimal-notebook/Rprofile.site ================================================ # Add R mimetype to specify how the plot returns from R to the browser. # https://notebook.community/andrie/jupyter-notebook-samples/Changing%20R%20plot%20options%20in%20Jupyter options(jupyter.plot_mimetypes = c('text/plain', 'image/png', 'image/jpeg', 'image/svg+xml', 'application/pdf')) ================================================ FILE: images/minimal-notebook/setup-scripts/activate_notebook_custom_env.py ================================================ #!/usr/bin/env python3 # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
import json import os import sys from pathlib import Path env_name = sys.argv[1] CONDA_DIR = os.environ["CONDA_DIR"] file = Path.home() / f".local/share/jupyter/kernels/{env_name}/kernel.json" content = json.loads(file.read_text()) content["env"] = { "XML_CATALOG_FILES": "", "PATH": f"{CONDA_DIR}/envs/{env_name}/bin:$PATH", "CONDA_PREFIX": f"{CONDA_DIR}/envs/{env_name}", "CONDA_PROMPT_MODIFIER": f"({env_name}) ", "CONDA_SHLVL": "2", "CONDA_DEFAULT_ENV": env_name, "CONDA_PREFIX_1": CONDA_DIR, } file.write_text(json.dumps(content, indent=1)) ================================================ FILE: images/minimal-notebook/setup-scripts/setup-julia-packages.bash ================================================ #!/bin/bash set -exuo pipefail # Requirements: # - Run as a non-root user # - The JULIA_PKGDIR environment variable is set # - Julia is already set up, with the setup_julia.py command # If we don't specify what CPUs the precompilation should be done for, it's # *only* done for the target of the host doing the compilation. When the # container runs on a host that's the same architecture, but a *different* # generation of CPU than what the build host was, the precompilation is useless # and Julia takes a long long time to start up. This specific multitarget comes # from https://github.com/JuliaCI/julia-buildkite/blob/9f354745a1f2bf31a5952462aa1ff2d869507cb8/utilities/build_envs.sh#L20-L82, # and may need to be updated as new CPU generations come out. # If the architecture the container runs on is different, # precompilation may still have to be re-done on first startup - but this # *should* catch most of the issues. 
# See https://github.com/jupyter/docker-stacks/issues/2015 for more information
# Pick the multi-target CPU list matching the build host architecture;
# any other architecture leaves JULIA_CPU_TARGET untouched.
case "$(uname -m)" in
    x86_64)
        # See https://github.com/JuliaCI/julia-buildkite/blob/9f354745a1f2bf31a5952462aa1ff2d869507cb8/utilities/build_envs.sh#L23
        # for an explanation of these options
        export JULIA_CPU_TARGET="generic;sandybridge,-xsaveopt,clone_all;haswell,-rdrnd,base(1);x86-64-v4,-rdrnd,base(1)"
        ;;
    aarch64)
        # See https://github.com/JuliaCI/julia-buildkite/blob/9f354745a1f2bf31a5952462aa1ff2d869507cb8/utilities/build_envs.sh#L56
        # for an explanation of these options
        export JULIA_CPU_TARGET="generic;cortex-a57;thunderx2t99;carmel,clone_all;apple-m1,base(3);neoverse-512tvb,base(3)"
        ;;
esac

# Install base Julia packages
julia -e ' import Pkg; Pkg.update(); Pkg.add([ "HDF5", "IJulia", "Pluto" ]); Pkg.precompile(); '

# Move the kernelspec out of ${HOME} to the system share location.
# Avoids problems with runtime UID change not taking effect properly
# on the .local folder in the jovyan home dir.
mv "${HOME}/.local/share/jupyter/kernels/julia"* "${CONDA_DIR}/share/jupyter/kernels/"
chmod -R go+rx "${CONDA_DIR}/share/jupyter"
rm -rf "${HOME}/.local"
fix-permissions "${JULIA_PKGDIR}" "${CONDA_DIR}/share/jupyter"

# Install jupyter-pluto-proxy to get Pluto to work on JupyterHub
# (each step aborts the script on failure because of `set -e` above)
mamba install --yes \
    'jupyter-pluto-proxy'
mamba clean --all -f -y
fix-permissions "${CONDA_DIR}"
fix-permissions "/home/${NB_USER}"

================================================
FILE: images/minimal-notebook/setup-scripts/setup_julia.py
================================================
#!/usr/bin/env python3
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
# Requirements: # - Run as the root user # - The JULIA_PKGDIR environment variable is set import logging import os import platform import shutil import subprocess from pathlib import Path import requests LOGGER = logging.getLogger(__name__) def unify_aarch64(platform: str) -> str: """ Renames arm64->aarch64 to support local builds on aarch64 Macs """ return {"arm64": "aarch64"}.get(platform, platform) def get_latest_julia_url() -> tuple[str, str]: """ Get the last stable version of Julia Based on: https://github.com/JuliaLang/www.julialang.org/issues/878#issuecomment-749234813 """ LOGGER.info("Downloading Julia versions information") versions = requests.get( "https://julialang-s3.julialang.org/bin/versions.json" ).json() stable_versions = {k: v for k, v in versions.items() if v["stable"]} # Compare versions semantically latest_stable_version = max( stable_versions, key=lambda ver: [int(sub_ver) for sub_ver in ver.split(".")] ) latest_version_files = stable_versions[latest_stable_version]["files"] triplet = unify_aarch64(platform.machine()) + "-linux-gnu" file_info = [vf for vf in latest_version_files if vf["triplet"] == triplet][0] LOGGER.info(f"Latest version: {file_info['version']} url: {file_info['url']}") return file_info["url"], file_info["version"] def download_julia(julia_url: str) -> None: """ Downloads and unpacks julia The resulting julia directory is "/opt/julia-VERSION/" """ LOGGER.info("Downloading and unpacking Julia") tmp_file = Path("/tmp/julia.tar.gz") subprocess.check_call( ["curl", "--progress-bar", "--location", "--output", tmp_file, julia_url] ) shutil.unpack_archive(tmp_file, "/opt/") tmp_file.unlink() def configure_julia(julia_version: str) -> None: """ Creates /usr/local/bin/julia symlink Make Julia aware of conda libraries Creates a directory for Julia user libraries """ LOGGER.info("Configuring Julia") # Link Julia installed version to /usr/local/bin, so julia launches it subprocess.check_call( ["ln", "-fs", 
f"/opt/julia-{julia_version}/bin/julia", "/usr/local/bin/julia"] ) # Tell Julia where conda libraries are Path("/etc/julia").mkdir() Path("/etc/julia/juliarc.jl").write_text( f'push!(Libdl.DL_LOAD_PATH, "{os.environ["CONDA_DIR"]}/lib")\n' ) # Create JULIA_PKGDIR, where user libraries are installed JULIA_PKGDIR = Path(os.environ["JULIA_PKGDIR"]) JULIA_PKGDIR.mkdir() subprocess.check_call(["chown", os.environ["NB_USER"], JULIA_PKGDIR]) subprocess.check_call(["fix-permissions", JULIA_PKGDIR]) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) julia_url, julia_version = get_latest_julia_url() download_julia(julia_url=julia_url) configure_julia(julia_version=julia_version) ================================================ FILE: images/pyspark-notebook/.dockerignore ================================================ # Documentation README.md ================================================ FILE: images/pyspark-notebook/Dockerfile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
ARG REGISTRY=quay.io ARG OWNER=jupyter ARG BASE_IMAGE=$REGISTRY/$OWNER/scipy-notebook FROM $BASE_IMAGE LABEL maintainer="Jupyter Project " # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] USER root # Spark dependencies # Default values can be overridden at build time # (ARGS are in lowercase to distinguish them from ENV) ARG openjdk_version="17" RUN apt-get update --yes && \ apt-get install --yes --no-install-recommends \ "openjdk-${openjdk_version}-jre-headless" \ ca-certificates-java && \ apt-get clean && rm -rf /var/lib/apt/lists/* # If spark_version is not set, latest Spark will be installed ARG spark_version ARG hadoop_version="3" # If scala_version is not set, Spark without Scala will be installed ARG scala_version # URL to use for Spark downloads # You need to use https://archive.apache.org/dist/spark/ website if you want to download old Spark versions # But it seems to be slower, that's why we use the recommended site for download ARG spark_download_url="https://dlcdn.apache.org/spark/" ENV SPARK_HOME=/usr/local/spark ENV PATH="${PATH}:${SPARK_HOME}/bin" ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" COPY setup_spark.py /opt/setup-scripts/ # Setup Spark RUN /opt/setup-scripts/setup_spark.py \ --spark-version="${spark_version}" \ --hadoop-version="${hadoop_version}" \ --scala-version="${scala_version}" \ --spark-download-url="${spark_download_url}" # Configure IPython system-wide COPY ipython_kernel_config.py "/etc/ipython/" RUN fix-permissions "/etc/ipython/" # macOS Rosetta virtualization creates junk directory which gets owned by root further up. # It'll get re-created, but as USER runner after the next directive so hopefully should not cause permission issues. 
# # More info: https://github.com/jupyter/docker-stacks/issues/2296 # hadolint ignore=DL3059 RUN rm -rf "/home/${NB_USER}/.cache/" USER ${NB_UID} # Install pyarrow # NOTE: It's important to ensure compatibility between Pandas versions. # The pandas version in this Dockerfile should match the version # on which the Pandas API for Spark is built. # To find the right version, check the pandas version being installed here: # https://github.com/apache/spark/blob/<SPARK_VERSION_TAG>/dev/infra/Dockerfile RUN mamba install --yes \ 'grpcio-status' \ 'grpcio' \ 'pandas=2.2.3' \ 'pyarrow' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" WORKDIR "${HOME}" EXPOSE 4040 ================================================ FILE: images/pyspark-notebook/README.md ================================================ # Jupyter Notebook Python, Spark Stack GitHub Actions in the project builds and pushes this image to the Registry. Please visit the project documentation site for help to use and contribute to this image and others. - [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) - [Selecting an Image :: Core Stacks :: jupyter/pyspark-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-pyspark-notebook) - [Image Specifics :: Apache Spark](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/specifics.html#apache-spark) ================================================ FILE: images/pyspark-notebook/ipython_kernel_config.py ================================================ # Configuration file for ipython-kernel. # See # With IPython >= 6.0.0, all outputs to stdout/stderr are captured. # It is the case for subprocesses and output of compiled libraries like Spark. # Those logs now go to both the notebook logs and the notebook outputs. # Logs are particularly verbose with Spark, that is why we turn them off through this flag.
# # Attempt to capture and forward low-level output, e.g. produced by Extension libraries. # Default: True # type: ignore c.IPKernelApp.capture_fd_output = False # noqa: F821 ================================================ FILE: images/pyspark-notebook/setup_spark.py ================================================ #!/usr/bin/env python3 # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. # Requirements: # - Run as the root user # - Required env variable: SPARK_HOME import argparse import logging import os import re import subprocess from pathlib import Path import requests from bs4 import BeautifulSoup LOGGER = logging.getLogger(__name__) def get_all_refs(url: str) -> list[str]: """ Get all the references for a given webpage """ resp = requests.get(url) soup = BeautifulSoup(resp.text, "html.parser") return [a["href"] for a in soup.find_all("a", href=True)] def get_latest_spark_version() -> str: """ Returns the last version of Spark using spark archive """ LOGGER.info("Downloading Spark versions information") all_refs = get_all_refs("https://archive.apache.org/dist/spark/") LOGGER.info(f"All refs: {all_refs}") pattern = re.compile(r"^spark-(\d+\.\d+\.\d+)/$") versions = [match.group(1) for ref in all_refs if (match := pattern.match(ref))] LOGGER.info(f"Available versions: {versions}") # Compare versions semantically def version_array(ver: str) -> tuple[int, int, int, str]: # 3.5.3 -> [3, 5, 3, ""] # 4.0.0-preview2 -> [4, 0, 0, "preview2"] arr = ver.split(".") assert len(arr) == 3, arr major, minor = int(arr[0]), int(arr[1]) patch, _, preview = arr[2].partition("-") return (major, minor, int(patch), preview) latest_version = max(versions, key=lambda ver: version_array(ver)) LOGGER.info(f"Latest version: {latest_version}") return latest_version def download_spark( *, spark_version: str, hadoop_version: str, scala_version: str, spark_download_url: Path, ) -> str: """ Downloads and unpacks spark The resulting spark 
directory name is returned """ LOGGER.info("Downloading and unpacking Spark") spark_dir_name = f"spark-{spark_version}-bin-hadoop{hadoop_version}" if scala_version: spark_dir_name += f"-scala{scala_version}" LOGGER.info(f"Spark directory name: {spark_dir_name}") spark_url = spark_download_url / f"spark-{spark_version}" / f"{spark_dir_name}.tgz" LOGGER.info(f"Spark download URL: {spark_url}") tmp_file = Path("/tmp/spark.tar.gz") subprocess.check_call( ["curl", "--progress-bar", "--location", "--output", tmp_file, spark_url] ) subprocess.check_call( [ "tar", "xzf", tmp_file, "-C", "/usr/local", "--owner", "root", "--group", "root", "--no-same-owner", ] ) tmp_file.unlink() return spark_dir_name def configure_spark(spark_dir_name: str, spark_home: Path) -> None: """ Creates a ${SPARK_HOME} symlink to a versioned spark directory Creates a 10spark-config.sh symlink to source PYTHONPATH automatically """ LOGGER.info("Configuring Spark") subprocess.check_call(["ln", "-s", f"/usr/local/{spark_dir_name}", spark_home]) # Add a link in the before_notebook hook in order to source PYTHONPATH automatically CONFIG_SCRIPT = "/usr/local/bin/before-notebook.d/10spark-config.sh" subprocess.check_call( ["ln", "-s", spark_home / "sbin/spark-config.sh", CONFIG_SCRIPT] ) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) arg_parser = argparse.ArgumentParser() arg_parser.add_argument("--spark-version", required=True) arg_parser.add_argument("--hadoop-version", required=True) arg_parser.add_argument("--scala-version", required=True) arg_parser.add_argument("--spark-download-url", type=Path, required=True) args = arg_parser.parse_args() args.spark_version = args.spark_version or get_latest_spark_version() spark_dir_name = download_spark( spark_version=args.spark_version, hadoop_version=args.hadoop_version, scala_version=args.scala_version, spark_download_url=args.spark_download_url, ) configure_spark( spark_dir_name=spark_dir_name, spark_home=Path(os.environ["SPARK_HOME"]) ) 
================================================ FILE: images/pytorch-notebook/.dockerignore ================================================ # Documentation README.md ================================================ FILE: images/pytorch-notebook/Dockerfile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. ARG REGISTRY=quay.io ARG OWNER=jupyter ARG BASE_IMAGE=$REGISTRY/$OWNER/scipy-notebook FROM $BASE_IMAGE LABEL maintainer="Jupyter Project " # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] # Install PyTorch with pip (https://pytorch.org/get-started/locally/) # hadolint ignore=DL3013 RUN pip install --no-cache-dir --index-url 'https://download.pytorch.org/whl/cpu' \ 'torch' \ 'torchaudio' \ 'torchvision' && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ================================================ FILE: images/pytorch-notebook/README.md ================================================ # Jupyter Notebook PyTorch Deep Learning Stack GitHub Actions in the project builds and pushes this image to the Registry. Please visit the project documentation site for help to use and contribute to this image and others. - [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) - [Selecting an Image :: Core Stacks :: jupyter/pytorch-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-pytorch-notebook) ================================================ FILE: images/pytorch-notebook/cuda12/Dockerfile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
ARG REGISTRY=quay.io ARG OWNER=jupyter ARG BASE_IMAGE=$REGISTRY/$OWNER/scipy-notebook FROM $BASE_IMAGE LABEL maintainer="Jupyter Project " # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] # Install PyTorch with pip (https://pytorch.org/get-started/locally/) # hadolint ignore=DL3013 RUN pip install --no-cache-dir --extra-index-url=https://pypi.nvidia.com --index-url 'https://download.pytorch.org/whl/cu128' \ 'torch' \ 'torchaudio' \ 'torchvision' && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html#dockerfiles ENV NVIDIA_VISIBLE_DEVICES="all" \ NVIDIA_DRIVER_CAPABILITIES="compute,utility" # Puts the nvidia-smi binary (system management interface) on path # with associated library files to execute it ENV PATH="${PATH}:/usr/local/nvidia/bin" \ LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib64" ================================================ FILE: images/pytorch-notebook/cuda13/Dockerfile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
ARG REGISTRY=quay.io ARG OWNER=jupyter ARG BASE_IMAGE=$REGISTRY/$OWNER/scipy-notebook FROM $BASE_IMAGE LABEL maintainer="Jupyter Project " # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] # Install PyTorch with pip (https://pytorch.org/get-started/locally/) # hadolint ignore=DL3013 RUN pip install --no-cache-dir --extra-index-url=https://pypi.nvidia.com --index-url 'https://download.pytorch.org/whl/cu130' \ 'torch' \ 'torchaudio' \ 'torchvision' && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html#dockerfiles ENV NVIDIA_VISIBLE_DEVICES="all" \ NVIDIA_DRIVER_CAPABILITIES="compute,utility" # Puts the nvidia-smi binary (system management interface) on path # with associated library files to execute it ENV PATH="${PATH}:/usr/local/nvidia/bin" \ LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib64" ================================================ FILE: images/r-notebook/.dockerignore ================================================ # Documentation README.md ================================================ FILE: images/r-notebook/Dockerfile ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
ARG REGISTRY=quay.io ARG OWNER=jupyter ARG BASE_IMAGE=$REGISTRY/$OWNER/minimal-notebook FROM $BASE_IMAGE LABEL maintainer="Jupyter Project " # Fix: https://github.com/hadolint/hadolint/wiki/DL4006 # Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 SHELL ["/bin/bash", "-o", "pipefail", "-c"] USER root # R pre-requisites RUN apt-get update --yes && \ apt-get install --yes --no-install-recommends \ fonts-dejavu \ unixodbc \ unixodbc-dev \ r-cran-rodbc \ gfortran \ gcc && \ apt-get clean && rm -rf /var/lib/apt/lists/* # macOS Rosetta virtualization creates junk directory which gets owned by root further up. # It'll get re-created, but as USER runner after the next directive so hopefully should not cause permission issues. # # More info: https://github.com/jupyter/docker-stacks/issues/2296 RUN rm -rf "/home/${NB_USER}/.cache/" USER ${NB_UID} # R packages including IRKernel which gets installed globally. # r-e1071: dependency of the caret R package RUN mamba install --yes \ 'r-base' \ 'r-caret' \ 'r-crayon' \ 'r-devtools' \ 'r-e1071' \ 'r-forecast' \ 'r-hexbin' \ 'r-htmltools' \ 'r-htmlwidgets' \ 'r-irkernel' \ 'r-nycflights13' \ 'r-randomforest' \ 'r-rcurl' \ 'r-rmarkdown' \ 'r-rodbc' \ 'r-rsqlite' \ 'r-shiny' \ 'r-tidymodels' \ 'r-tidyverse' \ 'unixodbc' && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" ================================================ FILE: images/r-notebook/README.md ================================================ # Jupyter Notebook R Stack GitHub Actions in the project builds and pushes this image to the Registry. Please visit the project documentation site for help to use and contribute to this image and others. 
- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html)
- [Selecting an Image :: Core Stacks :: jupyter/r-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-r-notebook)

================================================
FILE: images/scipy-notebook/.dockerignore
================================================
# Documentation
README.md

================================================
FILE: images/scipy-notebook/Dockerfile
================================================
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
ARG REGISTRY=quay.io
ARG OWNER=jupyter
ARG BASE_IMAGE=$REGISTRY/$OWNER/minimal-notebook
FROM $BASE_IMAGE

LABEL maintainer="Jupyter Project "

# Fix: https://github.com/hadolint/hadolint/wiki/DL4006
# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

USER root

RUN apt-get update --yes && \
    apt-get install --yes --no-install-recommends \
    # for cython: https://cython.readthedocs.io/en/latest/src/quickstart/install.html
    build-essential \
    # for latex labels
    cm-super \
    dvipng \
    # for matplotlib anim
    ffmpeg && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# macOS Rosetta virtualization creates junk directory which gets owned by root further up.
# It'll get re-created, but as USER runner after the next directive so hopefully should not cause permission issues.
#
# More info: https://github.com/jupyter/docker-stacks/issues/2296
RUN rm -rf "/home/${NB_USER}/.cache/"
USER ${NB_UID}

# Install Python 3 packages
RUN mamba install --yes \
    'altair' \
    'beautifulsoup4' \
    'bokeh' \
    'bottleneck' \
    'cloudpickle' \
    'conda-forge::blas=*=openblas' \
    'cython' \
    'dask' \
    'dill' \
    'h5py' \
    'ipympl' \
    'ipywidgets' \
    'jupyterlab-git' \
    'matplotlib-base' \
    'numba' \
    'numexpr' \
    'openpyxl' \
    'pandas' \
    'patsy' \
    'protobuf' \
    'pytables' \
    'scikit-image' \
    'scikit-learn' \
    'scipy' \
    'seaborn' \
    'sqlalchemy' \
    'statsmodels' \
    'sympy' \
    'widgetsnbextension' \
    'xlrd' && \
    mamba clean --all -f -y && \
    fix-permissions "${CONDA_DIR}" && \
    fix-permissions "/home/${NB_USER}"

# Import matplotlib the first time to build the font cache
RUN MPLBACKEND=Agg python -c "import matplotlib.pyplot" && \
    fix-permissions "/home/${NB_USER}"

# NOTE(review): "/home/${NB_USER}/.cache/" is deliberately NOT removed again here.
# Every step since the cleanup above runs as NB_UID, so nothing in it is owned by root,
# and matplotlib presumably keeps the font cache built by the previous step under
# ~/.cache/matplotlib (its default cache location on Linux) - deleting the directory
# would throw that warm-up away. Confirm no MPLCONFIGDIR/XDG_CACHE_HOME override applies.
USER ${NB_UID}

WORKDIR "${HOME}"

================================================
FILE: images/scipy-notebook/README.md
================================================
# Jupyter Notebook Scientific Python Stack

GitHub Actions in the project builds and pushes this image to the Registry.

Please visit the project documentation site for help to use and contribute to this image and others.
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# The parent image reference is assembled from the REGISTRY and OWNER build
# arguments; BASE_IMAGE can also be overridden as a whole.
ARG REGISTRY=quay.io
ARG OWNER=jupyter
ARG BASE_IMAGE=$REGISTRY/$OWNER/scipy-notebook
FROM $BASE_IMAGE

LABEL maintainer="Jupyter Project "

# Fix: https://github.com/hadolint/hadolint/wiki/DL4006
# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Pin protobuf version for tensorflow 2.20 to avoid user warning.
# The fix can be removed once tensorflow is built with newer version of protobuf:
# https://github.com/tensorflow/tensorflow/issues/98980
RUN mamba install --yes \
    'jupyter-server-proxy' \
    'protobuf>=5.28.3,<6' && \
    mamba clean --all -f -y && \
    fix-permissions "${CONDA_DIR}" && \
    fix-permissions "/home/${NB_USER}"

# Install tensorflow with pip, on x86_64 tensorflow-cpu
# TF_POSTFIX is "-cpu" when `uname -m` reports x86_64 and empty otherwise, so
# the package installed is either "tensorflow-cpu" or plain "tensorflow".
RUN [[ $(uname -m) = x86_64 ]] && TF_POSTFIX="-cpu" || TF_POSTFIX="" && \
    pip install --no-cache-dir \
    "tensorflow${TF_POSTFIX}" && \
    fix-permissions "${CONDA_DIR}" && \
    fix-permissions "/home/${NB_USER}"

# The hook script is taken from the cuda/ subdirectory of this image's
# directory and placed into the before-notebook.d hooks directory.
COPY --chown="${NB_UID}:${NB_GID}" cuda/20tensorboard-proxy-env.sh /usr/local/bin/before-notebook.d/
#!/bin/bash
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# Abort on the first failing command.
set -e

# Initialize the TENSORBOARD_PROXY_URL with the appropriate path
# to use jupyter-server-proxy.
#
# The URL is built as "<prefix>proxy/%PORT%/", where <prefix> is
# JUPYTERHUB_SERVICE_PREFIX, or "/" when that variable is unset or empty.
# "%PORT%" is exported literally; presumably the consumer of this variable
# substitutes the actual port - TODO confirm.
export TENSORBOARD_PROXY_URL="${JUPYTERHUB_SERVICE_PREFIX:-/}proxy/%PORT%/"
# The fix can be removed once tensorflow is built with newer version of protobuf: # https://github.com/tensorflow/tensorflow/issues/98980 RUN mamba install --yes \ 'jupyter-server-proxy' \ "protobuf>=5.28.3,<6" && \ mamba clean --all -f -y && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" # Install TensorFlow, CUDA and cuDNN with pip RUN pip install --no-cache-dir \ 'tensorflow[and-cuda]' && \ fix-permissions "${CONDA_DIR}" && \ fix-permissions "/home/${NB_USER}" COPY --chown="${NB_UID}:${NB_GID}" 20tensorboard-proxy-env.sh /usr/local/bin/before-notebook.d/ # workaround for https://github.com/tensorflow/tensorflow/issues/63362 RUN mkdir -p "${CONDA_DIR}/etc/conda/activate.d/" && \ fix-permissions "${CONDA_DIR}" COPY --chown="${NB_UID}:${NB_GID}" nvidia-lib-dirs.sh "${CONDA_DIR}/etc/conda/activate.d/" # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html#dockerfiles ENV NVIDIA_VISIBLE_DEVICES="all" \ NVIDIA_DRIVER_CAPABILITIES="compute,utility" # Puts the nvidia-smi binary (system management interface) on path # with associated library files to execute it ENV PATH="${PATH}:/usr/local/nvidia/bin" \ LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib64" ================================================ FILE: images/tensorflow-notebook/cuda/nvidia-lib-dirs.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. # This adds NVIDIA Python package libraries to the LD_LIBRARY_PATH. 
# Workaround for https://github.com/tensorflow/tensorflow/issues/63362 NVIDIA_DIR=$(dirname "$(python -c 'import nvidia;print(nvidia.__file__)')") LD_LIBRARY_PATH=$(echo "${NVIDIA_DIR}"/*/lib/ | sed -r 's/\s+/:/g')${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} export LD_LIBRARY_PATH ================================================ FILE: mypy.ini ================================================ # Mypy is an optional static type checker for Python that aims to combine # the benefits of dynamic (or "duck") typing and static typing. # # Documentation: https://www.mypy-lang.org # Project: https://github.com/python/mypy # Config reference: https://mypy.readthedocs.io/en/stable/config_file.html # # We use mypy as part of pre-commit checks [mypy] python_version = 3.12 follow_imports = error strict = True no_incremental = True # This allows us to use pytest decorators, which are not typed yet disallow_untyped_decorators = False # These sections allow us to ignore mypy errors for packages # which are not (hopefully yet) statically typed [mypy-Cython.*] ignore_missing_imports = True [mypy-docker.*] ignore_missing_imports = True [mypy-matplotlib.*] ignore_missing_imports = True [mypy-pandas.*] ignore_missing_imports = True [mypy-plumbum.*] ignore_missing_imports = True [mypy-pyspark.*] ignore_missing_imports = True [mypy-setuptools.*] ignore_missing_imports = True [mypy-tensorflow.*] ignore_missing_imports = True [mypy-torch.*] ignore_missing_imports = True ================================================ FILE: requirements-dev.txt ================================================ docker plumbum pre-commit pytest pytest-rerunfailures # `pytest-xdist` is a plugin that provides the `--numprocesses` flag, # allowing us to run `pytest` tests in parallel pytest-xdist python-dateutil requests tabulate tenacity ================================================ FILE: tagging/README.md ================================================ # Docker stacks tagging and manifest creation Please, refer 
def apply_tags(config: Config) -> None:
    """Apply every tag listed in the image's tags file to the local image.

    The tags file is looked up in ``config.tags_dir`` under the name
    ``<file prefix>-<image>.txt``, where the file prefix is derived from the
    configured platform and variant. Each line of that file is passed as the
    target name to ``docker tag``, with ``config.full_image()`` as the source.
    """
    image = config.image
    LOGGER.info(f"Tagging image: {image}")

    prefix = get_file_prefix_for_platform(
        platform=config.platform, variant=config.variant
    )
    tags_file = config.tags_dir / f"{prefix}-{image}.txt"
    source_image = config.full_image()

    for tag in tags_file.read_text().splitlines():
        LOGGER.info(f"Applying tag: {tag}")
        # Run in the foreground so docker's own output is visible
        docker["tag", source_image, tag] & plumbum.FG

    LOGGER.info(f"All tags applied to image: {image}")
def common_arguments_parser(
    *,
    registry: bool = False,
    owner: bool = False,
    image: bool = False,
    variant: bool = False,
    platform: bool = False,
    tags_dir: bool = False,
    hist_lines_dir: bool = False,
    manifests_dir: bool = False,
    repository: bool = False,
) -> Config:
    """Parse the selected common CLI options and return them as a Config.

    Every keyword flag enables the command line option of the same name
    (``tags_dir`` -> ``--tags-dir`` and so on). All enabled options are
    required on the command line; options that are not enabled are not
    registered at all, so the matching Config fields keep their defaults.
    The platform value is passed through ``unify_aarch64`` before the Config
    is built.
    """
    cli = argparse.ArgumentParser()

    if registry:
        cli.add_argument(
            "--registry",
            help="Image registry",
            choices=["docker.io", "quay.io"],
            required=True,
        )

    # Plain string options, registered in this exact order
    for enabled, flag, description in (
        (owner, "--owner", "Owner of the image"),
        (image, "--image", "Short image name"),
        (variant, "--variant", "Variant tag prefix"),
    ):
        if enabled:
            cli.add_argument(flag, required=True, help=description)

    if platform:
        cli.add_argument(
            "--platform",
            help="Image platform",
            choices=["x86_64", "aarch64", "arm64"],
            type=str,
            required=True,
        )

    # Directory options are converted to Path objects by argparse
    for enabled, flag, content in (
        (tags_dir, "--tags-dir", "tags"),
        (hist_lines_dir, "--hist-lines-dir", "hist_lines"),
        (manifests_dir, "--manifests-dir", "manifests"),
    ):
        if enabled:
            cli.add_argument(
                flag,
                required=True,
                type=Path,
                help=f"Directory for {content} file",
            )

    if repository:
        cli.add_argument(
            "--repository",
            help="Repository name on GitHub",
            required=True,
        )

    parsed = cli.parse_args()
    if platform:
        parsed.platform = unify_aarch64(parsed.platform)

    return Config(**vars(parsed))
@dataclass(frozen=True)
class Config:
    """Immutable settings shared by the tagging command line apps.

    Every field has an empty default, so an app only needs to fill in the
    values it actually uses.
    """

    # Image coordinates, combined by full_image()
    registry: str = ""
    owner: str = ""
    image: str = ""

    variant: str = ""
    platform: str = ""

    # Directories used for the generated files
    tags_dir: Path = Path()
    hist_lines_dir: Path = Path()
    manifests_dir: Path = Path()

    repository: str = ""

    def full_image(self) -> str:
        """Return the untagged image reference: ``registry/owner/image``."""
        return "/".join((self.registry, self.owner, self.image))
def _platform_tag(merged_tag: str, platform: str) -> str:
    """Return ``merged_tag`` with ``<platform>-`` prepended to its tag part.

    Only the colon that separates the tag from the image name is considered,
    so a registry given as ``host:port`` is left untouched. A reference
    without a tag is returned unchanged.
    """
    repository, colon, tag = merged_tag.rpartition(":")
    # A "/" after the last colon means that colon belongs to "host:port/..."
    # and the reference carries no tag at all.
    if not colon or "/" in tag:
        return merged_tag
    return f"{repository}:{platform}-{tag}"


def find_platform_tags(merged_tag: str) -> list[str]:
    """Return the per-platform tags of ``merged_tag`` that can be inspected.

    For every platform in ALL_PLATFORMS the platform-specific tag is built
    and its manifest is inspected. Tags whose inspection still fails after
    the retries of ``inspect_manifest`` (RetryError) are skipped with a
    warning.
    """
    platform_tags = []

    for platform in ALL_PLATFORMS:
        platform_tag = _platform_tag(merged_tag, platform)
        LOGGER.warning(f"Trying to inspect: {platform_tag} in the registry")
        try:
            inspect_manifest(platform_tag)
            platform_tags.append(platform_tag)
            LOGGER.info(f"Tag {platform_tag} found successfully")
        except RetryError:
            LOGGER.warning(f"Manifest for tag {platform_tag} doesn't exist")

    return platform_tags


def merge_tags(merged_tag: str, push_to_registry: bool) -> None:
    """Create the manifest ``merged_tag`` out of its per-platform tags.

    Runs ``docker buildx imagetools create`` with all platform tags found by
    ``find_platform_tags``. When ``push_to_registry`` is false, ``--dry-run``
    is appended to the command.

    If no platform tag is found, nothing is merged; with ``push_to_registry``
    enabled this situation fails an assertion instead.
    """
    LOGGER.info(f"Trying to merge tag: {merged_tag}")

    platform_tags = find_platform_tags(merged_tag)
    if not platform_tags:
        assert not push_to_registry, (
            f"No platform tags found for merged tag: {merged_tag}, "
            "and push to registry is enabled. "
            "Cannot create a manifest for a non-existing image."
        )
        LOGGER.info(
            f"Not running merge for tag: {merged_tag} as no platform tags found"
        )
        return

    args = [
        "buildx",
        "imagetools",
        "create",
        *platform_tags,
        "--tag",
        merged_tag,
    ]
    if not push_to_registry:
        args.append("--dry-run")

    LOGGER.info(f"Running command: {' '.join(args)}")
    docker[args] & plumbum.FG
    if push_to_registry:
        LOGGER.info(f"Pushed merged tag: {merged_tag}")
    else:
        LOGGER.info(f"Skipped push for tag: {merged_tag}")
# Distributed under the terms of the Modified BSD License. import datetime import logging from docker.models.containers import Container from tagging.apps.common_cli_arguments import common_arguments_parser from tagging.apps.config import Config from tagging.hierarchy.get_manifests import get_manifests from tagging.hierarchy.get_taggers import get_taggers from tagging.manifests.build_info import BuildInfoConfig, build_info_manifest from tagging.utils.docker_runner import DockerRunner from tagging.utils.get_prefix import get_file_prefix, get_tag_prefix from tagging.utils.git_helper import GitHelper LOGGER = logging.getLogger(__name__) # We use a manifest creation timestamp, which happens right after a build BUILD_TIMESTAMP = datetime.datetime.now(datetime.UTC).isoformat()[:-13] + "Z" MARKDOWN_LINE_BREAK = "
" def get_build_history_line(config: Config, container: Container, filename: str) -> str: LOGGER.info(f"Calculating build history line for image: {config.image}") taggers = get_taggers(config.image) tags_prefix = get_tag_prefix(config.variant) all_tags = [tags_prefix + "-" + tagger(container) for tagger in taggers] date_column = f"`{BUILD_TIMESTAMP}`" image_column = MARKDOWN_LINE_BREAK.join( f"`{config.full_image()}:{tag_value}`" for tag_value in all_tags ) commit_hash = GitHelper.commit_hash() links_column = MARKDOWN_LINE_BREAK.join( [ f"[Git diff](https://github.com/{config.repository}/commit/{commit_hash})", f"[Dockerfile](https://github.com/{config.repository}/blob/{commit_hash}/images/{config.image}/Dockerfile)", f"[Build manifest](./{filename})", ] ) build_history_line = f"| {date_column} | {image_column} | {links_column} |" LOGGER.info(f"Build history line calculated for image: {config.image}") return build_history_line def write_build_history_line( config: Config, container: Container, filename: str ) -> None: LOGGER.info(f"Writing tags for image: {config.image}") path = config.hist_lines_dir / f"{filename}.txt" path.parent.mkdir(parents=True, exist_ok=True) build_history_line = get_build_history_line(config, container, filename) path.write_text(build_history_line) LOGGER.info(f"Build history line written to: {path}") def get_manifest(config: Config, container: Container, commit_hash_tag: str) -> str: LOGGER.info(f"Calculating manifest file for image: {config.image}") manifests = get_manifests(config.image) manifest_names = [manifest.__name__ for manifest in manifests] LOGGER.info(f"Using manifests: {manifest_names}") build_info_config = BuildInfoConfig( registry=config.registry, owner=config.owner, image=config.image, repository=config.repository, build_timestamp=BUILD_TIMESTAMP, ) markdown_pieces = [ f"# Build manifest for image: {config.image}:{commit_hash_tag}", build_info_manifest(build_info_config).get_str(), *(manifest(container).get_str() for 
def write_manifest(
    config: Config, container: Container, *, filename: str, commit_hash_tag: str
) -> None:
    """Write the build manifest of the image to ``<manifests_dir>/<filename>.md``.

    The parent directory is created if it doesn't exist yet. The manifest
    content is produced by ``get_manifest`` for the given container.
    """
    LOGGER.info(f"Writing manifest file for image: {config.image}")

    path = config.manifests_dir / f"{filename}.md"
    path.parent.mkdir(parents=True, exist_ok=True)

    manifest = get_manifest(config, container, commit_hash_tag)
    path.write_text(manifest)

    LOGGER.info(f"Manifest file written to: {path}")


def write_all(config: Config) -> None:
    """Write the build history line and the build manifest of the image.

    Both files share the base name
    ``<file prefix>-<image>-<commit hash tag>`` and are generated from one
    container started from ``config.full_image()``.
    """
    LOGGER.info(f"Writing all files for image: {config.image}")

    file_prefix = get_file_prefix(config.variant)
    commit_hash_tag = GitHelper.commit_hash_tag()
    filename = f"{file_prefix}-{config.image}-{commit_hash_tag}"

    # A single container is shared by both writers
    with DockerRunner(config.full_image()) as container:
        write_build_history_line(config, container, filename)
        write_manifest(
            config, container, filename=filename, commit_hash_tag=commit_hash_tag
        )

    LOGGER.info(f"All files written for image: {config.image}")
def get_tags(config: Config) -> list[str]:
    """Return all full tags (``<full image>:<prefix>-<value>``) of the image.

    The first tag is always ``<prefix>-latest``. The remaining ones are
    computed by running each tagger of the image, in order, against a
    container started from ``config.full_image()``.
    """
    LOGGER.info(f"Calculating tags for image: {config.image}")

    taggers = get_taggers(config.image)
    tags_prefix = get_tag_prefix(config.variant)
    full_image = config.full_image()

    tags = [f"{full_image}:{tags_prefix}-latest"]
    with DockerRunner(full_image) as container:
        for tagger in taggers:
            tagger_name = tagger.__name__
            tag_value = tagger(container)
            LOGGER.info(
                f"Calculated tag, tagger_name: {tagger_name} tag_value: {tag_value}"
            )
            tags.append(f"{full_image}:{tags_prefix}-{tag_value}")

    LOGGER.info(f"Tags calculated for image: {config.image}")
    return tags


def write_tags_file(config: Config) -> None:
    """Write the tags of the image, one per line, to its tags file.

    The file is ``<tags_dir>/<file prefix>-<image>.txt``; the parent directory
    is created if it doesn't exist yet. No trailing newline is written.
    """
    LOGGER.info(f"Writing tags for image: {config.image}")

    file_prefix = get_file_prefix(config.variant)
    filename = f"{file_prefix}-{config.image}.txt"
    path = config.tags_dir / filename
    path.parent.mkdir(parents=True, exist_ok=True)

    tags = get_tags(config)
    path.write_text("\n".join(tags))

    LOGGER.info(f"Tags written to: {path}")
def get_taggers(image: str | None) -> list[TaggerInterface]:
    """Return the taggers of ``image`` and all of its ancestors.

    The parent chain recorded in ALL_IMAGES is followed up to the root image.
    Taggers of the root image come first and the image's own taggers last.
    ``None`` yields an empty list; an image name missing from ALL_IMAGES
    raises KeyError.
    """
    # Collected leaf first, then reversed to get the root-first order
    lineage = []
    current = image
    while current is not None:
        description = ALL_IMAGES[current]
        lineage.append(description.taggers)
        current = description.parent_image

    return [tagger for taggers in reversed(lineage) for tagger in taggers]
# Description of every image, keyed by its short name.
# Taggers and manifests listed here are only the ones an image adds on top of
# its parent: the get_taggers()/get_manifests() helpers combine them with
# everything defined along the parent chain.
ALL_IMAGES = {
    "docker-stacks-foundation": ImageDescription(
        parent_image=None,
        taggers=[
            commit_sha_tagger,
            date_tagger,
            ubuntu_version_tagger,
            versions.python_major_minor_tagger,
            versions.python_tagger,
            versions.mamba_tagger,
            versions.conda_tagger,
        ],
        manifests=[conda_environment_manifest, apt_packages_manifest],
    ),
    "base-notebook": ImageDescription(
        parent_image="docker-stacks-foundation",
        taggers=[
            versions.jupyter_notebook_tagger,
            versions.jupyter_lab_tagger,
            versions.jupyter_hub_tagger,
        ],
    ),
    "minimal-notebook": ImageDescription(parent_image="base-notebook"),
    "scipy-notebook": ImageDescription(parent_image="minimal-notebook"),
    "r-notebook": ImageDescription(
        parent_image="minimal-notebook",
        taggers=[versions.r_tagger],
        manifests=[r_packages_manifest],
    ),
    "julia-notebook": ImageDescription(
        parent_image="minimal-notebook",
        taggers=[versions.julia_tagger],
        manifests=[julia_packages_manifest],
    ),
    "tensorflow-notebook": ImageDescription(
        parent_image="scipy-notebook", taggers=[versions.tensorflow_tagger]
    ),
    # The python tag is already inherited from docker-stacks-foundation;
    # this image contributes the version of the installed torch package.
    "pytorch-notebook": ImageDescription(
        parent_image="scipy-notebook", taggers=[versions.pytorch_tagger]
    ),
    "datascience-notebook": ImageDescription(
        parent_image="scipy-notebook",
        taggers=[versions.r_tagger, versions.julia_tagger],
        manifests=[r_packages_manifest, julia_packages_manifest],
    ),
    "pyspark-notebook": ImageDescription(
        parent_image="scipy-notebook",
        taggers=[versions.spark_tagger, versions.java_tagger],
        manifests=[spark_info_manifest],
    ),
    "all-spark-notebook": ImageDescription(
        parent_image="pyspark-notebook",
        taggers=[versions.r_tagger],
        manifests=[r_packages_manifest],
    ),
}
@dataclass(frozen=True)
class BuildInfoConfig:
    """Immutable input of the build info manifest; all fields are required."""

    # Image coordinates, combined by full_image()
    registry: str
    owner: str
    image: str

    # Used to build GitHub links in the manifest
    repository: str
    build_timestamp: str

    def full_image(self) -> str:
        """Return the untagged image reference: ``registry/owner/image``."""
        return "/".join((self.registry, self.owner, self.image))
def conda_environment_manifest(container: Container) -> MarkdownPiece:
    """Build the "Python Packages" manifest piece for the container.

    The sections are, in order: the raw output of ``python --version``,
    followed by the quoted outputs of ``conda info``, ``mamba info`` and
    ``mamba list``.
    """
    python_version = DockerRunner.exec_cmd(container, "python --version")
    quoted_sections = [
        quoted_output(container, command)
        for command in ("conda info", "mamba info", "mamba list")
    ]
    return MarkdownPiece(
        title="## Python Packages",
        sections=[python_version, *quoted_sections],
    )
@dataclass(frozen=True)
class MarkdownPiece:
    """A titled part of a Markdown document, made of one or more sections."""

    # Heading line of the piece; must be a level-2 heading
    title: str
    # Bodies of the sections, rendered below the title
    sections: list[str]

    def __post_init__(self) -> None:
        # All pieces are H2
        is_h2 = self.title.startswith("## ")
        assert is_h2

    def get_str(self) -> str:
        """Return the title and all sections separated by blank lines."""
        parts = [self.title]
        parts.extend(self.sections)
        return "\n\n".join(parts)
def ubuntu_version_tagger(container: Container) -> str:
    """Return ``ubuntu-<VERSION_ID>`` read from the container's /etc/os-release.

    The first line starting with ``VERSION_ID`` is used; its value is the text
    after the ``=`` with surrounding double quotes removed.

    Raises:
        RuntimeError: if no line starts with ``VERSION_ID``.
    """
    os_release_lines = DockerRunner.exec_cmd(
        container,
        "cat /etc/os-release",
    ).split("\n")

    version_line = next(
        (line for line in os_release_lines if line.startswith("VERSION_ID")),
        None,
    )
    if version_line is None:
        raise RuntimeError(f"did not find ubuntu version in: {os_release_lines}")

    version_id = version_line.split("=")[1].strip('"')
    return "ubuntu-" + version_id
def _get_pip_package_version(container: Container, package: str) -> str:
    """Return the version of `package` reported by `pip show` in the container.

    Raises:
        AssertionError: if the output contains no `Version: ` line. The same
            exception type is kept on purpose: `tensorflow_tagger` in this
            file catches `AssertionError` to fall back to another package.
    """
    PIP_VERSION_PREFIX = "Version: "
    package_info = DockerRunner.exec_cmd(
        container,
        cmd=f"pip show {package}",
    )
    # Find the line by its prefix rather than by position, so an unexpected
    # extra line at the top of the output does not shift the lookup.
    for line in package_info.split("\n"):
        if line.startswith(PIP_VERSION_PREFIX):
            return line[len(PIP_VERSION_PREFIX) :]
    raise AssertionError(
        f"No `{PIP_VERSION_PREFIX.strip()}` line in `pip show {package}` output"
    )
LOGGER = logging.getLogger(__name__)


class DockerRunner:
    """Context manager that starts a container for an image and removes it on exit.

    `__enter__` runs the container detached with a TTY and returns it;
    `__exit__` force-removes it.
    """

    def __init__(
        self,
        image_name: str,
        docker_client: docker.DockerClient | None = None,
        command: str = "sleep infinity",
    ):
        """Store the run parameters; no container is started yet.

        Args:
            image_name: image the container is created from.
            docker_client: client to use; when omitted, one is created with
                `docker.from_env()` at construction time.
            command: command the container runs.
        """
        self.container: Container | None = None
        self.image_name: str = image_name
        self.command: str = command
        # The client is built here, not in the signature: a default of
        # `docker.from_env()` would be evaluated once when the module is
        # imported and then shared by every instance.
        self.docker_client: docker.DockerClient = (
            docker.from_env() if docker_client is None else docker_client
        )

    def __enter__(self) -> Container:
        """Start the container and return it."""
        LOGGER.info(f"Creating a container for the image: {self.image_name} ...")
        default_kwargs = {"detach": True, "tty": True}
        self.container = self.docker_client.containers.run(
            image=self.image_name, command=self.command, **default_kwargs
        )
        LOGGER.info(f"Container {self.container.name} created")
        return self.container

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Force-remove the container started by `__enter__`."""
        assert self.container is not None
        LOGGER.info(f"Removing container {self.container.name} ...")
        self.container.remove(force=True)
        LOGGER.info(f"Container {self.container.name} removed")

    @staticmethod
    def exec_cmd(container: Container, cmd: str) -> str:
        """Run `cmd` in `container` and return its decoded output.

        Trailing whitespace is stripped from the output.

        Raises:
            AssertionError: if the command exits with a non-zero code.
                Callers in this repository catch this type, so it is kept.
        """
        LOGGER.info(f"Running cmd: `{cmd}` on container: {container.name}")
        exec_result = container.exec_run(cmd)
        output = exec_result.output.decode().rstrip()
        assert isinstance(output, str)
        if exec_result.exit_code != 0:
            LOGGER.error(f"Command output:\n{output}")
            raise AssertionError(f"Command: `{cmd}` failed")
        else:
            LOGGER.debug(f"Command output:\n{output}")
        return output
def quoted_output(container: Container, cmd: str) -> str:
    """Run `cmd` in the container and return it with its output as Markdown.

    The result is the command in backticks followed by the cleaned-up
    output inside a fenced `text` code block.

    Raises:
        AssertionError: if the command produced no output after clean-up.
    """
    cmd_output = DockerRunner.exec_cmd(container, cmd)
    # For example, `mamba info` adds redundant empty lines
    cmd_output = cmd_output.strip("\n")
    # For example, R packages list contains trailing whitespace
    cmd_output = "\n".join(line.rstrip() for line in cmd_output.split("\n"))

    assert cmd_output, f"Command `{cmd}` returned empty output"

    # Both values are substituted in a single `str.format` call on a plain
    # template. Interpolating `cmd` with an f-string first and formatting the
    # result afterwards would re-parse any `{` or `}` contained in `cmd`.
    # The template is dedented before substitution so that multi-line output
    # does not affect the dedent.
    template = textwrap.dedent("""\
        `{cmd}`:

        ```text
        {output}
        ```""")
    return template.format(cmd=cmd, output=cmd_output)
================================================ FILE: tests/__init__.py ================================================ ================================================ FILE: tests/by_image/all-spark-notebook/data/local_sparkR.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "library(SparkR)\n", "\n", "# Spark session & context\n", "sc <- sparkR.session(\"local\")\n", "\n", "# Sum of the first 100 whole numbers\n", "sdf <- createDataFrame(list(1:100))\n", "dapplyCollect(sdf,\n", " function(x) \n", " { x <- sum(x)}\n", " )\n", "# 5050" ] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: tests/by_image/all-spark-notebook/data/local_sparklyr.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "library(sparklyr)\n", "\n", "# get the default config\n", "conf <- spark_config()\n", "# Set the catalog implementation in-memory\n", "conf$spark.sql.catalogImplementation <- \"in-memory\"\n", "\n", "# Spark session & context\n", "sc <- spark_connect(master = \"local\", config = conf)\n", "\n", "# Sum of the first 100 whole numbers\n", "sdf_len(sc, 100, repartition = 1) %>% \n", " spark_apply(function(e) sum(e))\n", "# 5050" ] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: 
@pytest.mark.flaky(reruns=3, reruns_delay=1)
@pytest.mark.parametrize(
    "test_file,expected_warnings",
    [
        ("local_sparkR", ["WARNING: Using incubator modules: jdk.incubator.vector"]),
        ("local_sparklyr", []),
    ],
)
@pytest.mark.parametrize("output_format", ["pdf", "html", "markdown"])
def test_spark_r_nbconvert(
    container: TrackedContainer,
    test_file: str,
    output_format: str,
    expected_warnings: list[str],
) -> None:
    """Convert a Spark R notebook with execution and compare the logged warnings."""
    notebook_path = THIS_DIR / "data" / f"{test_file}.ipynb"
    # Warnings are tolerated by the shared check only when some are expected.
    expect_clean_logs = not expected_warnings
    logs = check_nbconvert(
        container,
        notebook_path,
        output_format,
        execute=True,
        no_warnings=expect_clean_logs,
    )
    found_warnings = TrackedContainer.get_warnings(logs)
    assert found_warnings == expected_warnings
def make_get_request() -> None:
    """Wait until the local server answers `GET /api` successfully.

    One attempt is made per second for about ten seconds and the function
    returns on the first successful response. If none succeeds in that time,
    a last attempt is made whose error is not caught.

    Raises:
        requests.RequestException: if the last attempt fails as well.
    """
    url = "http://localhost:8888/api"
    # Give some time for server to start
    finish_time = time.time() + 10
    sleep_time = 1
    while time.time() < finish_time:
        time.sleep(sleep_time)
        try:
            requests.get(url).raise_for_status()
        except requests.RequestException:
            # Server not ready yet; try again until the deadline.
            continue
        # Stop polling as soon as the server has answered.
        return
    # Last attempt, unguarded: the real request error is reported instead of
    # relying on a response variable that may never have been assigned.
    requests.get(url).raise_for_status()
def test_nb_user_change(container: TrackedContainer) -> None:
    """Container should change the username (`NB_USER`) of the default user."""
    nb_user = "nayvoj"
    home_dir = f"/home/{nb_user}"
    run_environment = [f"NB_USER={nb_user}", "CHOWN_HOME=yes"]
    container.run_detached(
        user="root",
        environment=run_environment,
        command=["sleep", "infinity"],
    )

    # Give the chown time to complete.
    # Use sleep, not wait, because the container sleeps forever.
    time.sleep(5)
    LOGGER.info(
        f"Checking if a home folder of {nb_user} contains the hidden '.jupyter' folder with appropriate permissions ..."
    )
    stat_command = f'stat -c "%F %U %G" {home_dir}/.jupyter'
    expected_output = f"directory {nb_user} users"
    output = container.exec_cmd(stat_command, workdir=home_dir)
    failure_message = f"Hidden folder .jupyter was not copied properly to {nb_user} home folder. stat: {output}, expected {expected_output}"
    assert output == expected_output, failure_message
@pytest.mark.parametrize(
    "env",
    [
        {},
        {"JUPYTER_PORT": 1234, "DOCKER_STACKS_JUPYTER_CMD": "lab"},
        {"JUPYTER_PORT": 2345, "DOCKER_STACKS_JUPYTER_CMD": "notebook"},
        {"JUPYTER_PORT": 3456, "DOCKER_STACKS_JUPYTER_CMD": "server"},
        {"JUPYTER_PORT": 4567, "DOCKER_STACKS_JUPYTER_CMD": "nbclassic"},
        {"JUPYTER_PORT": 5678, "RESTARTABLE": "yes"},
        {"JUPYTER_PORT": 6789},
        {"JUPYTER_PORT": 7890, "DOCKER_STACKS_JUPYTER_CMD": "notebook"},
    ],
)
def test_custom_internal_port(
    container: TrackedContainer,
    http_client: requests.Session,
    free_host_port: int,
    env: dict[str, str],
) -> None:
    """Container should be accessible from the host when using custom internal port"""
    # Without JUPYTER_PORT in the environment, port 8888 is published.
    container_port = env["JUPYTER_PORT"] if "JUPYTER_PORT" in env else 8888
    port_bindings = {container_port: free_host_port}
    container.run_detached(
        command=["start-notebook.py", "--IdentityProvider.token=''"],
        environment=env,
        ports=port_bindings,
    )
    response = http_client.get(f"http://localhost:{free_host_port}")
    response.raise_for_status()
    logs = container.get_logs()
    LOGGER.debug(logs)
    assert "ERROR" not in logs
    assert not TrackedContainer.get_warnings(logs)
def get_healthy_status(
    container: TrackedContainer,
    *,
    env: list[str] | None,
    cmd: list[str] | None,
    user: str | None,
) -> str:
    """Start the container and poll its health for up to about ten seconds.

    Health is read once per second. The function returns as soon as the
    status is "healthy"; otherwise it returns the last status read.
    """
    container.run_detached(
        environment=env,
        command=cmd,
        user=user,
    )

    # giving some time to let the server start
    deadline = time.time() + 10
    poll_interval = 1
    while time.time() < deadline:
        time.sleep(poll_interval)
        status = container.get_health()
        if status == "healthy":
            break

    return status
@pytest.mark.parametrize(
    "env,cmd",
    [
        (["NB_USER=testuser", "CHOWN_HOME=1"], None),
        (
            ["NB_USER=testuser", "CHOWN_HOME=1"],
            ["start-notebook.py", "--ServerApp.base_url=/test"],
        ),
        (
            ["NB_USER=testuser", "CHOWN_HOME=1", "JUPYTER_PORT=8123"],
            ["start-notebook.py", "--ServerApp.base_url=/test"],
        ),
    ],
)
def test_not_healthy(
    container: TrackedContainer,
    env: list[str] | None,
    cmd: list[str] | None,
) -> None:
    """Check that these configurations, run without a user override, do not become healthy."""
    final_status = get_healthy_status(container, env=env, cmd=cmd, user=None)
    assert final_status != "healthy", "Container should not be healthy for this testcase"
@pytest.fixture(scope="session")
def ipv6_network(docker_client: docker.DockerClient) -> Generator[str, None, None]:
    """Create a dual-stack IPv6 docker network

    A random /64 prefix under `fc00:` is generated and an internal,
    IPv6-enabled network named after it is created. The fixture yields the
    network name and removes the network afterwards.
    """
    # Doesn't have to be routable since we're testing inside the container
    # `randint` includes both endpoints, so the upper bound must be 0xFFFF:
    # with 2**16 it could return 0x10000, which formats as the five-digit
    # group "10000" and makes the subnet an invalid IPv6 address.
    subnet64 = "fc00:" + ":".join(f"{randint(0, 0xFFFF):x}" for _ in range(3))
    name = subnet64.replace(":", "-")
    docker_client.networks.create(
        name,
        ipam=docker.types.IPAMPool(
            subnet=subnet64 + "::/64",
            gateway=subnet64 + "::1",
        ),
        enable_ipv6=True,
        internal=True,
    )
    yield name
    docker_client.networks.get(name).remove()
def test_pandoc(container: TrackedContainer) -> None:
    """Pandoc shall be able to convert MD to HTML."""
    logs = container.run_and_wait(
        timeout=10,
        command=["bash", "-c", 'echo "**BOLD**" | pandoc'],
    )
    # Markdown strong emphasis is rendered by pandoc as a <strong> element
    # inside a paragraph. The tags are part of the expected text and must
    # stay in the literal.
    assert "<p><strong>BOLD</strong></p>" in logs
def test_tini_entrypoint(
    container: TrackedContainer, pid: int = 1, command: str = "tini"
) -> None:
    """Check that tini is launched as PID 1

    Credits to the following answer for the ps options used in the test:
    https://superuser.com/questions/632979/if-i-know-the-pid-number-of-a-process-how-can-i-get-its-name
    """
    LOGGER.info(f"Test that {command} is launched as PID {pid} ...")
    container.run_detached()
    # Select the PID 1 and get the corresponding command
    ps_command = f"ps -p {pid} -o comm="
    output = container.exec_cmd(ps_command)
    # Neither marker may appear in the ps output.
    for marker in ("ERROR", "WARNING"):
        assert marker not in output
    mismatch_message = f"{command} shall be launched as pid {pid}, got {output}"
    assert output == command, mismatch_message
def test_julia(container: TrackedContainer) -> None:
    """Run `julia --version` in the container through `run_and_wait` (timeout=10)."""
    container.run_and_wait(timeout=10, command=["julia", "--version"])
echo "Inside b.sh MY_VAR variable has ${MY_VAR} value" echo "Changing value of MY_VAR" export MY_VAR=456 echo "After change inside b.sh MY_VAR variable has ${MY_VAR} value" ================================================ FILE: tests/by_image/docker-stacks-foundation/data/run-hooks/change/c.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. echo "Inside c.sh MY_VAR variable has ${MY_VAR} value" ================================================ FILE: tests/by_image/docker-stacks-foundation/data/run-hooks/executables/executable.py ================================================ #!/usr/bin/env python3 # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. print("Executable python file was successfully run") ================================================ FILE: tests/by_image/docker-stacks-foundation/data/run-hooks/executables/non_executable.py ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. assert False ================================================ FILE: tests/by_image/docker-stacks-foundation/data/run-hooks/executables/run-me.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. export SOME_VAR=123 ================================================ FILE: tests/by_image/docker-stacks-foundation/data/run-hooks/failures/a.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
#!/usr/bin/env python3
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

# Script that prints a start marker and the OTHER_VAR environment variable,
# then exits with status 1.
# NOTE(review): presumably a fixture for a hook that fails - confirm against
# the test that uses this directory.
import os
import sys

print("Started: b.py")
# Raises KeyError when OTHER_VAR is not present in the environment.
print(f"OTHER_VAR={os.environ['OTHER_VAR']}")

# Exit with a non-zero status; the print below is never reached.
sys.exit(1)

print("Finished: b.py")
export MY_VAR=1 echo "Inside non-executable.sh MY_VAR variable has ${MY_VAR} value" ================================================ FILE: tests/by_image/docker-stacks-foundation/data/run-hooks/unset/a.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. export MY_VAR=123 echo "Inside a.sh MY_VAR variable has ${MY_VAR} value" ================================================ FILE: tests/by_image/docker-stacks-foundation/data/run-hooks/unset/b.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. echo "Inside b.sh MY_VAR variable has ${MY_VAR} value" echo "Unsetting MY_VAR" unset MY_VAR ================================================ FILE: tests/by_image/docker-stacks-foundation/data/run-hooks/unset/c.sh ================================================ #!/bin/bash # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. echo "Inside c.sh MY_VAR variable has ${MY_VAR} value" ================================================ FILE: tests/by_image/docker-stacks-foundation/test_outdated.py ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
@pytest.mark.parametrize("requested_only", [True, False])
@pytest.mark.info
def test_outdated_packages(container: TrackedContainer, requested_only: bool) -> None:
    """Log which conda packages in the image could be updated.

    The test is informational: it contains no assertions and only writes a
    summary line and a table of updatable packages to the log. It runs twice,
    once with ``requested_only`` set and once without; the flag is forwarded
    unchanged to ``CondaPackageHelper``.
    """
    LOGGER.info(f"Checking outdated packages in {container.image_name} ...")
    helper = CondaPackageHelper(container)
    outdated = helper.find_updatable_packages(requested_only)

    summary = helper.get_outdated_summary(outdated, requested_only)
    LOGGER.info(summary)

    table = helper.get_outdated_table(outdated)
    LOGGER.info(f"Outdated packages table:\n{table}\n")
def get_package_import_name(package: str) -> str:
    """Translate a requested package name into the name used to import it.

    For R packages (as decided by ``is_r_package``) the two-character ``r-``
    prefix is dropped first. The resulting name is then looked up in
    ``PACKAGE_MAPPING``; names without an entry are returned as they are.
    """
    bare_name = package[2:] if is_r_package(package) else package
    if bare_name in PACKAGE_MAPPING:
        return PACKAGE_MAPPING[bare_name]
    return bare_name
def _check_import_packages(
    container: TrackedContainer,
    packages_to_check: list[str],
    check_function: Callable[[TrackedContainer, str], None],
) -> None:
    """Try to import every package and fail once with the full list of failures.

    ``check_function`` is called with the container and each package name in
    turn. An ``AssertionError`` raised by it marks that package as failed and
    is logged, but does not stop the loop, so all broken imports are reported
    together through a single ``pytest.fail`` at the end.
    """
    LOGGER.info("Testing the import of packages ...")
    broken: list[str] = []

    for name in packages_to_check:
        LOGGER.info(f"Trying to import {name}")
        try:
            check_function(container, name)
        except AssertionError as err:
            # Keep going: collect every failure before reporting.
            broken.append(name)
            LOGGER.error(f"Failed to import package: {name}, output:\n {err}")

    if not broken:
        return
    pytest.fail(f"following packages are not import-able: {broken}")
def test_python_version(container: TrackedContainer) -> None:
    """Check that ``python --version`` reports the expected major.minor version.

    The first output line starting with ``"Python "`` is taken, its second
    whitespace-separated token is treated as the full version, and everything
    before the last dot is compared with ``EXPECTED_PYTHON_VERSION``.
    """
    LOGGER.info(
        f"Checking that python major.minor version is {EXPECTED_PYTHON_VERSION}"
    )
    output = container.run_and_wait(timeout=10, command=["python", "--version"])

    version_line = next(
        candidate
        for candidate in output.splitlines()
        if candidate.startswith("Python ")
    )
    full_version = version_line.split()[1]
    # Drop the last dot-separated component (the patch level).
    major_minor_version, _, _ = full_version.rpartition(".")
    assert major_minor_version == EXPECTED_PYTHON_VERSION
def run_source_in_dir(
    container: TrackedContainer,
    *,
    subdir: str,
    command_suffix: str = "",
    no_errors: bool = True,
    no_failure: bool = True,
) -> tuple[str, str]:
    """Source ``run-hooks.sh`` on a copy of a host data directory in a container.

    ``subdir`` is resolved relative to this test module's directory and mounted
    read-only at ``/home/jovyan/data``. ``command_suffix`` is appended verbatim
    to the bash command line, so callers can chain extra shell after the hooks.
    ``no_errors`` and ``no_failure`` are forwarded to ``run_and_wait``.

    Returns whatever ``run_and_wait`` returns with ``split_stderr=True``;
    callers unpack it as a two-item tuple.
    """
    cont_data_dir = "/home/jovyan/data"
    mounts = {THIS_DIR / subdir: {"bind": cont_data_dir, "mode": "ro"}}

    # https://forums.docker.com/t/all-files-appear-as-executable-in-file-paths-using-bind-mount/99921
    # Docker reports every file in a bind-mounted dir as executable, so the
    # hooks are run from a copy made inside the container instead.
    copy_step = "cp -r /home/jovyan/data/ /home/jovyan/data-copy/ &&"
    hooks_step = "source /usr/local/bin/run-hooks.sh /home/jovyan/data-copy/"
    script = f"{copy_step}{hooks_step}{command_suffix}"

    return container.run_and_wait(
        timeout=10,
        volumes=mounts,
        no_errors=no_errors,
        no_failure=no_failure,
        split_stderr=True,
        command=["bash", "-c", script],
    )
def test_run_hooks_sh_files(container: TrackedContainer) -> None:
    """Check that both ``.sh`` hooks in ``sh-files`` produce their output.

    ``executable.sh`` reports ``MY_VAR`` as 0 and ``non-executable.sh`` as 1;
    both messages must appear on stdout.
    """
    stdout, _ = run_source_in_dir(container, subdir="data/run-hooks/sh-files")
    for script, value in (("executable.sh", 0), ("non-executable.sh", 1)):
        assert f"Inside {script} MY_VAR variable has {value} value" in stdout
def test_units(container: TrackedContainer) -> None:
    """Run every unit-test script that applies to the image under test.

    Add a ``.py`` file in the ``tests/by_image/<image>/units`` dir, and it will
    be automatically tested: each script is bind-mounted read-only into the
    container and executed there with ``python``.

    The image name is taken from the part of ``container.image_name`` after the
    last ``/``, and ``get_test_dirs`` supplies the test directories to search.
    """
    image = container.image_name[container.image_name.rfind("/") + 1 :]
    LOGGER.info(f"Running unit tests for: {image}")
    cont_data_dir = "/home/jovyan/data"

    for test_dir in get_test_dirs(image):
        host_data_dir = test_dir / "units"
        LOGGER.info(f"Searching for units tests in {host_data_dir}")
        units_dir_found = host_data_dir.is_dir()
        LOGGER.info(f"Units tests dir found: {units_dir_found}")
        if not units_dir_found:
            continue

        # Only regular ``.py`` files are unit tests; skip anything else that
        # may live in the directory (e.g. ``__pycache__``, editor leftovers).
        # Sorting makes the order reproducible, as ``iterdir`` guarantees none.
        unit_files = sorted(
            entry
            for entry in host_data_dir.iterdir()
            if entry.is_file() and entry.suffix == ".py"
        )
        for host_file in unit_files:
            cont_file = f"{cont_data_dir}/{host_file.name}"
            LOGGER.info(f"Running unit test: {host_file}")
            container.run_and_wait(
                timeout=30,
                volumes={host_file: {"bind": cont_file, "mode": "ro"}},
                command=["python", cont_file],
            )
def test_uid_change(container: TrackedContainer) -> None:
    """Check that setting ``NB_UID`` changes the UID of the default user.

    The container is started as root with ``NB_UID=1010``; the command prints
    ``id`` and then touches a file under ``/opt/conda``. Only the ``id``
    output is asserted on here.
    """
    id_and_touch = ["bash", "-c", "id && touch /opt/conda/test-file"]
    output = container.run_and_wait(
        timeout=120,  # usermod is slow so give it some time
        user="root",
        environment=["NB_UID=1010"],
        command=id_and_touch,
    )
    assert "uid=1010(jovyan)" in output
time.sleep(1) LOGGER.info(f"Checking if the user is changed to {nb_user} by the start script ...") output = container.get_logs() assert "ERROR" not in output assert "WARNING" not in output assert ( f"username: jovyan -> {nb_user}" in output ), f"User is not changed to {nb_user}" LOGGER.info(f"Checking {nb_user} id ...") command = "id" expected_output = f"uid=1000({nb_user}) gid=100(users) groups=100(users)" output = container.exec_cmd(command, user=nb_user, workdir=f"/home/{nb_user}") assert output == expected_output, f"Bad user {output}, expected {expected_output}" LOGGER.info(f"Checking if {nb_user} owns his home folder ...") command = f'stat -c "%U %G" /home/{nb_user}/' expected_output = f"{nb_user} users" output = container.exec_cmd(command, workdir=f"/home/{nb_user}") assert ( output == expected_output ), f"Bad owner for the {nb_user} home folder {output}, expected {expected_output}" LOGGER.info( f"Checking if a home folder of {nb_user} contains the 'work' folder with appropriate permissions ..." ) command = f'stat -c "%F %U %G" /home/{nb_user}/work' expected_output = f"directory {nb_user} users" output = container.exec_cmd(command, workdir=f"/home/{nb_user}") assert ( output == expected_output ), f"Folder work was not copied properly to {nb_user} home folder. 
def test_sudo_path_without_grant(container: TrackedContainer) -> None:
    """Container started as root without GRANT_SUDO should still find
    `jupyter` in /opt/conda/bin.

    Unlike `test_sudo_path`, no `sudo` is involved: `which jupyter` is run
    directly and must resolve to /opt/conda/bin/jupyter.
    """
    logs = container.run_and_wait(
        timeout=10,
        user="root",
        command=["which", "jupyter"],
    )
    assert logs.rstrip().endswith("/opt/conda/bin/jupyter")
def test_set_uid(container: TrackedContainer) -> None:
    """Check a container started with only a numeric uid (``user="1010"``).

    ``id`` must report uid 1010 mapped to ``jovyan`` with gid 0, and the logs
    must contain exactly one warning, which suggests ``--group-add=users``.
    """
    # This test needs to have tty disabled, the reason is explained here:
    # https://github.com/jupyter/docker-stacks/pull/2260#discussion_r2008821257
    output = container.run_and_wait(
        timeout=10,
        no_warnings=False,
        user="1010",
        command=["id"],
        tty=False,
    )
    assert "uid=1010(jovyan) gid=0(root)" in output

    found_warnings = TrackedContainer.get_warnings(output)
    assert len(found_warnings) == 1
    assert "--group-add=users" in found_warnings[0]
@pytest.mark.parametrize("enable_root", [False, True])
def test_jupyter_env_vars_to_unset(
    container: TrackedContainer, enable_root: bool
) -> None:
    """Check that names listed in JUPYTER_ENV_VARS_TO_UNSET are unset.

    ``SECRET_ANIMAL`` and ``SECRET_FRUIT`` are set and also listed for
    unsetting, while ``FRUIT`` is set but not listed. The echoed sentence must
    therefore show the shell fallbacks for the two secrets and the real value
    of ``FRUIT``. The test runs both as the default user and as root.
    """
    extra_run_kwargs = {}
    if enable_root:
        extra_run_kwargs["user"] = "root"

    env = [
        "JUPYTER_ENV_VARS_TO_UNSET=SECRET_ANIMAL,UNUSED_ENV,SECRET_FRUIT",
        "FRUIT=bananas",
        "SECRET_ANIMAL=cats",
        "SECRET_FRUIT=mango",
    ]
    echo_sentence = [
        "bash",
        "-c",
        "echo I like ${FRUIT} and ${SECRET_FRUIT:-stuff}, and love ${SECRET_ANIMAL:-to keep secrets}!",
    ]
    output = container.run_and_wait(
        timeout=10,
        environment=env,
        command=echo_sentence,
        **extra_run_kwargs,  # type: ignore
    )
    assert "I like bananas and stuff, and love to keep secrets!" in output
""" host_data_dir = tmp_path / "data" host_data_dir.mkdir() host_file = host_data_dir / "wrong_python.sh" host_file.write_text('#!/bin/bash\necho "Wrong python executable invoked!"') host_file.chmod(0o755) logs = container.run_and_wait( timeout=10, user="root", volumes={host_file: {"bind": "/usr/bin/python", "mode": "ro"}}, command=["python", "--version"], ) assert "Wrong python" not in logs assert "Python" in logs def test_startsh_multiple_exec(container: TrackedContainer) -> None: """If start.sh is executed multiple times check that configuration only occurs once.""" logs = container.run_and_wait( timeout=10, no_warnings=False, user="root", environment=["GRANT_SUDO=yes"], command=["start.sh", "sudo", "id"], ) assert "uid=0(root)" in logs warnings = TrackedContainer.get_warnings(logs) assert len(warnings) == 1 assert ( "WARNING: start.sh is the default ENTRYPOINT, do not include it in CMD" in warnings[0] ) def test_rootless_triplet_change(container: TrackedContainer) -> None: """Container should change the username (`NB_USER`), the UID and the GID of the default user.""" logs = container.run_and_wait( timeout=10, user="root", environment=["NB_USER=root", "NB_UID=0", "NB_GID=0"], command=["id"], ) assert "uid=0(root)" in logs assert "gid=0(root)" in logs assert "groups=0(root)" in logs def test_rootless_triplet_home(container: TrackedContainer) -> None: """Container should change the home directory for triplet NB_USER=root, NB_UID=0, NB_GID=0.""" logs = container.run_and_wait( timeout=10, user="root", environment=["NB_USER=root", "NB_UID=0", "NB_GID=0"], command=["bash", "-c", "echo HOME=${HOME} && getent passwd root"], ) assert "HOME=/home/root" in logs assert "root:x:0:0:root:/home/root:/bin/bash" in logs def test_rootless_triplet_sudo(container: TrackedContainer) -> None: """Container should not be started with sudo for triplet NB_USER=root, NB_UID=0, NB_GID=0.""" logs = container.run_and_wait( timeout=10, user="root", environment=["NB_USER=root", "NB_UID=0", 
def test_log_stderr(container: TrackedContainer) -> None:
    """Check that startup logs go to stderr and leave stdout to the command.

    The container runs ``echo stdout`` as root with the root user triplet;
    stdout and stderr are captured separately.
    """
    root_triplet = ["NB_USER=root", "NB_UID=0", "NB_GID=0"]
    out, err = container.run_and_wait(
        timeout=10,
        user="root",
        environment=root_triplet,
        command=["echo", "stdout"],
        split_stderr=True,
    )
    # stdout must hold nothing but what the command itself printed
    assert out.strip() == "stdout"
    # the startup log lines must have been captured on stderr
    for marker in ("Entered start.sh", "Running as root"):
        assert marker in err
def test_pluto_proxy(
    container: TrackedContainer, http_client: requests.Session, free_host_port: int
) -> None:
    """Pluto proxy starts Pluto correctly.

    Delegates entirely to the shared `check_pluto_proxy` helper, forwarding the
    container, HTTP session and host port it receives unchanged.
    """
    check_pluto_proxy(container, http_client, free_host_port)
"display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.10" } }, "nbformat": 4, "nbformat_minor": 5 } ================================================ FILE: tests/by_image/minimal-notebook/data/notebook_svg.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from IPython.display import SVG, display" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "display(SVG(filename=\"Jupyter_logo.svg\"))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.10" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: tests/by_image/minimal-notebook/test_nbconvert.py ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
@pytest.mark.parametrize("test_file", ["notebook_math", "notebook_svg"])
@pytest.mark.parametrize("output_format", ["pdf", "html", "markdown"])
def test_nbconvert(
    container: TrackedContainer, test_file: str, output_format: str
) -> None:
    """Convert each sample notebook to each output format without executing it.

    The notebook is looked up as ``data/<test_file>.ipynb`` next to this module
    and handed to the shared ``check_nbconvert`` helper with ``execute=False``.
    """
    notebook_path = THIS_DIR.joinpath("data", f"{test_file}.ipynb")
    check_nbconvert(container, notebook_path, output_format, execute=False)
def test_spark_shell(container: TrackedContainer) -> None:
    """Check that ``spark-shell`` evaluates a trivial expression.

    ``1+1`` is fed to ``spark-shell`` through a bash here-string. The only
    warning allowed in the logs is the JDK incubator-modules notice, and the
    output must contain the evaluated result.
    """
    shell_command = ["bash", "-c", 'spark-shell <<< "1+1"']
    output = container.run_and_wait(
        timeout=60,
        no_warnings=False,
        command=shell_command,
    )
    expected_warnings = ["WARNING: Using incubator modules: jdk.incubator.vector"]
    assert TrackedContainer.get_warnings(output) == expected_warnings
    assert "res0: Int = 2" in output, "spark-shell does not work"
@pytest.mark.parametrize("test_file", ["issue_1168", "local_pyspark"])
@pytest.mark.parametrize("output_format", ["pdf", "html", "markdown"])
def test_spark_nbconvert(
    container: TrackedContainer, test_file: str, output_format: str
) -> None:
    """Execute and convert each Spark sample notebook to each output format.

    The notebook ``data/<test_file>.ipynb`` next to this module is passed to
    ``check_nbconvert`` with ``execute=True``. Warnings are tolerated during
    the run, but afterwards the only one allowed in the logs is the JDK
    incubator-modules notice.
    """
    notebook_path = THIS_DIR.joinpath("data", f"{test_file}.ipynb")
    output = check_nbconvert(
        container,
        notebook_path,
        output_format,
        execute=True,
        no_warnings=False,
    )
    expected_warnings = ["WARNING: Using incubator modules: jdk.incubator.vector"]
    assert TrackedContainer.get_warnings(output) == expected_warnings
def test_mimetypes(container: TrackedContainer) -> None:
    """Check if Rscript command for mimetypes can be executed

    The whole check is delegated to the shared `check_r_mimetypes` helper,
    which receives the container under test.
    """
    check_r_mimetypes(container)
# Matplotlib: Test tex fonts
import matplotlib
import matplotlib.pyplot as plt

# All settings are applied in one call, in the same order as before:
# TeX system first, then the font sizes, and finally LaTeX text rendering
matplotlib.rcParams.update(
    {
        "pgf.texsystem": "pdflatex",
        "font.family": "serif",
        "font.size": 18,
        "axes.labelsize": 20,
        "axes.titlesize": 24,
        "figure.titlesize": 28,
        "text.usetex": True,
    }
)

# A minimal labelled line, the legend forces a text label to be rendered
fig, ax = plt.subplots(1, 1)
points = [1, 2]
ax.plot(points, points, label="a label")
ax.legend(fontsize=15)

# The calling test checks that this file exists and is not empty
file_path = "/tmp/test_fonts.png"
fig.savefig(file_path)
print(f"File {file_path} saved")
def test_cython(container: TrackedContainer) -> None:
    """Build the `helloworld` Cython extension inside the container"""
    host_data_dir = THIS_DIR / "data/cython"
    cont_data_dir = "/home/jovyan/data"
    # We copy our data to a temporary folder to be able to modify the directory
    # (the data directory itself is mounted read-only)
    build_script = (
        f"cp -r {cont_data_dir}/ /tmp/test/ && cd /tmp/test && python3 setup.py build_ext"
    )

    output = container.run_and_wait(
        timeout=10,
        volumes={host_data_dir: {"bind": cont_data_dir, "mode": "ro"}},
        command=["bash", "-c", build_script],
    )
    assert "building 'helloworld' extension" in output
@pytest.mark.parametrize(
    "test_file,expected_file,description",
    [
        (
            "matplotlib_1.py",
            "test.png",
            "Test that matplotlib can plot a graph and write it as an image ...",
        ),
        (
            "matplotlib_fonts_1.py",
            "test_fonts.png",
            "Test cm-super latex labels in matplotlib ...",
        ),
    ],
)
def test_matplotlib(
    container: TrackedContainer, test_file: str, expected_file: str, description: str
) -> None:
    """Run a matplotlib script in the container and check the image it writes

    - Test that matplotlib is able to plot a graph and write it as an image
    - Test matplotlib latex fonts, which depend on the cm-super package
    """
    LOGGER.info(description)

    script_on_host = THIS_DIR / "data/matplotlib" / test_file
    script_in_container = f"/home/jovyan/data/{test_file}"

    # The container only sleeps, the script is run through `exec` afterwards
    container.run_detached(
        volumes={script_on_host: {"bind": script_in_container, "mode": "ro"}},
        command=["sleep", "infinity"],
    )
    container.exec_cmd(f"python {script_in_container}")

    # `test -s` succeeds only if the file exists and is not empty
    # https://stackoverflow.com/a/15895594/4413446
    container.exec_cmd(f"test -s /tmp/{expected_file}")
def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the mandatory options identifying the image under test."""
    # One entry per option: the flag and the attributes given to `addoption`
    option_specs = (
        (
            "--registry",
            {
                "required": True,
                "choices": ["docker.io", "quay.io"],
                "help": "Image registry",
            },
        ),
        ("--owner", {"required": True, "help": "Owner of the image"}),
        ("--image", {"required": True, "help": "Short image name"}),
    )
    for flag, attrs in option_specs:
        parser.addoption(flag, **attrs)
@pytest.fixture(scope="function") def container( docker_client: docker.DockerClient, image_name: str ) -> Generator[TrackedContainer]: """Notebook container with initial configuration appropriate for testing (e.g., HTTP port exposed to the host for HTTP calls). Yields the container instance and kills it when the caller is done with it. """ container = TrackedContainer( docker_client, image_name, ) yield container container.remove() @pytest.fixture(scope="function") def free_host_port() -> Generator[int]: """Finds a free port on the host machine""" with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: s.bind(("", 0)) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) yield s.getsockname()[1] ================================================ FILE: tests/hierarchy/__init__.py ================================================ ================================================ FILE: tests/hierarchy/get_test_dirs.py ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. from pathlib import Path from tests.hierarchy.images_hierarchy import IMAGE_PARENT THIS_DIR = Path(__file__).parent.resolve() IMAGE_SPECIFIC_TESTS_DIR = THIS_DIR.parent / "by_image" assert IMAGE_SPECIFIC_TESTS_DIR.exists(), f"{IMAGE_SPECIFIC_TESTS_DIR} does not exist." def get_test_dirs(image: str | None) -> list[Path]: if image is None: return [] test_dirs = get_test_dirs(IMAGE_PARENT[image]) current_test_dir = IMAGE_SPECIFIC_TESTS_DIR / image assert current_test_dir.exists(), f"{current_test_dir} does not exist." test_dirs.append(current_test_dir) return test_dirs ================================================ FILE: tests/hierarchy/images_hierarchy.py ================================================ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
# Please, take a look at the hierarchy of the images here: # https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#image-relationships IMAGE_PARENT = { "docker-stacks-foundation": None, "base-notebook": "docker-stacks-foundation", "minimal-notebook": "base-notebook", "scipy-notebook": "minimal-notebook", "r-notebook": "minimal-notebook", "julia-notebook": "minimal-notebook", "tensorflow-notebook": "scipy-notebook", "pytorch-notebook": "scipy-notebook", "datascience-notebook": "scipy-notebook", "pyspark-notebook": "scipy-notebook", "all-spark-notebook": "pyspark-notebook", } ================================================ FILE: tests/pytest.ini ================================================ [pytest] addopts = -ra --color=yes log_cli = 1 log_cli_level = INFO log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s) log_cli_date_format=%Y-%m-%d %H:%M:%S markers = info: marks tests as info (deselect with '-m "not info"') ================================================ FILE: tests/run_tests.py ================================================ #!/usr/bin/env python3 # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. 
def test_image(*, registry: str, owner: str, image: str) -> None:
    """Run pytest on the test directories of `image` and of its ancestors"""
    LOGGER.info(f"Testing image: {image}")
    test_dirs = get_test_dirs(image)
    LOGGER.info(f"Test dirs to be run: {test_dirs}")

    # Tests marked as `info` are deselected, the others are run in parallel
    pytest_args = (
        "-m",
        "pytest",
        "--numprocesses",
        "auto",
        "-m",
        "not info",
        test_dirs,
        "--registry",
        registry,
        "--owner",
        owner,
        "--image",
        image,
    )
    # Run in the foreground
    python3[pytest_args] & plumbum.FG
def check_nbconvert(
    container: TrackedContainer,
    host_file: Path,
    output_format: str,
    *,
    execute: bool,
    no_warnings: bool = True,
) -> str:
    """Check if nbconvert is able to convert a notebook file

    The notebook is mounted read-only into `/home/jovyan` and converted
    into `/tmp` inside the container.

    Parameters
    ----------
    container: TrackedContainer
        Container used to run `jupyter nbconvert`
    host_file: Path
        Notebook on the host
    output_format: str
        Value passed to `--to` (`markdown` produces a `.md` file)
    execute: bool
        Whether the notebook is executed before being converted
    no_warnings: bool
        Forwarded to `TrackedContainer.run_and_wait`

    Returns
    -------
    str
        Logs of the container

    Raises
    ------
    AssertionError
        If the path of the expected output file is not found in the logs
    """
    cont_data_file = "/home/jovyan/" + host_file.name
    output_dir = "/tmp"
    LOGGER.info(
        f"Test that the example notebook {host_file.name} can be converted to {output_format} ..."
    )
    command = [
        "jupyter",
        "nbconvert",
        cont_data_file,
        "--output-dir",
        output_dir,
        "--to",
        output_format,
    ]
    if execute:
        # ExecutePreprocessor.timeout is expressed in seconds (not milliseconds).
        # The value is kept as is: the container timeout below is the one
        # which bounds the duration of the conversion in practice.
        conversion_timeout_s = 5000
        command += [
            "--execute",
            f"--ExecutePreprocessor.timeout={conversion_timeout_s}",
        ]
    logs = container.run_and_wait(
        timeout=60,
        volumes={host_file: {"bind": cont_data_file, "mode": "ro"}},
        command=command,
        no_warnings=no_warnings,
    )
    # nbconvert uses the `md` extension for the `markdown` format
    output_ext = "md" if output_format == "markdown" else output_format
    expected_file = f"{output_dir}/{host_file.stem}.{output_ext}"
    assert expected_file in logs, f"Expected file {expected_file} not generated"
    return logs
# See copyright below. # # MIT License # Copyright (c) 2019 Abraham Hinteregger # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
class CondaPackageHelper:
    """Conda package helper permitting to get information about packages

    The helper starts the given container with a `sleep infinity` command and
    runs `mamba` / `conda` commands in it. The package listings are computed
    on first access and then cached on the instance.
    """

    def __init__(self, container: "TrackedContainer"):
        self.container = container
        # The container only sleeps: commands are run through `exec_cmd` later
        self.container.run_detached(command=["sleep", "infinity"])

    @cached_property
    def installed_packages(self) -> dict[str, set[str]]:
        """Return the installed packages"""
        LOGGER.info("Grabbing the list of installed packages ...")
        env_export = self.container.exec_cmd("mamba env export --no-build --json")
        return self._parse_package_versions(env_export)

    @cached_property
    def requested_packages(self) -> dict[str, set[str]]:
        """Return the requested package (i.e. `mamba install <package>`)"""
        LOGGER.info("Grabbing the list of manually requested packages ...")
        env_export = self.container.exec_cmd(
            "mamba env export --no-build --json --from-history"
        )
        return self._parse_package_versions(env_export)

    @staticmethod
    def _parse_package_versions(env_export: str) -> dict[str, set[str]]:
        """Extract packages and versions from the lines returned by the list of specifications

        Returns a mapping from the package name to the set of its versions.
        The set is empty when no version could be extracted, and the mapping
        is empty when the export contains no `dependencies`.
        """
        try:
            dependencies = json.loads(env_export).get("dependencies")
        except json.JSONDecodeError:
            # Try to fix the invalid JSON from mamba (bug in mamba 2.x)
            # which doesn't escape double quotes in version specs
            # e.g. "protobuf[version=">=5.28.3,<6"]"
            fixed_export = re.sub(r'(\[[^\]]*=)"([^"]+)"(\])', r"\1'\2'\3", env_export)
            dependencies = json.loads(fixed_export).get("dependencies")

        packages_dict: dict[str, set[str]] = {}
        # The `dependencies` key might be missing, which is treated as no packages
        for dependency in dependencies or []:
            # Filtering packages installed through pip
            # since we only manage packages installed through mamba here
            # They are represented by a dict with a key 'pip'
            if not isinstance(dependency, str):
                continue
            # If it's a package with [] notation, we strip the [] part for the package name
            package_with_name = dependency.split("[")[0]
            split = re.split("=?=", package_with_name)
            # default values
            package = split[0]
            version: set[str] = set()
            # This normally means we have package=version notation
            if len(split) > 1:
                # checking if it's a proper version by testing if the first char is a digit
                # (slicing keeps an empty version, e.g. `pkg=`, from raising IndexError)
                if split[1][:1].isdigit():
                    # package + version case
                    version = set(split[1:])
                # The split was incorrect and the package shall not be split
                else:
                    package = f"{split[0]}={split[1]}"
            packages_dict[package] = version
        return packages_dict

    @cached_property
    def available_packages(self) -> dict[str, set[str]]:
        """Return the available packages"""
        LOGGER.info("Grabbing the list of available packages (can take a while) ...")
        return self._extract_available(
            self.container.exec_cmd("conda search --outdated --quiet")
        )

    @staticmethod
    def _extract_available(lines: str) -> defaultdict[str, set[str]]:
        """Extract packages and versions from the lines returned by the list of packages

        The first two lines are skipped (presumably the header of the listing),
        as well as blank lines. The first two whitespace-separated columns of
        every other line are the package name and its version.
        """
        ddict: defaultdict[str, set[str]] = defaultdict(set)
        for line in lines.splitlines()[2:]:
            if not line.strip():
                continue
            match = re.match(r"^(\S+)\s+(\S+)", line)
            assert match is not None, f"Unexpected line in the list of packages: {line!r}"
            pkg, version = match.groups()
            ddict[pkg].add(version)
        return ddict

    def find_updatable_packages(self, requested_only: bool) -> list[dict[str, str]]:
        """Check the updatable packages including or not dependencies

        A package is reported when its oldest installed version is lower than
        the newest available one. With `requested_only`, only the manually
        requested packages are considered.
        """
        updatable = []
        for pkg, inst_vs in self.installed_packages.items():
            # `.get` does not add an entry to the cached mapping for unknown packages
            avail_vs = self.available_packages.get(pkg)
            # Nothing can be compared without both an installed and an available version
            if not avail_vs or not inst_vs:
                continue
            if requested_only and pkg not in self.requested_packages:
                continue
            newest = max(avail_vs, key=CondaPackageHelper.semantic_cmp)
            current = min(inst_vs, key=CondaPackageHelper.semantic_cmp)
            if CondaPackageHelper.semantic_cmp(
                current
            ) < CondaPackageHelper.semantic_cmp(newest):
                updatable.append({"Package": pkg, "Current": current, "Newest": newest})
        return updatable

    @staticmethod
    def semantic_cmp(version_string: str) -> tuple[int, ...]:
        """Manage semantic versioning for comparison

        The version is split into runs of digits and runs of ASCII letters.
        Digits are converted with `int`, letters are folded into an integer,
        so that the returned tuples can be compared with each other.
        """

        def str_ord(string: str) -> int:
            num = 0
            for char in string:
                num *= 255
                num += ord(char)
            return num

        def try_int(version_str: str) -> int:
            try:
                return int(version_str)
            except ValueError:
                return str_ord(version_str)

        tokens = chain.from_iterable(
            re.findall(r"[A-Za-z]+|\d+", part) for part in version_string.split(".")
        )
        return tuple(map(try_int, tokens))

    def get_outdated_summary(
        self, updatable: list[dict[str, str]], requested_only: bool
    ) -> str:
        """Return a summary of outdated packages"""
        packages = (
            self.requested_packages if requested_only else self.installed_packages
        )
        nb_packages = len(packages)
        nb_updatable = len(updatable)
        # Without any package the ratio is reported as 0% instead of dividing by zero
        updatable_ratio = nb_updatable / nb_packages if nb_packages else 0.0
        return f"{nb_updatable}/{nb_packages} ({updatable_ratio:.0%}) packages could be updated"

    def get_outdated_table(self, updatable: list[dict[str, str]]) -> str:
        """Return a table of outdated packages"""
        return tabulate(updatable, headers="keys")
class TrackedContainer:
    """Wrapper that collects docker container configuration and delays
    container creation/execution.

    At most one container is tracked at a time: it is stored in `container`
    once started and reset to None by `remove`.

    Parameters
    ----------
    docker_client: docker.DockerClient
        Docker client instance
    image_name: str
        Name of the docker image to launch
    """

    def __init__(
        self,
        docker_client: docker.DockerClient,
        image_name: str,
    ):
        # No container exists until `run_detached` is called
        self.container: Container | None = None
        self.docker_client: docker.DockerClient = docker_client
        self.image_name: str = image_name

    def run_detached(self, **kwargs: Any) -> None:
        """Runs a docker container using the pre-configured image name
        and keeps track of the spawned container to remove it later.

        The container is started with `detach=True` and `tty=True` unless
        these values are overridden by the keyword arguments.

        Parameters
        ----------
        **kwargs: dict, optional
            Keyword arguments to pass to docker.DockerClient.containers.run
            extending and/or overriding the default ones
        """
        LOGGER.info(
            f"Creating a container for the image: {self.image_name} with args: {kwargs} ..."
        )
        default_kwargs = {"detach": True, "tty": True}
        # On conflict, the values given by the caller win
        final_kwargs = default_kwargs | kwargs
        self.container = self.docker_client.containers.run(
            self.image_name, **final_kwargs
        )
        LOGGER.info(f"Container {self.container.name} created")

    def get_logs(self, *, stdout: bool = True, stderr: bool = True) -> str:
        """Return the decoded logs of the tracked container.

        `stdout` and `stderr` select the streams included in the result.
        A container must have been started before.
        """
        assert self.container is not None
        logs = self.container.logs(stdout=stdout, stderr=stderr).decode()
        assert isinstance(logs, str)
        return logs

    def get_health(self) -> str:
        """Reload the state of the tracked container and return its health."""
        assert self.container is not None
        self.container.reload()
        return self.container.health  # type: ignore

    def exec_cmd(self, cmd: str, **kwargs: Any) -> str:
        """Run a command in the tracked container and return its output.

        The command is run with `tty=True` unless overridden by the keyword
        arguments, which are forwarded to `exec_run`. Trailing whitespace is
        stripped from the output.

        Raises
        ------
        AssertionError
            If no container is tracked or if the exit code is not 0
        """
        assert self.container is not None
        container = self.container

        LOGGER.info(f"Running cmd: `{cmd}` on container: {container.name}")
        default_kwargs = {"tty": True}
        final_kwargs = default_kwargs | kwargs
        exec_result = container.exec_run(cmd, **final_kwargs)
        output = exec_result.output.decode().rstrip()
        assert isinstance(output, str)

        if exec_result.exit_code != 0:
            LOGGER.error(f"Command output:\n{output}")
            raise AssertionError(f"Command: `{cmd}` failed")
        else:
            LOGGER.debug(f"Command output:\n{output}")

        return output

    # With `split_stderr=True`, a (stdout, stderr) tuple is returned
    @overload
    def run_and_wait(
        self,
        timeout: int,
        *,
        no_warnings: bool = True,
        no_errors: bool = True,
        no_failure: bool = True,
        split_stderr: Literal[True],
        **kwargs: Any,
    ) -> tuple[str, str]: ...

    # Otherwise, the logs are returned as a single string
    @overload
    def run_and_wait(
        self,
        timeout: int,
        *,
        no_warnings: bool = True,
        no_errors: bool = True,
        no_failure: bool = True,
        split_stderr: Literal[False] = False,
        **kwargs: Any,
    ) -> str: ...

    def run_and_wait(
        self,
        timeout: int,
        *,
        no_warnings: bool = True,
        no_errors: bool = True,
        no_failure: bool = True,
        split_stderr: bool = False,
        **kwargs: Any,
    ) -> str | tuple[str, str]:
        """Run a container, wait for it to exit, check its outcome and remove it.

        Parameters
        ----------
        timeout: int
            Passed to the `wait` call on the container
        no_warnings: bool
            If True, no line of the logs may start with "WARNING";
            if False, at least one such line must be present
        no_errors: bool
            Same as `no_warnings` for the lines starting with "ERROR"
        no_failure: bool
            If True, the exit code must be 0; if False, it must not be 0
        split_stderr: bool
            If True, stdout and stderr are fetched separately (this requires
            `tty=False`) and only stderr is logged and checked
            for warnings and errors
        **kwargs: dict, optional
            Forwarded to `run_detached`

        Returns
        -------
        str | tuple[str, str]
            The logs, or (stdout, stderr) when `split_stderr` is True

        Raises
        ------
        AssertionError
            If one of the expectations above is not met
        """
        if split_stderr:
            kwargs.setdefault("tty", False)
            assert kwargs["tty"] is False, "split_stderr only works with tty=False"
        self.run_detached(**kwargs)
        assert self.container is not None
        rv = self.container.wait(timeout=timeout)
        stdout: str
        stderr: str
        if split_stderr:
            stdout = self.get_logs(stdout=True, stderr=False)
            stderr = logs = self.get_logs(stdout=False, stderr=True)
        else:
            logs = self.get_logs()
        rc_success = rv["StatusCode"] == 0
        # True when at least one of the three expectations is not met
        should_report = not (
            no_failure == rc_success
            and no_warnings == (not self.get_warnings(logs))
            and no_errors == (not self.get_errors(logs))
        )
        if not rc_success or should_report:
            LOGGER.error(f"Command output:\n{logs}")
        else:
            LOGGER.debug(f"Command output:\n{logs}")
        # The logs were fetched above, so the container is not needed anymore
        self.remove()

        # To see the reason, we run assert statements separately
        assert (
            no_failure == rc_success
        ), f"Container exited with code {rv['StatusCode']}"
        warnings = self.get_warnings(logs)
        assert no_warnings == (not warnings), f"Warnings found: {warnings}"
        errors = self.get_errors(logs)
        assert no_errors == (not errors), f"Errors found: {errors}"

        if split_stderr:
            return (stdout, stderr)
        else:
            return logs

    @staticmethod
    def get_errors(logs: str) -> list[str]:
        """Return the lines of the logs starting with "ERROR"."""
        return TrackedContainer._lines_starting_with(logs, "ERROR")

    @staticmethod
    def get_warnings(logs: str) -> list[str]:
        """Return the lines of the logs starting with "WARNING".

        The absl message about log messages written to STDERR is ignored.
        """
        warnings = TrackedContainer._lines_starting_with(logs, "WARNING")
        warnings = [
            line
            for line in warnings
            if "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR"
            not in line
        ]
        return warnings

    @staticmethod
    def _lines_starting_with(logs: str, pattern: LiteralString) -> list[str]:
        """Return the lines of the logs starting with the given prefix."""
        return [line for line in logs.splitlines() if line.startswith(pattern)]

    def remove(self) -> None:
        """Kills and removes the tracked docker container.

        Does nothing (apart from logging) when no container is tracked.
        """
        if self.container is None:
            LOGGER.debug("No container to remove")
        else:
            LOGGER.info(f"Removing container {self.container.name} ...")
            self.container.remove(force=True)
            LOGGER.info(f"Container {self.container.name} removed")
            self.container = None
def get_manifest_timestamp(manifest_file: Path) -> str:
    """Return the build timestamp stored in a manifest file

    The timestamp is the 20 characters following the first
    "Build timestamp: " prefix found in the file.

    Raises
    ------
    AssertionError
        If the prefix is not found, or if the extracted text does not
        start with "20" and end with "Z"
    """
    file_content = manifest_file.read_text()
    TIMESTAMP_PREFIX = "Build timestamp: "
    TIMESTAMP_LENGTH = 20
    prefix_pos = file_content.find(TIMESTAMP_PREFIX)
    # `find` returns -1 when the prefix is missing: slicing from there would
    # silently pick unrelated text from the beginning of the file
    assert prefix_pos != -1, f"{TIMESTAMP_PREFIX!r} not found in {manifest_file}"
    start = prefix_pos + len(TIMESTAMP_PREFIX)
    timestamp = file_content[start : start + TIMESTAMP_LENGTH]
    # Should be good enough till year 2100
    assert timestamp.startswith("20"), timestamp
    assert timestamp.endswith("Z"), timestamp
    return timestamp
def generate_home_wiki_tables(repository: str, all_years: list[YearFiles]) -> str:
    """Render the per-year statistics tables shown on the wiki Home page.

    For every year a ``## <year>`` heading is emitted, followed by a
    GitHub-flavoured table with one row per monthly file and a final
    ``**Total**`` row that sums builds, images and commits for the year.

    Args:
        repository: Repository name on GitHub, used in the commit links.
        all_years: Monthly files grouped by year, in the order to render.

    Returns:
        The concatenated Markdown for all years (empty if there are none).
    """
    COMMITS_LINK = (
        f"[{{}}](https://github.com/{repository}/commits/main/?since={{}}&until={{}})"
    )
    COLUMN_NAMES = ["Month", "Builds", "Images", "Commits"]

    sections = []
    for year_files in all_years:
        year = year_files.year
        totals = Statistics(builds=0, images=0, commits=0)
        rows = []

        for month_file in year_files.files:
            month = month_file.month
            first_day = datetime.date(year=year, month=month, day=1)
            stat = calculate_monthly_stat(month_file, first_day)

            totals.builds += stat.builds
            totals.images += stat.images
            totals.commits += stat.commits

            # relativedelta(day=31) clamps to the last day of the month
            last_day = first_day + relativedelta.relativedelta(day=31)
            page_name = f"{year}-{month:0>2}"
            rows.append(
                [
                    f"[`{page_name}`](./{page_name})",
                    stat.builds,
                    stat.images,
                    COMMITS_LINK.format(stat.commits, first_day, last_day),
                ]
            )

        rows.append(
            [
                "**Total**",
                totals.builds,
                totals.images,
                COMMITS_LINK.format(totals.commits, f"{year}-01-01", f"{year}-12-31"),
            ]
        )

        heading = f"\n\n## {year}\n\n"
        sections.append(
            heading + tabulate.tabulate(rows, COLUMN_NAMES, tablefmt="github")
        )

    LOGGER.info("Generated home wiki tables")
    return "".join(sections)
def remove_old_manifests(wiki_dir: Path) -> None:
    """Delete all but the newest manifest files kept in the wiki.

    Every ``*.md`` file under ``<wiki_dir>/manifests`` is ranked by its build
    timestamp (newest first); files beyond the first 4500 are removed and each
    removal is logged.

    Args:
        wiki_dir: Directory of the wiki repo.
    """
    MAX_NUMBER_OF_MANIFESTS = 4500

    manifests_root = wiki_dir / "manifests"
    newest_first: list[tuple[str, Path]] = sorted(
        (
            (get_manifest_timestamp(manifest), manifest)
            for manifest in manifests_root.rglob("*.md")
        ),
        reverse=True,
    )

    for _, stale_manifest in newest_first[MAX_NUMBER_OF_MANIFESTS:]:
        stale_manifest.unlink()
        LOGGER.info(f"Removed manifest: {stale_manifest.relative_to(wiki_dir)}")
def update_wiki(config: Config) -> None:
    """Run a full wiki update.

    Steps, in order: copy the new manifest files into the wiki, add every
    build history line to its monthly page, rewrite the Home page, and
    finally remove old manifests.

    Args:
        config: Directories, repository name and the ``allow_no_files`` flag.

    Raises:
        AssertionError: If no history line files exist and
            ``config.allow_no_files`` is false.
    """
    LOGGER.info("Updating wiki")
    copy_manifest_files(config)

    # Sorted so that history lines are applied in a deterministic order
    history_line_paths = list(config.hist_lines_dir.rglob("*.txt"))
    history_line_paths.sort()
    assert (
        config.allow_no_files or history_line_paths
    ), "expected to have some build history line files"

    for history_line_path in history_line_paths:
        update_monthly_wiki_page(config.wiki_dir, history_line_path.read_text())

    write_home_wiki_page(config.wiki_dir, config.repository)
    remove_old_manifests(config.wiki_dir)
    LOGGER.info("Wiki updated")